source: rtems-libbsd/freebsd/sys/vm/uma_core.c @ 62c8ca0

55-freebsd-126-freebsd-12
Last change on this file since 62c8ca0 was 62c8ca0, checked in by Sebastian Huber <sebastian.huber@…>, on 05/18/17 at 07:35:46

Fix INVARIANTS support

  • Property mode set to 100644
File size: 91.5 KB
Line 
1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * uma_core.c  Implementation of the Universal Memory allocator
33 *
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36 * efficient.  A primary design goal is to return unused memory to the rest of
37 * the system.  This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
40 *
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
42 * are well known.
43 *
44 */
45
46/*
47 * TODO:
48 *      - Improve memory usage for large allocations
49 *      - Investigate cache size adjustments
50 */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD$");
54
55/* I should really use ktr.. */
56/*
57#define UMA_DEBUG 1
58#define UMA_DEBUG_ALLOC 1
59#define UMA_DEBUG_ALLOC_1 1
60*/
61
62#include <rtems/bsd/local/opt_ddb.h>
63#include <rtems/bsd/local/opt_param.h>
64#include <rtems/bsd/local/opt_vm.h>
65
66#include <sys/param.h>
67#include <sys/systm.h>
68#include <sys/bitset.h>
69#include <sys/eventhandler.h>
70#include <sys/kernel.h>
71#include <sys/types.h>
72#include <sys/queue.h>
73#include <sys/malloc.h>
74#include <sys/ktr.h>
75#include <sys/lock.h>
76#include <sys/sysctl.h>
77#include <sys/mutex.h>
78#include <sys/proc.h>
79#include <sys/random.h>
80#include <sys/rwlock.h>
81#include <sys/sbuf.h>
82#include <sys/sched.h>
83#include <sys/smp.h>
84#include <sys/taskqueue.h>
85#include <sys/vmmeter.h>
86
87#include <vm/vm.h>
88#include <vm/vm_object.h>
89#include <vm/vm_page.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_param.h>
92#include <vm/vm_map.h>
93#include <vm/vm_kern.h>
94#include <vm/vm_extern.h>
95#include <vm/uma.h>
96#include <vm/uma_int.h>
97#include <vm/uma_dbg.h>
98
99#include <ddb/ddb.h>
100#ifdef __rtems__
101  #ifdef RTEMS_SMP
102    /*
103     * It is essential that we have a per-processor cache, otherwise the
104     * critical_enter()/critical_exit() protection would be insufficient.
105     */
106    #undef curcpu
107    #define curcpu rtems_get_current_processor()
108    #undef mp_maxid
109    #define mp_maxid (rtems_get_processor_count() - 1)
110    #define SMP
111  #endif
112#endif /* __rtems__ */
113
114#ifdef DEBUG_MEMGUARD
115#include <vm/memguard.h>
116#endif
117
118/*
119 * This is the zone and keg from which all zones are spawned.  The idea is that
120 * even the zone & keg heads are allocated from the allocator, so we use the
121 * bss section to bootstrap us.
122 */
123static struct uma_keg masterkeg;
124static struct uma_zone masterzone_k;
125static struct uma_zone masterzone_z;
126static uma_zone_t kegs = &masterzone_k;
127static uma_zone_t zones = &masterzone_z;
128
129/* This is the zone from which all of uma_slab_t's are allocated. */
130static uma_zone_t slabzone;
131
132/*
133 * The initial hash tables come out of this zone so they can be allocated
134 * prior to malloc coming up.
135 */
136static uma_zone_t hashzone;
137
138/* The boot-time adjusted value for cache line alignment. */
139int uma_align_cache = 64 - 1;
140
141static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
142
143#ifndef __rtems__
144/*
145 * Are we allowed to allocate buckets?
146 */
147static int bucketdisable = 1;
148#else /* __rtems__ */
149#define bucketdisable 0
150#endif /* __rtems__ */
151
152/* Linked list of all kegs in the system */
153static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
154
155/* Linked list of all cache-only zones in the system */
156static LIST_HEAD(,uma_zone) uma_cachezones =
157    LIST_HEAD_INITIALIZER(uma_cachezones);
158
159/* This RW lock protects the keg list */
160static struct rwlock_padalign uma_rwlock;
161
162#ifndef __rtems__
163/* Linked list of boot time pages */
164static LIST_HEAD(,uma_slab) uma_boot_pages =
165    LIST_HEAD_INITIALIZER(uma_boot_pages);
166
167/* This mutex protects the boot time pages list */
168static struct mtx_padalign uma_boot_pages_mtx;
169#endif /* __rtems__ */
170
171static struct sx uma_drain_lock;
172
173#ifndef __rtems__
174/* Is the VM done starting up? */
175static int booted = 0;
176#define UMA_STARTUP     1
177#define UMA_STARTUP2    2
178#endif /* __rtems__ */
179
180/*
181 * This is the handle used to schedule events that need to happen
182 * outside of the allocation fast path.
183 */
184static struct callout uma_callout;
185#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
186
187/*
188 * This structure is passed as the zone ctor arg so that I don't have to create
189 * a special allocation function just for zones.
190 */
191struct uma_zctor_args {
192        const char *name;
193        size_t size;
194        uma_ctor ctor;
195        uma_dtor dtor;
196        uma_init uminit;
197        uma_fini fini;
198        uma_import import;
199        uma_release release;
200        void *arg;
201        uma_keg_t keg;
202        int align;
203        uint32_t flags;
204};
205
206struct uma_kctor_args {
207        uma_zone_t zone;
208        size_t size;
209        uma_init uminit;
210        uma_fini fini;
211        int align;
212        uint32_t flags;
213};
214
215struct uma_bucket_zone {
216        uma_zone_t      ubz_zone;
217        char            *ubz_name;
218        int             ubz_entries;    /* Number of items it can hold. */
219        int             ubz_maxsize;    /* Maximum allocation size per-item. */
220};
221
222/*
223 * Compute the actual number of bucket entries to pack them in power
224 * of two sizes for more efficient space utilization.
225 */
226#define BUCKET_SIZE(n)                                          \
227    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
228
229#ifndef __rtems__
230#define BUCKET_MAX      BUCKET_SIZE(256)
231#else /* __rtems__ */
232#define BUCKET_MAX      BUCKET_SIZE(128)
233#endif /* __rtems__ */
234
235struct uma_bucket_zone bucket_zones[] = {
236        { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
237        { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
238        { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
239        { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
240        { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
241        { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
242        { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
243        { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
244#ifndef __rtems__
245        { NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
246#endif /* __rtems__ */
247        { NULL, NULL, 0}
248};
249
250/*
251 * Flags and enumerations to be passed to internal functions.
252 */
253enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
254
255/* Prototypes.. */
256
257#ifndef __rtems__
258static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
259#endif /* __rtems__ */
260static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
261#ifndef __rtems__
262static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
263#endif /* __rtems__ */
264static void page_free(void *, vm_size_t, uint8_t);
265static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
266static void cache_drain(uma_zone_t);
267static void bucket_drain(uma_zone_t, uma_bucket_t);
268static void bucket_cache_drain(uma_zone_t zone);
269static int keg_ctor(void *, int, void *, int);
270static void keg_dtor(void *, int, void *);
271static int zone_ctor(void *, int, void *, int);
272static void zone_dtor(void *, int, void *);
273static int zero_init(void *, int, int);
274static void keg_small_init(uma_keg_t keg);
275static void keg_large_init(uma_keg_t keg);
276static void zone_foreach(void (*zfunc)(uma_zone_t));
277static void zone_timeout(uma_zone_t zone);
278static int hash_alloc(struct uma_hash *);
279static int hash_expand(struct uma_hash *, struct uma_hash *);
280static void hash_free(struct uma_hash *hash);
281static void uma_timeout(void *);
282static void uma_startup3(void);
283static void *zone_alloc_item(uma_zone_t, void *, int);
284static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
285static void bucket_enable(void);
286static void bucket_init(void);
287static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
288static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
289static void bucket_zone_drain(void);
290static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
291static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
292#ifndef __rtems__
293static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
294#endif /* __rtems__ */
295static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
296static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
297static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
298    uma_fini fini, int align, uint32_t flags);
299static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
300static void zone_release(uma_zone_t zone, void **bucket, int cnt);
301static void uma_zero_item(void *item, uma_zone_t zone);
302
303void uma_print_zone(uma_zone_t);
304void uma_print_stats(void);
305static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
306static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
307
308#ifdef INVARIANTS
309static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
310static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
311#endif
312
313SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
314
315SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
316    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
317
318SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
319    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
320
321static int zone_warnings = 1;
322SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
323    "Warn when UMA zones becomes full");
324
325/*
326 * This routine checks to see whether or not it's safe to enable buckets.
327 */
/*
 * Decide whether per-CPU bucket allocation is currently permitted.
 *
 * On FreeBSD, buckets are disabled whenever the free page count is below
 * the minimum threshold so bucket allocations cannot worsen a memory
 * shortage.  On RTEMS, bucketdisable is the constant 0 (see the #define
 * above), so this function is intentionally a no-op.
 */
static void
bucket_enable(void)
{
#ifndef __rtems__
        bucketdisable = vm_page_count_min();
#endif /* __rtems__ */
}
335
336/*
337 * Initialize bucket_zones, the array of zones of buckets of various sizes.
338 *
339 * For each zone, calculate the memory required for each bucket, consisting
340 * of the header and an array of pointers.
341 */
342static void
343bucket_init(void)
344{
345        struct uma_bucket_zone *ubz;
346        int size;
347
348        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
349                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
350                size += sizeof(void *) * ubz->ubz_entries;
351                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
352                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
353                    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
354        }
355}
356
357/*
358 * Given a desired number of entries for a bucket, return the zone from which
359 * to allocate the bucket.
360 */
361static struct uma_bucket_zone *
362bucket_zone_lookup(int entries)
363{
364        struct uma_bucket_zone *ubz;
365
366        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
367                if (ubz->ubz_entries >= entries)
368                        return (ubz);
369        ubz--;
370        return (ubz);
371}
372
373static int
374bucket_select(int size)
375{
376        struct uma_bucket_zone *ubz;
377
378        ubz = &bucket_zones[0];
379        if (size > ubz->ubz_maxsize)
380                return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
381
382        for (; ubz->ubz_entries != 0; ubz++)
383                if (ubz->ubz_maxsize < size)
384                        break;
385        ubz--;
386        return (ubz->ubz_entries);
387}
388
/*
 * Allocate an empty bucket suitable for the zone's current bucket size.
 *
 * Arguments:
 *      zone   The zone the bucket will serve (used for sizing and flags).
 *      udata  Caller cookie; repurposed here to carry zone flags through
 *             recursive bucket-zone allocations (see comment below).
 *      flags  M_NOWAIT/M_WAITOK style allocation flags.
 *
 * Returns:
 *      A zeroed-count bucket, or NULL if buckets are disabled, recursion
 *      was detected, or the underlying allocation failed.
 */
static uma_bucket_t
bucket_alloc(uma_zone_t zone, void *udata, int flags)
{
        struct uma_bucket_zone *ubz;
        uma_bucket_t bucket;

#ifndef __rtems__
        /*
         * This is to stop us from allocating per cpu buckets while we're
         * running out of vm.boot_pages.  Otherwise, we would exhaust the
         * boot pages.  This also prevents us from allocating buckets in
         * low memory situations.
         */
        if (bucketdisable)
                return (NULL);
#endif /* __rtems__ */
        /*
         * To limit bucket recursion we store the original zone flags
         * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
         * NOVM flag to persist even through deep recursions.  We also
         * store ZFLAG_BUCKET once we have recursed attempting to allocate
         * a bucket for a bucket zone so we do not allow infinite bucket
         * recursion.  This cookie will even persist to frees of unused
         * buckets via the allocation path or bucket allocations in the
         * free path.
         */
        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
                udata = (void *)(uintptr_t)zone->uz_flags;
        else {
                /* Already one level deep: refuse a second recursion. */
                if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
                        return (NULL);
                udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
        }
        if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
                flags |= M_NOVM;
        ubz = bucket_zone_lookup(zone->uz_count);
        /* A bucket zone must not source buckets from itself. */
        if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
                ubz++;
        bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
        if (bucket) {
#ifdef INVARIANTS
                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
#endif
                bucket->ub_cnt = 0;
                bucket->ub_entries = ubz->ubz_entries;
        }

        return (bucket);
}
438
439static void
440bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
441{
442        struct uma_bucket_zone *ubz;
443
444        KASSERT(bucket->ub_cnt == 0,
445            ("bucket_free: Freeing a non free bucket."));
446        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
447                udata = (void *)(uintptr_t)zone->uz_flags;
448        ubz = bucket_zone_lookup(bucket->ub_entries);
449        uma_zfree_arg(ubz->ubz_zone, bucket, udata);
450}
451
452static void
453bucket_zone_drain(void)
454{
455        struct uma_bucket_zone *ubz;
456
457        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
458                zone_drain(ubz->ubz_zone);
459}
460
461static void
462zone_log_warning(uma_zone_t zone)
463{
464        static const struct timeval warninterval = { 300, 0 };
465
466        if (!zone_warnings || zone->uz_warning == NULL)
467                return;
468
469        if (ratecheck(&zone->uz_ratecheck, &warninterval))
470                printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
471}
472
473static inline void
474zone_maxaction(uma_zone_t zone)
475{
476
477        if (zone->uz_maxaction.ta_func != NULL)
478                taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
479}
480
481static void
482zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
483{
484        uma_klink_t klink;
485
486        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
487                kegfn(klink->kl_keg);
488}
489
490/*
491 * Routine called by timeout which is used to fire off some time interval
492 * based calculations.  (stats, hash size, etc.)
493 *
494 * Arguments:
495 *      arg   Unused
496 *
497 * Returns:
498 *      Nothing
499 */
500static void
501uma_timeout(void *unused)
502{
503        bucket_enable();
504        zone_foreach(zone_timeout);
505
506        /* Reschedule this event */
507        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
508}
509
510/*
511 * Routine to perform timeout driven calculations.  This expands the
512 * hashes and does per cpu statistics aggregation.
513 *
514 *  Returns nothing.
515 */
/*
 * Per-keg timeout work: grow the keg's slab hash table when the slab
 * count has caught up with the current hash size.
 *
 * Arguments:
 *      keg  The keg to service; its lock is taken and released here.
 *
 * Returns nothing.
 */
static void
keg_timeout(uma_keg_t keg)
{

        KEG_LOCK(keg);
        /*
         * Expand the keg hash table.
         *
         * This is done if the number of slabs is larger than the hash size.
         * What I'm trying to do here is completely reduce collisions.  This
         * may be a little aggressive.  Should I allow for two collisions max?
         */
        if (keg->uk_flags & UMA_ZONE_HASH &&
            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
                struct uma_hash newhash;
                struct uma_hash oldhash;
                int ret;

                /*
                 * This is so involved because allocating and freeing
                 * while the keg lock is held will lead to deadlock.
                 * I have to do everything in stages and check for
                 * races.
                 */
                /* Snapshot the old size; hash_alloc() doubles from it. */
                newhash = keg->uk_hash;
                KEG_UNLOCK(keg);
                ret = hash_alloc(&newhash);
                KEG_LOCK(keg);
                if (ret) {
                        if (hash_expand(&keg->uk_hash, &newhash)) {
                                oldhash = keg->uk_hash;
                                keg->uk_hash = newhash;
                        } else
                                /* Lost the race: discard our new table. */
                                oldhash = newhash;

                        /* Free outside the keg lock to avoid deadlock. */
                        KEG_UNLOCK(keg);
                        hash_free(&oldhash);
                        return;
                }
        }
        KEG_UNLOCK(keg);
}
558
559static void
560zone_timeout(uma_zone_t zone)
561{
562
563        zone_foreach_keg(zone, &keg_timeout);
564}
565
566/*
567 * Allocate and zero fill the next sized hash table from the appropriate
568 * backing store.
569 *
570 * Arguments:
571 *      hash  A new hash structure with the old hash size in uh_hashsize
572 *
573 * Returns:
574 *      1 on success and 0 on failure.
575 */
576static int
577hash_alloc(struct uma_hash *hash)
578{
579        int oldsize;
580        int alloc;
581
582        oldsize = hash->uh_hashsize;
583
584        /* We're just going to go to a power of two greater */
585        if (oldsize)  {
586                hash->uh_hashsize = oldsize * 2;
587                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
588                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
589                    M_UMAHASH, M_NOWAIT);
590        } else {
591                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
592                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
593                    M_WAITOK);
594                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
595        }
596        if (hash->uh_slab_hash) {
597                bzero(hash->uh_slab_hash, alloc);
598                hash->uh_hashmask = hash->uh_hashsize - 1;
599                return (1);
600        }
601
602        return (0);
603}
604
605/*
606 * Expands the hash table for HASH zones.  This is done from zone_timeout
607 * to reduce collisions.  This must not be done in the regular allocation
608 * path, otherwise, we can recurse on the vm while allocating pages.
609 *
610 * Arguments:
611 *      oldhash  The hash you want to expand
612 *      newhash  The hash structure for the new table
613 *
614 * Returns:
615 *      Nothing
616 *
617 * Discussion:
618 */
/*
 * Expands the hash table for HASH zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation
 * path, otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *      oldhash  The hash you want to expand
 *      newhash  The hash structure for the new table
 *
 * Returns:
 *      1 when the rehash was performed, 0 when newhash has no table or
 *      is not actually larger than oldhash (e.g. a racing expander won).
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
        uma_slab_t slab;
        int hval;
        int i;

        if (!newhash->uh_slab_hash)
                return (0);

        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
                return (0);

        /*
         * I need to investigate hash algorithms for resizing without a
         * full rehash.
         */

        /* Move every slab from each old chain to its new chain head. */
        for (i = 0; i < oldhash->uh_hashsize; i++)
                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
                        hval = UMA_HASH(newhash, slab->us_data);
                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
                            slab, us_hlink);
                }

        return (1);
}
648
649/*
650 * Free the hash bucket to the appropriate backing store.
651 *
652 * Arguments:
653 *      slab_hash  The hash bucket we're freeing
654 *      hashsize   The number of entries in that hash bucket
655 *
656 * Returns:
657 *      Nothing
658 */
659static void
660hash_free(struct uma_hash *hash)
661{
662        if (hash->uh_slab_hash == NULL)
663                return;
664        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
665                zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
666        else
667                free(hash->uh_slab_hash, M_UMAHASH);
668}
669
670/*
671 * Frees all outstanding items in a bucket
672 *
673 * Arguments:
674 *      zone   The zone to free to, must be unlocked.
675 *      bucket The free/alloc bucket with items, cpu queue must be locked.
676 *
677 * Returns:
678 *      Nothing
679 */
680
681static void
682bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
683{
684        int i;
685
686        if (bucket == NULL)
687                return;
688
689        if (zone->uz_fini)
690                for (i = 0; i < bucket->ub_cnt; i++)
691                        zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
692        zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
693        bucket->ub_cnt = 0;
694}
695
696/*
697 * Drains the per cpu caches for a zone.
698 *
699 * NOTE: This may only be called while the zone is being turn down, and not
700 * during normal operation.  This is necessary in order that we do not have
701 * to migrate CPUs to drain the per-CPU caches.
702 *
703 * Arguments:
704 *      zone     The zone to drain, must be unlocked.
705 *
706 * Returns:
707 *      Nothing
708 */
/*
 * Drains the per cpu caches for a zone.
 *
 * NOTE: This may only be called while the zone is being turn down, and not
 * during normal operation.  This is necessary in order that we do not have
 * to migrate CPUs to drain the per-CPU caches.
 *
 * Arguments:
 *      zone     The zone to drain, must be unlocked.
 *
 * Returns:
 *      Nothing
 */
static void
cache_drain(uma_zone_t zone)
{
        uma_cache_t cache;
        int cpu;

        /*
         * XXX: It is safe to not lock the per-CPU caches, because we're
         * tearing down the zone anyway.  I.e., there will be no further use
         * of the caches at this point.
         *
         * XXX: It would good to be able to assert that the zone is being
         * torn down to prevent improper use of cache_drain().
         *
         * XXX: We lock the zone before passing into bucket_cache_drain() as
         * it is used elsewhere.  Should the tear-down path be made special
         * there in some form?
         */
        CPU_FOREACH(cpu) {
                cache = &zone->uz_cpu[cpu];
                /* Empty both buckets, then return them to the bucket zone. */
                bucket_drain(zone, cache->uc_allocbucket);
                bucket_drain(zone, cache->uc_freebucket);
                if (cache->uc_allocbucket != NULL)
                        bucket_free(zone, cache->uc_allocbucket, NULL);
                if (cache->uc_freebucket != NULL)
                        bucket_free(zone, cache->uc_freebucket, NULL);
                cache->uc_allocbucket = cache->uc_freebucket = NULL;
        }
        /* Finally flush the zone-level bucket cache as well. */
        ZONE_LOCK(zone);
        bucket_cache_drain(zone);
        ZONE_UNLOCK(zone);
}
741
742#ifndef __rtems__
743static void
744cache_shrink(uma_zone_t zone)
745{
746
747        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
748                return;
749
750        ZONE_LOCK(zone);
751        zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
752        ZONE_UNLOCK(zone);
753}
754
/*
 * Drain the current CPU's cache buckets into the zone.  Must run on the
 * target CPU (the caller binds via sched_bind(); see cache_drain_safe()).
 * Buckets holding items are pushed onto the zone bucket list; empty ones
 * are freed after the locks are dropped.  Internal zones are exempt.
 */
static void
cache_drain_safe_cpu(uma_zone_t zone)
{
        uma_cache_t cache;
        uma_bucket_t b1, b2;

        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
                return;

        b1 = b2 = NULL;
        ZONE_LOCK(zone);
        /* critical_enter() pins us to this CPU while we touch its cache. */
        critical_enter();
        cache = &zone->uz_cpu[curcpu];
        if (cache->uc_allocbucket) {
                if (cache->uc_allocbucket->ub_cnt != 0)
                        LIST_INSERT_HEAD(&zone->uz_buckets,
                            cache->uc_allocbucket, ub_link);
                else
                        b1 = cache->uc_allocbucket;
                cache->uc_allocbucket = NULL;
        }
        if (cache->uc_freebucket) {
                if (cache->uc_freebucket->ub_cnt != 0)
                        LIST_INSERT_HEAD(&zone->uz_buckets,
                            cache->uc_freebucket, ub_link);
                else
                        b2 = cache->uc_freebucket;
                cache->uc_freebucket = NULL;
        }
        critical_exit();
        ZONE_UNLOCK(zone);
        /* Free empty buckets only after dropping the zone lock. */
        if (b1)
                bucket_free(zone, b1, NULL);
        if (b2)
                bucket_free(zone, b2, NULL);
}
791
792/*
793 * Safely drain per-CPU caches of a zone(s) to alloc bucket.
794 * This is an expensive call because it needs to bind to all CPUs
795 * one by one and enter a critical section on each of them in order
796 * to safely access their cache buckets.
797 * Zone lock must not be held on call this function.
798 */
/*
 * Safely drain per-CPU caches of a zone(s) to alloc bucket.
 * This is an expensive call because it needs to bind to all CPUs
 * one by one and enter a critical section on each of them in order
 * to safely access their cache buckets.
 * Zone lock must not be held on call this function.
 *
 * A NULL zone means "all zones": the per-zone helpers are then applied
 * via zone_foreach().
 */
static void
cache_drain_safe(uma_zone_t zone)
{
        int cpu;

        /*
         * Polite bucket sizes shrinking was not enough, shrink aggressively.
         */
        if (zone)
                cache_shrink(zone);
        else
                zone_foreach(cache_shrink);

        CPU_FOREACH(cpu) {
                /* Migrate to the target CPU so its cache is local to us. */
                thread_lock(curthread);
                sched_bind(curthread, cpu);
                thread_unlock(curthread);

                if (zone)
                        cache_drain_safe_cpu(zone);
                else
                        zone_foreach(cache_drain_safe_cpu);
        }
        /* Release the CPU binding acquired in the loop above. */
        thread_lock(curthread);
        sched_unbind(curthread);
        thread_unlock(curthread);
}
826#endif /* __rtems__ */
827
828/*
829 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
830 */
/*
 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
 * The zone lock is dropped around each bucket's drain/free and reacquired
 * afterwards, so the bucket list may be mutated by others in between;
 * re-reading LIST_FIRST() each iteration keeps this safe.
 */
static void
bucket_cache_drain(uma_zone_t zone)
{
        uma_bucket_t bucket;

        /*
         * Drain the bucket queues and free the buckets, we just keep two per
         * cpu (alloc/free).
         */
        while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
                LIST_REMOVE(bucket, ub_link);
                ZONE_UNLOCK(zone);
                bucket_drain(zone, bucket);
                bucket_free(zone, bucket, NULL);
                ZONE_LOCK(zone);
        }

        /*
         * Shrink further bucket sizes.  Price of single zone lock collision
         * is probably lower then price of global cache drain.
         */
        if (zone->uz_count > zone->uz_count_min)
                zone->uz_count--;
}
855
/*
 * Tear down a slab and return its pages to the keg's page allocator.
 *
 * Arguments:
 *      keg    The keg the slab belongs to.
 *      slab   The slab to destroy.
 *      start  Number of initialized items: uk_fini is run on items
 *             [0, start); callers pass uk_ipers for a fully built slab or
 *             the partial count when backing out of a failed setup.
 */
static void
keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
{
        uint8_t *mem;
        int i;
        uint8_t flags;

        /* Save these before the slab header is possibly freed (OFFPAGE). */
        mem = slab->us_data;
        flags = slab->us_flags;
        i = start;
        if (keg->uk_fini != NULL) {
                /* Finalize items in reverse construction order. */
                for (i--; i > -1; i--)
                        keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
                            keg->uk_size);
        }
        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
                zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
#ifdef UMA_DEBUG
        printf("%s: Returning %d bytes.\n", keg->uk_name,
            PAGE_SIZE * keg->uk_ppera);
#endif
        keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
}
879
880/*
881 * Frees pages from a keg back to the system.  This is done on demand from
882 * the pageout daemon.
883 *
884 * Returns nothing.
885 */
/*
 * Frees pages from a keg back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Works in two phases: free slabs are unlinked onto a local list under
 * the keg lock, then actually destroyed after the lock is dropped, since
 * keg_free_slab() may sleep or recurse into allocators.
 *
 * Returns nothing.
 */
static void
keg_drain(uma_keg_t keg)
{
        struct slabhead freeslabs = { 0 };
        uma_slab_t slab, tmp;

        /*
         * We don't want to take pages from statically allocated kegs at this
         * time
         */
        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
                return;

#ifdef UMA_DEBUG
        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
#endif
        KEG_LOCK(keg);
        if (keg->uk_free == 0)
                goto finished;

        LIST_FOREACH_SAFE(slab, &keg->uk_free_slab, us_link, tmp) {
#ifndef __rtems__
                /* We have nowhere to free these to. */
                if (slab->us_flags & UMA_SLAB_BOOT)
                        continue;
#endif /* __rtems__ */

                LIST_REMOVE(slab, us_link);
                keg->uk_pages -= keg->uk_ppera;
                keg->uk_free -= keg->uk_ipers;

                if (keg->uk_flags & UMA_ZONE_HASH)
                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
        }
finished:
        KEG_UNLOCK(keg);

        /* Phase two: destroy the collected slabs without the keg lock. */
        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
                keg_free_slab(keg, slab, keg->uk_ipers);
        }
}
930
/*
 * Drain a zone's bucket cache and its kegs, serializing concurrent
 * drainers via the UMA_ZFLAG_DRAINING flag.
 *
 * Arguments:
 *      zone    The zone to drain; must be unlocked.
 *      waitok  M_WAITOK to sleep until a concurrent drain finishes,
 *              M_NOWAIT to return immediately in that case.
 */
static void
zone_drain_wait(uma_zone_t zone, int waitok)
{

        /*
         * Set draining to interlock with zone_dtor() so we can release our
         * locks as we go.  Only dtor() should do a WAITOK call since it
         * is the only call that knows the structure will still be available
         * when it wakes up.
         */
        ZONE_LOCK(zone);
        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
                if (waitok == M_NOWAIT)
                        goto out;
                msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
        }
        zone->uz_flags |= UMA_ZFLAG_DRAINING;
        bucket_cache_drain(zone);
        ZONE_UNLOCK(zone);
        /*
         * The DRAINING flag protects us from being freed while
         * we're running.  Normally the uma_rwlock would protect us but we
         * must be able to release and acquire the right lock for each keg.
         */
        zone_foreach_keg(zone, &keg_drain);
        ZONE_LOCK(zone);
        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
        /* Wake anyone sleeping in the msleep() loop above. */
        wakeup(zone);
out:
        ZONE_UNLOCK(zone);
}
962
/* Public entry point: non-blocking drain of a zone's caches and kegs. */
void
zone_drain(uma_zone_t zone)
{

	zone_drain_wait(zone, M_NOWAIT);
}
969
970/*
971 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
972 *
973 * Arguments:
974 *      wait  Shall we wait?
975 *
976 * Returns:
977 *      The slab that was allocated or NULL if there is no memory and the
978 *      caller specified M_NOWAIT.
979 */
static uma_slab_t
keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
{
	uma_alloc allocf;
	uma_slab_t slab;
	uint8_t *mem;
	uint8_t flags;
	int i;

	mtx_assert(&keg->uk_lock, MA_OWNED);
	slab = NULL;
	mem = NULL;

#ifdef UMA_DEBUG
	printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
#endif
	/* Snapshot the allocator; the keg lock is dropped while we allocate. */
	allocf = keg->uk_allocf;
	KEG_UNLOCK(keg);

	/* OFFPAGE kegs keep the slab header outside the slab's own pages. */
	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
		if (slab == NULL)
			goto out;
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */

	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	if (keg->uk_flags & UMA_ZONE_NODUMP)
		wait |= M_NODUMP;

	/* zone is passed for legacy reasons. */
	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
	if (mem == NULL) {
		/* Undo the OFFPAGE header allocation before failing. */
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
		slab = NULL;
		goto out;
	}

	/* Point the slab into the allocated memory */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
		slab = (uma_slab_t )(mem + keg->uk_pgoff);

	/* Record the page-to-slab mapping for later vtoslab() lookups. */
	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
		for (i = 0; i < keg->uk_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_keg = keg;
	slab->us_data = mem;
	slab->us_freecount = keg->uk_ipers;
	slab->us_flags = flags;
	/* Every item in a fresh slab starts out free. */
	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
#ifdef INVARIANTS
	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
#endif

	/*
	 * Run the keg's item initializer on each item; if one fails, free
	 * the i items that were successfully initialized and bail out.
	 */
	if (keg->uk_init != NULL) {
		for (i = 0; i < keg->uk_ipers; i++)
			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
			    keg->uk_size, wait) != 0)
				break;
		if (i != keg->uk_ipers) {
			keg_free_slab(keg, slab, i);
			slab = NULL;
			goto out;
		}
	}
out:
	/* Reacquire the keg lock before updating shared accounting. */
	KEG_LOCK(keg);

	if (slab != NULL) {
		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_INSERT(&keg->uk_hash, slab, mem);

		keg->uk_pages += keg->uk_ppera;
		keg->uk_free += keg->uk_ipers;
	}

	return (slab);
}
1070
1071#ifndef __rtems__
1072/*
1073 * This function is intended to be used early on in place of page_alloc() so
1074 * that we may use the boot time page cache to satisfy allocations before
1075 * the VM is ready.
1076 */
static void *
startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
{
	uma_keg_t keg;
	uma_slab_t tmps;
	int pages, check_pages;

	keg = zone_first_keg(zone);
	pages = howmany(bytes, PAGE_SIZE);
	/* The list walk below looks for pages - 1 successors of the head. */
	check_pages = pages - 1;
	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));

	/*
	 * Check our small startup cache to see if it has pages remaining.
	 */
	mtx_lock(&uma_boot_pages_mtx);

	/* First check if we have enough room. */
	tmps = LIST_FIRST(&uma_boot_pages);
	while (tmps != NULL && check_pages-- > 0)
		tmps = LIST_NEXT(tmps, us_link);
	if (tmps != NULL) {
		/*
		 * It's ok to lose tmps references.  The last one will
		 * have tmps->us_data pointing to the start address of
		 * "pages" contiguous pages of memory.
		 */
		while (pages-- > 0) {
			tmps = LIST_FIRST(&uma_boot_pages);
			LIST_REMOVE(tmps, us_link);
		}
		mtx_unlock(&uma_boot_pages_mtx);
		*pflag = tmps->us_flags;
		return (tmps->us_data);
	}
	mtx_unlock(&uma_boot_pages_mtx);
	/* Before UMA_STARTUP2 there is no fallback allocator available. */
	if (booted < UMA_STARTUP2)
		panic("UMA: Increase vm.boot_pages");
	/*
	 * Now that we've booted reset these users to their real allocator.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
#else
	keg->uk_allocf = page_alloc;
#endif
	return keg->uk_allocf(zone, bytes, pflag, wait);
}
1125#endif /* __rtems__ */
1126
1127/*
1128 * Allocates a number of pages from the system
1129 *
1130 * Arguments:
1131 *      bytes  The number of bytes requested
1132 *      wait  Shall we wait?
1133 *
1134 * Returns:
1135 *      A pointer to the alloced memory or possibly
1136 *      NULL if M_NOWAIT is set.
1137 */
static void *
page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
{
	void *p;	/* Returned page */

#ifndef __rtems__
	/* Tag the slab so page_free() returns it to the kmem arena. */
	*pflag = UMA_SLAB_KMEM;
	p = (void *) kmem_malloc(kmem_arena, bytes, wait);
#else /* __rtems__ */
	/* RTEMS uses its own page allocator; no slab flag is needed. */
	*pflag = 0;
	p = rtems_bsd_page_alloc(bytes, wait);
#endif /* __rtems__ */

	return (p);
}
1153
1154#ifndef __rtems__
1155/*
1156 * Allocates a number of pages from within an object
1157 *
1158 * Arguments:
1159 *      bytes  The number of bytes requested
1160 *      wait   Shall we wait?
1161 *
1162 * Returns:
1163 *      A pointer to the alloced memory or possibly
1164 *      NULL if M_NOWAIT is set.
1165 */
static void *
noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{
	TAILQ_HEAD(, vm_page) alloctail;
	u_long npages;
	vm_offset_t retkva, zkva;
	vm_page_t p, p_next;
	uma_keg_t keg;

	TAILQ_INIT(&alloctail);
	keg = zone_first_keg(zone);

	/* Gather npages wired pages one at a time on a local list. */
	npages = howmany(bytes, PAGE_SIZE);
	while (npages > 0) {
		p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
		if (p != NULL) {
			/*
			 * Since the page does not belong to an object, its
			 * listq is unused.
			 */
			TAILQ_INSERT_TAIL(&alloctail, p, listq);
			npages--;
			continue;
		}
		/* A sleeping caller may wait for memory and retry. */
		if (wait & M_WAITOK) {
			VM_WAIT;
			continue;
		}

		/*
		 * Page allocation failed, free intermediate pages and
		 * exit.
		 */
		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
			vm_page_unwire(p, PQ_NONE);
			vm_page_free(p);
		}
		return (NULL);
	}
	*flags = UMA_SLAB_PRIV;
	/* Claim a contiguous KVA range from the keg's reserved region. */
	zkva = keg->uk_kva +
	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
	retkva = zkva;
	/* Map each gathered page consecutively into that range. */
	TAILQ_FOREACH(p, &alloctail, listq) {
		pmap_qenter(zkva, &p, 1);
		zkva += PAGE_SIZE;
	}

	return ((void *)retkva);
}
1217#endif /* __rtems__ */
1218
1219/*
1220 * Frees a number of pages to the system
1221 *
1222 * Arguments:
1223 *      mem   A pointer to the memory to be freed
1224 *      size  The size of the memory being freed
1225 *      flags The original p->us_flags field
1226 *
1227 * Returns:
1228 *      Nothing
1229 */
static void
page_free(void *mem, vm_size_t size, uint8_t flags)
{
#ifndef __rtems__
	struct vmem *vmem;

	/* Select the arena matching the flag set by the allocator. */
	if (flags & UMA_SLAB_KMEM)
		vmem = kmem_arena;
	else if (flags & UMA_SLAB_KERNEL)
		vmem = kernel_arena;
	else
		panic("UMA: page_free used with invalid flags %x", flags);

	kmem_free(vmem, (vm_offset_t)mem, size);
#else /* __rtems__ */
	/* On RTEMS, UMA_SLAB_KERNEL memory came from malloc(9). */
	if (flags & UMA_SLAB_KERNEL)
		free(mem, M_TEMP);
	else
		rtems_bsd_page_free(mem);
#endif /* __rtems__ */
}
1251
1252/*
1253 * Zero fill initializer
1254 *
1255 * Arguments/Returns follow uma_init specifications
1256 */
static int
zero_init(void *mem, int size, int flags)
{

	/* Zero the item; the allocation flags are irrelevant here. */
	bzero(mem, size);
	return (0);
}
1263
1264/*
1265 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
1266 *
1267 * Arguments
1268 *      keg  The zone we should initialize
1269 *
1270 * Returns
1271 *      Nothing
1272 */
static void
keg_small_init(uma_keg_t keg)
{
	u_int rsize;		/* aligned item size */
	u_int memused;		/* bytes of the slab actually used */
	u_int wastedspace;	/* slack left at the end of the slab */
	u_int shsize;		/* in-page slab header size, 0 if OFFPAGE */
	u_int slabsize;

	if (keg->uk_flags & UMA_ZONE_PCPU) {
		/* mp_maxid + 1 may be 0 before CPUs are enumerated. */
		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;

		slabsize = sizeof(struct pcpu);
		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
		    PAGE_SIZE);
	} else {
		slabsize = UMA_SLAB_SIZE;
		keg->uk_ppera = 1;
	}

	/*
	 * Calculate the size of each allocation (rsize) according to
	 * alignment.  If the requested size is smaller than we have
	 * allocation bits for we round it up.
	 */
	rsize = keg->uk_size;
	if (rsize < slabsize / SLAB_SETSIZE)
		rsize = slabsize / SLAB_SETSIZE;
	if (rsize & keg->uk_align)
		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
	keg->uk_rsize = rsize;

	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
	    keg->uk_rsize < sizeof(struct pcpu),
	    ("%s: size %u too large", __func__, keg->uk_rsize));

	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
		shsize = 0;
	else
		shsize = sizeof(struct uma_slab);

	keg->uk_ipers = (slabsize - shsize) / rsize;
	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));

	memused = keg->uk_ipers * rsize + shsize;
	wastedspace = slabsize - memused;

	/*
	 * We can't do OFFPAGE if we're internal or if we've been
	 * asked to not go to the VM for buckets.  If we do this we
	 * may end up going to the VM  for slabs which we do not
	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
	 * of UMA_ZONE_VM, which clearly forbids it.
	 */
	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
		return;

	/*
	 * See if using an OFFPAGE slab will limit our waste.  Only do
	 * this if it permits more items per-slab.
	 *
	 * XXX We could try growing slabsize to limit max waste as well.
	 * Historically this was not done because the VM could not
	 * efficiently handle contiguous allocations.
	 */
	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
		keg->uk_ipers = slabsize / keg->uk_rsize;
		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
#ifdef UMA_DEBUG
		printf("UMA decided we need offpage slab headers for "
		    "keg: %s, calculated wastedspace = %d, "
		    "maximum wasted space allowed = %d, "
		    "calculated ipers = %d, "
		    "new wasted space = %d\n", keg->uk_name, wastedspace,
		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
		    slabsize - keg->uk_ipers * keg->uk_rsize);
#endif
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
	}

	/* OFFPAGE kegs without vtoslab need a hash to find slab headers. */
	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
		keg->uk_flags |= UMA_ZONE_HASH;
}
1361
1362/*
1363 * Finish creating a large (> UMA_SLAB_SIZE) uma kegs.  Just give in and do
1364 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1365 * more complicated.
1366 *
1367 * Arguments
1368 *      keg  The keg we should initialize
1369 *
1370 * Returns
1371 *      Nothing
1372 */
1373static void
1374keg_large_init(uma_keg_t keg)
1375{
1376        u_int shsize;
1377
1378        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1379        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1380            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1381        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1382            ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1383
1384        keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1385        keg->uk_ipers = 1;
1386        keg->uk_rsize = keg->uk_size;
1387
1388        /* We can't do OFFPAGE if we're internal, bail out here. */
1389        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1390                return;
1391
1392        /* Check whether we have enough space to not do OFFPAGE. */
1393        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
1394                shsize = sizeof(struct uma_slab);
1395                if (shsize & UMA_ALIGN_PTR)
1396                        shsize = (shsize & ~UMA_ALIGN_PTR) +
1397                            (UMA_ALIGN_PTR + 1);
1398
1399                if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
1400                        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1401        }
1402
1403        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1404            (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1405                keg->uk_flags |= UMA_ZONE_HASH;
1406}
1407
1408static void
1409keg_cachespread_init(uma_keg_t keg)
1410{
1411        int alignsize;
1412        int trailer;
1413        int pages;
1414        int rsize;
1415
1416        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1417            ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1418
1419        alignsize = keg->uk_align + 1;
1420        rsize = keg->uk_size;
1421        /*
1422         * We want one item to start on every align boundary in a page.  To
1423         * do this we will span pages.  We will also extend the item by the
1424         * size of align if it is an even multiple of align.  Otherwise, it
1425         * would fall on the same boundary every time.
1426         */
1427        if (rsize & keg->uk_align)
1428                rsize = (rsize & ~keg->uk_align) + alignsize;
1429        if ((rsize & alignsize) == 0)
1430                rsize += alignsize;
1431        trailer = rsize - keg->uk_size;
1432        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1433        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1434        keg->uk_rsize = rsize;
1435        keg->uk_ppera = pages;
1436        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1437        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1438        KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
1439            ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1440            keg->uk_ipers));
1441}
1442
1443/*
1444 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1445 * the keg onto the global keg list.
1446 *
1447 * Arguments/Returns follow uma_ctor specifications
1448 *      udata  Actually uma_kctor_args
1449 */
static int
keg_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_kctor_args *arg = udata;
	uma_keg_t keg = mem;
	uma_zone_t zone;

	bzero(keg, size);
	keg->uk_size = arg->size;
	keg->uk_init = arg->uminit;
	keg->uk_fini = arg->fini;
	keg->uk_align = arg->align;
	keg->uk_free = 0;
	keg->uk_reserve = 0;
	keg->uk_pages = 0;
	keg->uk_flags = arg->flags;
	/* Default backend; may be replaced by layout/boot logic below. */
	keg->uk_allocf = page_alloc;
	keg->uk_freef = page_free;
	keg->uk_slabzone = NULL;

	/*
	 * The master zone is passed to us at keg-creation time.
	 */
	zone = arg->zone;
	keg->uk_name = zone->uz_name;

	if (arg->flags & UMA_ZONE_VM)
		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;

	if (arg->flags & UMA_ZONE_ZINIT)
		keg->uk_init = zero_init;

	if (arg->flags & UMA_ZONE_MALLOC)
		keg->uk_flags |= UMA_ZONE_VTOSLAB;

	/* On non-SMP kernels UMA_ZONE_PCPU degrades to a regular keg. */
	if (arg->flags & UMA_ZONE_PCPU)
#ifdef SMP
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
#else
		keg->uk_flags &= ~UMA_ZONE_PCPU;
#endif

	/* Choose the slab layout strategy based on flags and item size. */
	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
		keg_cachespread_init(keg);
	} else {
		if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
			keg_large_init(keg);
		else
			keg_small_init(keg);
	}

	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
		keg->uk_slabzone = slabzone;

	/*
	 * If we haven't booted yet we need allocations to go through the
	 * startup cache until the vm is ready.
	 */
	if (keg->uk_ppera == 1) {
#ifdef UMA_MD_SMALL_ALLOC
		keg->uk_allocf = uma_small_alloc;
		keg->uk_freef = uma_small_free;

#ifndef __rtems__
		if (booted < UMA_STARTUP)
			keg->uk_allocf = startup_alloc;
#endif /* __rtems__ */
#else
#ifndef __rtems__
		if (booted < UMA_STARTUP2)
			keg->uk_allocf = startup_alloc;
#endif /* __rtems__ */
#endif
#ifndef __rtems__
	} else if (booted < UMA_STARTUP2 &&
	    (keg->uk_flags & UMA_ZFLAG_INTERNAL))
		keg->uk_allocf = startup_alloc;
#else /* __rtems__ */
	}
#endif /* __rtems__ */

	/*
	 * Initialize keg's lock
	 */
	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
		u_int totsize;

		/* Size of the slab struct and free list */
		totsize = sizeof(struct uma_slab);

		/* Round the header size up to pointer alignment. */
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;

		/*
		 * The only way the following is possible is if with our
		 * UMA_ALIGN_PTR adjustments we are now bigger than
		 * UMA_SLAB_SIZE.  I haven't checked whether this is
		 * mathematically possible for all cases, so we make
		 * sure here anyway.
		 */
		totsize = keg->uk_pgoff + sizeof(struct uma_slab);
		if (totsize > PAGE_SIZE * keg->uk_ppera) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
			    keg->uk_size);
			panic("UMA slab won't fit.");
		}
	}

	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_alloc(&keg->uk_hash);

#ifdef UMA_DEBUG
	printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
	    zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
	    keg->uk_ipers, keg->uk_ppera,
	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
	    keg->uk_free);
#endif

	/* Link the master zone to this keg and publish the keg globally. */
	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);

	rw_wlock(&uma_rwlock);
	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
	rw_wunlock(&uma_rwlock);
	return (0);
}
1586
1587/*
1588 * Zone header ctor.  This initializes all fields, locks, etc.
1589 *
1590 * Arguments/Returns follow uma_ctor specifications
1591 *      udata  Actually uma_zctor_args
1592 */
static int
zone_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	uma_zone_t z;
	uma_keg_t keg;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_slab = zone_fetch_slab;
	zone->uz_init = NULL;
	zone->uz_fini = NULL;
	zone->uz_allocs = 0;
	zone->uz_frees = 0;
	zone->uz_fails = 0;
	zone->uz_sleeps = 0;
	zone->uz_count = 0;
	zone->uz_count_min = 0;
	zone->uz_flags = 0;
	zone->uz_warning = NULL;
	timevalclear(&zone->uz_ratecheck);
	keg = arg->keg;

	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));

	/*
	 * This is a pure cache zone, no kegs.
	 */
	if (arg->import) {
		if (arg->flags & UMA_ZONE_VM)
			arg->flags |= UMA_ZFLAG_CACHEONLY;
		zone->uz_flags = arg->flags;
		zone->uz_size = arg->size;
		/* Caller-supplied import/release replace the keg path. */
		zone->uz_import = arg->import;
		zone->uz_release = arg->release;
		zone->uz_arg = arg->arg;
		zone->uz_lockptr = &zone->uz_lock;
		rw_wlock(&uma_rwlock);
		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
		rw_wunlock(&uma_rwlock);
		goto out;
	}

	/*
	 * Use the regular zone/keg/slab allocator.
	 */
	zone->uz_import = (uma_import)zone_import;
	zone->uz_release = (uma_release)zone_release;
	zone->uz_arg = zone;

	if (arg->flags & UMA_ZONE_SECONDARY) {
		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
		zone->uz_init = arg->uminit;
		zone->uz_fini = arg->fini;
		/* A secondary zone shares its keg's lock. */
		zone->uz_lockptr = &keg->uk_lock;
		zone->uz_flags |= UMA_ZONE_SECONDARY;
		rw_wlock(&uma_rwlock);
		ZONE_LOCK(zone);
		/* Append this zone at the tail of the keg's zone list. */
		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
			if (LIST_NEXT(z, uz_link) == NULL) {
				LIST_INSERT_AFTER(z, zone, uz_link);
				break;
			}
		}
		ZONE_UNLOCK(zone);
		rw_wunlock(&uma_rwlock);
	} else if (keg == NULL) {
		/* No keg supplied: create a fresh one for this zone. */
		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
		    arg->align, arg->flags)) == NULL)
			return (ENOMEM);
	} else {
		struct uma_kctor_args karg;
		int error;

		/* We should only be here from uma_startup() */
		karg.size = arg->size;
		karg.uminit = arg->uminit;
		karg.fini = arg->fini;
		karg.align = arg->align;
		karg.flags = arg->flags;
		karg.zone = zone;
		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
		    flags);
		if (error)
			return (error);
	}

	/*
	 * Link in the first keg.
	 */
	zone->uz_klink.kl_keg = keg;
	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
	zone->uz_lockptr = &keg->uk_lock;
	zone->uz_size = keg->uk_size;
	/* Inherit the keg flags that also apply at the zone level. */
	zone->uz_flags |= (keg->uk_flags &
	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
		return (0);
	}

out:
	/* Choose the per-CPU bucket size (max unless a cap is requested). */
	if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
		zone->uz_count = bucket_select(zone->uz_size);
	else
		zone->uz_count = BUCKET_MAX;
	zone->uz_count_min = zone->uz_count;

	return (0);
}
1712
1713/*
1714 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1715 * table and removes the keg from the global list.
1716 *
1717 * Arguments/Returns follow uma_dtor specifications
1718 *      udata  unused
1719 */
static void
keg_dtor(void *arg, int size, void *udata)
{
	uma_keg_t keg;

	keg = (uma_keg_t)arg;
	KEG_LOCK(keg);
	/* Warn if items are still outstanding; their pages are leaked. */
	if (keg->uk_free != 0) {
		printf("Freed UMA keg (%s) was not empty (%d items). "
		    " Lost %d pages of memory.\n",
		    keg->uk_name ? keg->uk_name : "",
		    keg->uk_free, keg->uk_pages);
	}
	KEG_UNLOCK(keg);

	hash_free(&keg->uk_hash);

	KEG_LOCK_FINI(keg);
}
1739
1740/*
1741 * Zone header dtor.
1742 *
1743 * Arguments/Returns follow uma_dtor specifications
1744 *      udata  unused
1745 */
static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_klink_t klink;
	uma_zone_t zone;
	uma_keg_t keg;

	zone = (uma_zone_t)arg;
	keg = zone_first_keg(zone);

	/* Internal zones have no per-CPU caches to drain. */
	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	rw_wlock(&uma_rwlock);
	LIST_REMOVE(zone, uz_link);
	rw_wunlock(&uma_rwlock);
	/*
	 * XXX there are some races here where
	 * the zone can be drained but zone lock
	 * released and then refilled before we
	 * remove it... we dont care for now
	 */
	zone_drain_wait(zone, M_WAITOK);
	/*
	 * Unlink all of our kegs.
	 */
	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
		klink->kl_keg = NULL;
		LIST_REMOVE(klink, kl_link);
		/* uz_klink is embedded in the zone; don't free() it. */
		if (klink == &zone->uz_klink)
			continue;
		free(klink, M_TEMP);
	}
	/*
	 * We only destroy kegs from non secondary zones.
	 */
	if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
		rw_wlock(&uma_rwlock);
		LIST_REMOVE(keg, uk_link);
		rw_wunlock(&uma_rwlock);
		zone_free_item(kegs, keg, NULL, SKIP_NONE);
	}
	ZONE_LOCK_FINI(zone);
}
1790
1791/*
1792 * Traverses every zone in the system and calls a callback
1793 *
1794 * Arguments:
1795 *      zfunc  A pointer to a function which accepts a zone
1796 *              as an argument.
1797 *
1798 * Returns:
1799 *      Nothing
1800 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_keg_t keg;
	uma_zone_t zone;

	/* The read lock keeps the keg/zone lists stable during the walk. */
	rw_rlock(&uma_rwlock);
	LIST_FOREACH(keg, &uma_kegs, uk_link) {
		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
			zfunc(zone);
	}
	rw_runlock(&uma_rwlock);
}
1814
1815/* Public functions */
1816/* See uma.h */
void
uma_startup(void *bootmem, int boot_pages)
{
	struct uma_zctor_args args;
#ifndef __rtems__
	uma_slab_t slab;
	int i;
#endif /* __rtems__ */

#ifdef UMA_DEBUG
	printf("Creating uma keg headers zone and keg.\n");
#endif
	rw_init(&uma_rwlock, "UMA lock");

	/* "manually" create the initial zone */
	memset(&args, 0, sizeof(args));
	args.name = "UMA Kegs";
	args.size = sizeof(struct uma_keg);
	args.ctor = keg_ctor;
	args.dtor = keg_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = &masterkeg;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);

#ifndef __rtems__
#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	/* Carve bootmem into boot_pages slabs for startup_alloc(). */
	for (i = 0; i < boot_pages; i++) {
		slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (uint8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
	}
	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
#endif /* __rtems__ */

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone and keg.\n");
#endif
	args.name = "UMA Zones";
	/* Zone headers carry one uma_cache per CPU after the struct. */
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (mp_maxid + 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Creating slab and hash zones.\n");
#endif

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
				sizeof(struct uma_slab),
				NULL, NULL, NULL, NULL,
				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	bucket_init();

#ifndef __rtems__
	booted = UMA_STARTUP;
#endif /* __rtems__ */

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}
1899#ifdef __rtems__
/*
 * RTEMS replacement for the FreeBSD boot sequence: bring UMA up from a
 * SYSINIT with no boot pages (bootmem is not used on RTEMS).
 */
static void
rtems_bsd_uma_startup(void *unused)
{
        (void) unused;

        /*
         * NOTE(review): SX_RECURSE presumably allows uma_drain_lock to
         * be re-acquired during startup-time zone creation -- confirm.
         */
        sx_init_flags(&uma_drain_lock, "umadrain", SX_RECURSE);
        uma_startup(NULL, 0);
}

SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_SECOND,
    rtems_bsd_uma_startup, NULL);
1911#endif /* __rtems__ */
1912
1913#ifndef __rtems__
/*
 * See uma.h.
 *
 * Second stage of UMA initialization (FreeBSD only), run once the VM
 * can back slab allocations: enable buckets and create the drain lock.
 */
void
uma_startup2(void)
{
        booted = UMA_STARTUP2;
        bucket_enable();
        sx_init(&uma_drain_lock, "umadrain");
#ifdef UMA_DEBUG
        printf("UMA startup2 complete.\n");
#endif
}
1925#endif /* __rtems__ */
1926
/*
 * Final stage of UMA initialization: arm the periodic uma_timeout()
 * callout, which fires every UMA_TIMEOUT seconds.
 */
static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
        printf("Starting callout.\n");
#endif
        callout_init(&uma_callout, 1);
        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
        printf("UMA startup3 complete.\n");
#endif
}
1944
1945static uma_keg_t
1946uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1947                int align, uint32_t flags)
1948{
1949        struct uma_kctor_args args;
1950
1951        args.size = size;
1952        args.uminit = uminit;
1953        args.fini = fini;
1954        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1955        args.flags = flags;
1956        args.zone = zone;
1957        return (zone_alloc_item(kegs, &args, M_WAITOK));
1958}
1959
1960/* See uma.h */
1961void
1962uma_set_align(int align)
1963{
1964
1965        if (align != UMA_ALIGN_CACHE)
1966                uma_align_cache = align;
1967}
1968
/*
 * See uma.h.
 *
 * Create a new zone backed by its own keg.  Under INVARIANTS, zones
 * created without any init/ctor/dtor/fini get the trash hooks, which
 * detect use-after-free.
 */
uma_zone_t
uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
                uma_init uminit, uma_fini fini, int align, uint32_t flags)

{
        struct uma_zctor_args args;
        uma_zone_t res;
#ifndef __rtems__
        bool locked;
#endif /* __rtems__ */

        /* This stuff is essential for the zone ctor */
        memset(&args, 0, sizeof(args));
        args.name = name;
        args.size = size;
        args.ctor = ctor;
        args.dtor = dtor;
        args.uminit = uminit;
        args.fini = fini;
#ifdef  INVARIANTS
        /*
         * If a zone is being created with an empty constructor and
         * destructor, pass UMA constructor/destructor which checks for
         * memory use after free.
         */
        if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
            ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
                args.ctor = trash_ctor;
                args.dtor = trash_dtor;
                args.uminit = trash_init;
                args.fini = trash_fini;
        }
#endif
        args.align = align;
        args.flags = flags;
        args.keg = NULL;

        /*
         * On FreeBSD the drain lock is only initialized by uma_startup2(),
         * so skip it during early boot.  On RTEMS the lock is created
         * before any zone exists, so it is always taken.
         */
#ifndef __rtems__
        if (booted < UMA_STARTUP2) {
                locked = false;
        } else {
#endif /* __rtems__ */
                sx_slock(&uma_drain_lock);
#ifndef __rtems__
                locked = true;
        }
#endif /* __rtems__ */
        res = zone_alloc_item(zones, &args, M_WAITOK);
#ifndef __rtems__
        if (locked)
#endif /* __rtems__ */
                sx_sunlock(&uma_drain_lock);
        return (res);
}
2024
/*
 * See uma.h.
 *
 * Create a secondary zone attached to the master zone's first keg, so
 * both zones share the same slabs; size, alignment and keg flags are
 * inherited from the master's keg.
 */
uma_zone_t
uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
                    uma_init zinit, uma_fini zfini, uma_zone_t master)
{
        struct uma_zctor_args args;
        uma_keg_t keg;
        uma_zone_t res;
#ifndef __rtems__
        bool locked;
#endif /* __rtems__ */

        keg = zone_first_keg(master);
        memset(&args, 0, sizeof(args));
        args.name = name;
        args.size = keg->uk_size;
        args.ctor = ctor;
        args.dtor = dtor;
        args.uminit = zinit;
        args.fini = zfini;
        /* Item geometry comes from the shared keg. */
        args.align = keg->uk_align;
        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
        args.keg = keg;

        /* The drain lock does not exist before uma_startup2() on FreeBSD. */
#ifndef __rtems__
        if (booted < UMA_STARTUP2) {
                locked = false;
        } else {
#endif /* __rtems__ */
                sx_slock(&uma_drain_lock);
#ifndef __rtems__
                locked = true;
        }
#endif /* __rtems__ */
        /* XXX Attaches only one keg of potentially many. */
        res = zone_alloc_item(zones, &args, M_WAITOK);
#ifndef __rtems__
        if (locked)
#endif /* __rtems__ */
                sx_sunlock(&uma_drain_lock);
        return (res);
}
2067
2068/* See uma.h */
2069uma_zone_t
2070uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2071                    uma_init zinit, uma_fini zfini, uma_import zimport,
2072                    uma_release zrelease, void *arg, int flags)
2073{
2074        struct uma_zctor_args args;
2075
2076        memset(&args, 0, sizeof(args));
2077        args.name = name;
2078        args.size = size;
2079        args.ctor = ctor;
2080        args.dtor = dtor;
2081        args.uminit = zinit;
2082        args.fini = zfini;
2083        args.import = zimport;
2084        args.release = zrelease;
2085        args.arg = arg;
2086        args.align = 0;
2087        args.flags = flags;
2088
2089        return (zone_alloc_item(zones, &args, M_WAITOK));
2090}
2091
2092#ifndef __rtems__
2093static void
2094zone_lock_pair(uma_zone_t a, uma_zone_t b)
2095{
2096        if (a < b) {
2097                ZONE_LOCK(a);
2098                mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
2099        } else {
2100                ZONE_LOCK(b);
2101                mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
2102        }
2103}
2104
/*
 * Release the locks taken by zone_lock_pair(); unlock order does not
 * matter.
 */
static void
zone_unlock_pair(uma_zone_t a, uma_zone_t b)
{

        ZONE_UNLOCK(a);
        ZONE_UNLOCK(b);
}
2112
2113int
2114uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
2115{
2116        uma_klink_t klink;
2117        uma_klink_t kl;
2118        int error;
2119
2120        error = 0;
2121        klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
2122
2123        zone_lock_pair(zone, master);
2124        /*
2125         * zone must use vtoslab() to resolve objects and must already be
2126         * a secondary.
2127         */
2128        if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
2129            != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
2130                error = EINVAL;
2131                goto out;
2132        }
2133        /*
2134         * The new master must also use vtoslab().
2135         */
2136        if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2137                error = EINVAL;
2138                goto out;
2139        }
2140
2141        /*
2142         * The underlying object must be the same size.  rsize
2143         * may be different.
2144         */
2145        if (master->uz_size != zone->uz_size) {
2146                error = E2BIG;
2147                goto out;
2148        }
2149        /*
2150         * Put it at the end of the list.
2151         */
2152        klink->kl_keg = zone_first_keg(master);
2153        LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2154                if (LIST_NEXT(kl, kl_link) == NULL) {
2155                        LIST_INSERT_AFTER(kl, klink, kl_link);
2156                        break;
2157                }
2158        }
2159        klink = NULL;
2160        zone->uz_flags |= UMA_ZFLAG_MULTI;
2161        zone->uz_slab = zone_fetch_slab_multi;
2162
2163out:
2164        zone_unlock_pair(zone, master);
2165        if (klink != NULL)
2166                free(klink, M_TEMP);
2167
2168        return (error);
2169}
2170#endif /* __rtems__ */
2171
2172
/*
 * See uma.h.
 *
 * Tear down a zone.  The drain lock is held shared, matching its use
 * in uma_zcreate()/uma_zsecond_create().
 */
void
uma_zdestroy(uma_zone_t zone)
{

        sx_slock(&uma_drain_lock);
        zone_free_item(zones, zone, NULL, SKIP_NONE);
        sx_sunlock(&uma_drain_lock);
}
2182
/*
 * See uma.h.
 *
 * Allocate one item from a zone.  Allocation proceeds in three tiers:
 * (1) the per-CPU cache buckets (lock-free, inside a critical
 * section), (2) the zone's list of full buckets (under the zone lock),
 * and (3) a freshly filled bucket or, failing that, a single item
 * imported directly via zone_alloc_item().
 */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
        void *item;
        uma_cache_t cache;
        uma_bucket_t bucket;
        int lockfail;
        int cpu;

        /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
        random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);

        /* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
            zone->uz_name, flags);

        if (flags & M_WAITOK) {
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
        }
#ifndef __rtems__
        KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
            ("uma_zalloc_arg: called with spinlock or critical section held"));
#endif /* __rtems__ */

#ifdef DEBUG_MEMGUARD
        /* Divert selected zones to memguard for use-after-free checks. */
        if (memguard_cmp_zone(zone)) {
                item = memguard_alloc(zone->uz_size, flags);
                if (item != NULL) {
                        if (zone->uz_init != NULL &&
                            zone->uz_init(item, zone->uz_size, flags) != 0)
                                return (NULL);
                        if (zone->uz_ctor != NULL &&
                            zone->uz_ctor(item, zone->uz_size, udata,
                            flags) != 0) {
                                zone->uz_fini(item, zone->uz_size);
                                return (NULL);
                        }
                        return (item);
                }
                /* This is unfortunate but should not be fatal. */
        }
#endif
        /*
         * If possible, allocate from the per-CPU cache.  There are two
         * requirements for safe access to the per-CPU cache: (1) the thread
         * accessing the cache must not be preempted or yield during access,
         * and (2) the thread must not migrate CPUs without switching which
         * cache it accesses.  We rely on a critical section to prevent
         * preemption and migration.  We release the critical section in
         * order to acquire the zone mutex if we are unable to allocate from
         * the current cache; when we re-acquire the critical section, we
         * must detect and handle migration if it has occurred.
         */
        critical_enter();
        cpu = curcpu;
        cache = &zone->uz_cpu[cpu];

zalloc_start:
        /* Fast path: pop the topmost item off this CPU's alloc bucket. */
        bucket = cache->uc_allocbucket;
        if (bucket != NULL && bucket->ub_cnt > 0) {
                bucket->ub_cnt--;
                item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
                bucket->ub_bucket[bucket->ub_cnt] = NULL;
#endif
                KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
                cache->uc_allocs++;
                critical_exit();
                if (zone->uz_ctor != NULL &&
                    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
                        atomic_add_long(&zone->uz_fails, 1);
                        zone_free_item(zone, item, udata, SKIP_DTOR);
                        return (NULL);
                }
#ifdef INVARIANTS
                uma_dbg_alloc(zone, NULL, item);
#endif
                if (flags & M_ZERO)
                        uma_zero_item(item, zone);
                return (item);
        }

        /*
         * We have run out of items in our alloc bucket.
         * See if we can switch with our free bucket.
         */
        bucket = cache->uc_freebucket;
        if (bucket != NULL && bucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
                printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
                cache->uc_freebucket = cache->uc_allocbucket;
                cache->uc_allocbucket = bucket;
                goto zalloc_start;
        }

        /*
         * Discard any empty allocation bucket while we hold no locks.
         */
        bucket = cache->uc_allocbucket;
        cache->uc_allocbucket = NULL;
        critical_exit();
        if (bucket != NULL)
                bucket_free(zone, bucket, udata);

        /* Short-circuit for zones without buckets and low memory. */
        if (zone->uz_count == 0 || bucketdisable)
                goto zalloc_item;

        /*
         * Attempt to retrieve the item from the per-CPU cache has failed, so
         * we must go back to the zone.  This requires the zone lock, so we
         * must drop the critical section, then re-acquire it when we go back
         * to the cache.  Since the critical section is released, we may be
         * preempted or migrate.  As such, make sure not to maintain any
         * thread-local state specific to the cache from prior to releasing
         * the critical section.
         */
        lockfail = 0;
        if (ZONE_TRYLOCK(zone) == 0) {
                /* Record contention to size the buckets. */
                ZONE_LOCK(zone);
                lockfail = 1;
        }
        critical_enter();
        cpu = curcpu;
        cache = &zone->uz_cpu[cpu];

        /*
         * Since we have locked the zone we may as well send back our stats.
         */
        atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
        atomic_add_long(&zone->uz_frees, cache->uc_frees);
        cache->uc_allocs = 0;
        cache->uc_frees = 0;

        /* See if we lost the race to fill the cache. */
        if (cache->uc_allocbucket != NULL) {
                ZONE_UNLOCK(zone);
                goto zalloc_start;
        }

        /*
         * Check the zone's cache of buckets.
         */
        if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
                KASSERT(bucket->ub_cnt != 0,
                    ("uma_zalloc_arg: Returning an empty bucket."));

                LIST_REMOVE(bucket, ub_link);
                cache->uc_allocbucket = bucket;
                ZONE_UNLOCK(zone);
                goto zalloc_start;
        }
        /* We are no longer associated with this CPU. */
        critical_exit();

        /*
         * We bump the uz count when the cache size is insufficient to
         * handle the working set.
         */
        if (lockfail && zone->uz_count < BUCKET_MAX)
                zone->uz_count++;
        ZONE_UNLOCK(zone);

        /*
         * Now lets just fill a bucket and put it on the free list.  If that
         * works we'll restart the allocation from the beginning and it
         * will use the just filled bucket.
         */
        bucket = zone_alloc_bucket(zone, udata, flags);
        if (bucket != NULL) {
                ZONE_LOCK(zone);
                critical_enter();
                cpu = curcpu;
                cache = &zone->uz_cpu[cpu];
                /*
                 * See if we lost the race or were migrated.  Cache the
                 * initialized bucket to make this less likely or claim
                 * the memory directly.
                 */
                if (cache->uc_allocbucket == NULL)
                        cache->uc_allocbucket = bucket;
                else
                        LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
                ZONE_UNLOCK(zone);
                goto zalloc_start;
        }

        /*
         * We may not be able to get a bucket so return an actual item.
         */
#ifdef UMA_DEBUG
        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

zalloc_item:
        item = zone_alloc_item(zone, udata, flags);

        return (item);
}
2389
/*
 * Fetch a slab with at least one free item from a keg, growing the
 * keg with a new slab if needed.  May sleep when the keg is at its
 * page limit and M_NOWAIT is not set.
 *
 * Arguments:
 *      keg    The keg to fetch from.  Its lock must be held on entry
 *             and remains held when a slab is returned.
 *      zone   The requesting zone, used for limit/flag reporting.
 *      flags  Malloc flags (M_NOWAIT, M_NOVM, M_USE_RESERVE, ...).
 *
 * Returns:
 *      A slab with free items, or NULL on failure.
 */
static uma_slab_t
keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
{
        uma_slab_t slab;
        int reserve;

        mtx_assert(&keg->uk_lock, MA_OWNED);
        slab = NULL;
        reserve = 0;
        /* Only M_USE_RESERVE callers may dip into the keg's reserve. */
        if ((flags & M_USE_RESERVE) == 0)
                reserve = keg->uk_reserve;

        for (;;) {
                /*
                 * Find a slab with some space.  Prefer slabs that are partially
                 * used over those that are totally full.  This helps to reduce
                 * fragmentation.
                 */
                if (keg->uk_free > reserve) {
                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
                                slab = LIST_FIRST(&keg->uk_part_slab);
                        } else {
                                slab = LIST_FIRST(&keg->uk_free_slab);
                                LIST_REMOVE(slab, us_link);
                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
                                    us_link);
                        }
                        MPASS(slab->us_keg == keg);
                        return (slab);
                }

                /*
                 * M_NOVM means don't ask at all!
                 */
                if (flags & M_NOVM)
                        break;

                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
                        keg->uk_flags |= UMA_ZFLAG_FULL;
                        /*
                         * If this is not a multi-zone, set the FULL bit.
                         * Otherwise slab_multi() takes care of it.
                         */
                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
                                zone->uz_flags |= UMA_ZFLAG_FULL;
                                zone_log_warning(zone);
                                zone_maxaction(zone);
                        }
                        if (flags & M_NOWAIT)
                                break;
                        zone->uz_sleeps++;
                        /* Wait on the keg address for items to be freed. */
                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
                        continue;
                }
                slab = keg_alloc_slab(keg, zone, flags);
                /*
                 * If we got a slab here it's safe to mark it partially used
                 * and return.  We assume that the caller is going to remove
                 * at least one item.
                 */
                if (slab) {
                        MPASS(slab->us_keg == keg);
                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
                        return (slab);
                }
                /*
                 * We might not have been able to get a slab but another cpu
                 * could have while we were unlocked.  Check again before we
                 * fail.
                 */
                flags |= M_NOVM;
        }
        return (slab);
}
2464
2465static uma_slab_t
2466zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2467{
2468        uma_slab_t slab;
2469
2470        if (keg == NULL) {
2471                keg = zone_first_keg(zone);
2472                KEG_LOCK(keg);
2473        }
2474
2475        for (;;) {
2476                slab = keg_fetch_slab(keg, zone, flags);
2477                if (slab)
2478                        return (slab);
2479                if (flags & (M_NOWAIT | M_NOVM))
2480                        break;
2481        }
2482        KEG_UNLOCK(keg);
2483        return (NULL);
2484}
2485
2486#ifndef __rtems__
/*
 * uma_zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
 * with the keg locked.  On NULL no lock is held.
 *
 * The last pointer is used to seed the search.  It is not required.
 */
static uma_slab_t
zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
{
        uma_klink_t klink;
        uma_slab_t slab;
        uma_keg_t keg;
        int flags;
        int empty;
        int full;

        /*
         * Don't wait on the first pass.  This will skip limit tests
         * as well.  We don't want to block if we can find a provider
         * without blocking.
         */
        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
        /*
         * Use the last slab allocated as a hint for where to start
         * the search.
         */
        if (last != NULL) {
                slab = keg_fetch_slab(last, zone, flags);
                if (slab)
                        return (slab);
                KEG_UNLOCK(last);
        }
        /*
         * Loop until we have a slab in case of transient failures
         * while M_WAITOK is specified.  I'm not sure this is 100%
         * required but we've done it for so long now.
         */
        for (;;) {
                empty = 0;
                full = 0;
                /*
                 * Search the available kegs for slabs.  Be careful to hold the
                 * correct lock while calling into the keg layer.
                 */
                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
                        keg = klink->kl_keg;
                        KEG_LOCK(keg);
                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
                                /* On success the keg stays locked. */
                                slab = keg_fetch_slab(keg, zone, flags);
                                if (slab)
                                        return (slab);
                        }
                        if (keg->uk_flags & UMA_ZFLAG_FULL)
                                full++;
                        else
                                empty++;
                        KEG_UNLOCK(keg);
                }
                if (rflags & (M_NOWAIT | M_NOVM))
                        break;
                /* Subsequent passes honor the caller's original flags. */
                flags = rflags;
                /*
                 * All kegs are full.  XXX We can't atomically check all kegs
                 * and sleep so just sleep for a short period and retry.
                 */
                if (full && !empty) {
                        ZONE_LOCK(zone);
                        zone->uz_flags |= UMA_ZFLAG_FULL;
                        zone->uz_sleeps++;
                        zone_log_warning(zone);
                        zone_maxaction(zone);
                        msleep(zone, zone->uz_lockptr, PVM,
                            "zonelimit", hz/100);
                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
                        ZONE_UNLOCK(zone);
                        continue;
                }
        }
        return (NULL);
}
2567#endif /* __rtems__ */
2568
2569static void *
2570slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2571{
2572        void *item;
2573        uint8_t freei;
2574
2575        MPASS(keg == slab->us_keg);
2576        mtx_assert(&keg->uk_lock, MA_OWNED);
2577
2578        freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2579        BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2580        item = slab->us_data + (keg->uk_rsize * freei);
2581        slab->us_freecount--;
2582        keg->uk_free--;
2583
2584        /* Move this slab to the full list */
2585        if (slab->us_freecount == 0) {
2586                LIST_REMOVE(slab, us_link);
2587                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2588        }
2589
2590        return (item);
2591}
2592
2593static int
2594zone_import(uma_zone_t zone, void **bucket, int max, int flags)
2595{
2596        uma_slab_t slab;
2597        uma_keg_t keg;
2598        int i;
2599
2600        slab = NULL;
2601        keg = NULL;
2602        /* Try to keep the buckets totally full */
2603        for (i = 0; i < max; ) {
2604                if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2605                        break;
2606                keg = slab->us_keg;
2607                while (slab->us_freecount && i < max) {
2608                        bucket[i++] = slab_alloc_item(keg, slab);
2609                        if (keg->uk_free <= keg->uk_reserve)
2610                                break;
2611                }
2612                /* Don't grab more than one slab at a time. */
2613                flags &= ~M_WAITOK;
2614                flags |= M_NOWAIT;
2615        }
2616        if (slab != NULL)
2617                KEG_UNLOCK(keg);
2618
2619        return i;
2620}
2621
/*
 * Allocate a bucket and fill it with up to uz_count items imported
 * from the zone's backend.
 *
 * Arguments:
 *      zone   The zone to fill a bucket for.
 *      udata  Caller data, passed through to the bucket allocator.
 *      flags  Malloc flags from the original allocation request.
 *
 * Returns:
 *      A bucket holding at least one item, or NULL on failure.
 */
static uma_bucket_t
zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
{
        uma_bucket_t bucket;
        int max;

        /* Don't wait for buckets, preserve caller's NOVM setting. */
        bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
        if (bucket == NULL)
                return (NULL);

        max = MIN(bucket->ub_entries, zone->uz_count);
        bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
            max, flags);

        /*
         * Initialize the memory if necessary.
         */
        if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
                int i;

                for (i = 0; i < bucket->ub_cnt; i++)
                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
                            flags) != 0)
                                break;
                /*
                 * If we couldn't initialize the whole bucket, put the
                 * rest back onto the freelist.
                 */
                if (i != bucket->ub_cnt) {
                        zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
                            bucket->ub_cnt - i);
#ifdef INVARIANTS
                        bzero(&bucket->ub_bucket[i],
                            sizeof(void *) * (bucket->ub_cnt - i));
#endif
                        bucket->ub_cnt = i;
                }
        }

        /* An empty bucket counts as an allocation failure. */
        if (bucket->ub_cnt == 0) {
                bucket_free(zone, bucket, udata);
                atomic_add_long(&zone->uz_fails, 1);
                return (NULL);
        }

        return (bucket);
}
2670
/*
 * Allocates a single item from a zone, bypassing the per-CPU caches
 * and importing directly from the zone's backend.
 *
 * Arguments
 *      zone   The zone to alloc for.
 *      udata  The data to be passed to the constructor.
 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
 *
 * Returns
 *      NULL if there is no memory and M_NOWAIT is set
 *      An item if successful
 */

static void *
zone_alloc_item(uma_zone_t zone, void *udata, int flags)
{
        void *item;

        item = NULL;

#ifdef UMA_DEBUG_ALLOC
        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
        if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
                goto fail;
        atomic_add_long(&zone->uz_allocs, 1);

        /*
         * We have to call both the zone's init (not the keg's init)
         * and the zone's ctor.  This is because the item is going from
         * a keg slab directly to the user, and the user is expecting it
         * to be both zone-init'd as well as zone-ctor'd.
         */
        if (zone->uz_init != NULL) {
                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
                        /* Init failed: free without running fini. */
                        zone_free_item(zone, item, udata, SKIP_FINI);
                        goto fail;
                }
        }
        if (zone->uz_ctor != NULL) {
                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
                        /* Ctor failed: free without running dtor. */
                        zone_free_item(zone, item, udata, SKIP_DTOR);
                        goto fail;
                }
        }
#ifdef INVARIANTS
        uma_dbg_alloc(zone, NULL, item);
#endif
        if (flags & M_ZERO)
                uma_zero_item(item, zone);

        return (item);

fail:
        atomic_add_long(&zone->uz_fails, 1);
        return (NULL);
}
2728
/* See uma.h */
/*
 * Free an item back to a zone: run the zone destructor, then try to
 * cache the item in the current CPU's buckets, falling back to the
 * zone's bucket list and finally to a direct keg free via
 * zone_free_item().
 */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
	uma_cache_t cache;
	uma_bucket_t bucket;
	int lockfail;
	int cpu;

	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);

#ifdef UMA_DEBUG_ALLOC_1
	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
	    zone->uz_name);

#ifndef __rtems__
	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
	    ("uma_zfree_arg: called with spinlock or critical section held"));
#endif /* __rtems__ */

	/* uma_zfree(..., NULL) does nothing, to match free(9). */
	if (item == NULL)
		return;
#ifdef DEBUG_MEMGUARD
	/* Items diverted to memguard bypass the cache layer entirely. */
	if (is_memguard_addr(item)) {
		if (zone->uz_dtor != NULL)
			zone->uz_dtor(item, zone->uz_size, udata);
		if (zone->uz_fini != NULL)
			zone->uz_fini(item, zone->uz_size);
		memguard_free(item);
		return;
	}
#endif
#ifdef INVARIANTS
	/* Malloc zones keep their debug slab pointer in udata. */
	if (zone->uz_flags & UMA_ZONE_MALLOC)
		uma_dbg_free(zone, udata, item);
	else
		uma_dbg_free(zone, NULL, item);
#endif
	if (zone->uz_dtor != NULL)
		zone->uz_dtor(item, zone->uz_size, udata);

	/*
	 * The race here is acceptable.  If we miss it we'll just have to wait
	 * a little longer for the limits to be reset.
	 */
	if (zone->uz_flags & UMA_ZFLAG_FULL)
		goto zfree_item;

	/*
	 * If possible, free to the per-CPU cache.  There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses.  We rely on a critical section to prevent
	 * preemption and migration.  We release the critical section in
	 * order to acquire the zone mutex if we are unable to free to the
	 * current cache; when we re-acquire the critical section, we must
	 * detect and handle migration if it has occurred.
	 */
zfree_restart:
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

zfree_start:
	/*
	 * Try to free into the allocbucket first to give LIFO ordering
	 * for cache-hot datastructures.  Spill over into the freebucket
	 * if necessary.  Alloc will swap them if one runs dry.
	 */
	bucket = cache->uc_allocbucket;
	if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
		bucket = cache->uc_freebucket;
	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
		    ("uma_zfree: Freeing to non free bucket index."));
		bucket->ub_bucket[bucket->ub_cnt] = item;
		bucket->ub_cnt++;
		cache->uc_frees++;
		critical_exit();
		return;
	}

	/*
	 * We must go back the zone, which requires acquiring the zone lock,
	 * which in turn means we must release and re-acquire the critical
	 * section.  Since the critical section is released, we may be
	 * preempted or migrate.  As such, make sure not to maintain any
	 * thread-local state specific to the cache from prior to releasing
	 * the critical section.
	 */
	critical_exit();
	/* Bucketing unused or disabled: free straight through to the keg. */
	if (zone->uz_count == 0 || bucketdisable)
		goto zfree_item;

	lockfail = 0;
	if (ZONE_TRYLOCK(zone) == 0) {
		/* Record contention to size the buckets. */
		ZONE_LOCK(zone);
		lockfail = 1;
	}
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

	/*
	 * Since we have locked the zone we may as well send back our stats.
	 */
	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
	atomic_add_long(&zone->uz_frees, cache->uc_frees);
	cache->uc_allocs = 0;
	cache->uc_frees = 0;

	/* Re-check: the freebucket may have been refilled while unlocked. */
	bucket = cache->uc_freebucket;
	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
		ZONE_UNLOCK(zone);
		goto zfree_start;
	}
	cache->uc_freebucket = NULL;
	/* We are no longer associated with this CPU. */
	critical_exit();

	/* Can we throw this on the zone full list? */
	if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
		printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
		/* ub_cnt is pointing to the last free item */
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
		LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
	}

	/*
	 * We bump the uz count when the cache size is insufficient to
	 * handle the working set.
	 */
	if (lockfail && zone->uz_count < BUCKET_MAX)
		zone->uz_count++;
	ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
	printf("uma_zfree: Allocating new free bucket.\n");
#endif
	bucket = bucket_alloc(zone, udata, M_NOWAIT);
	if (bucket) {
		critical_enter();
		cpu = curcpu;
		cache = &zone->uz_cpu[cpu];
		if (cache->uc_freebucket == NULL) {
			cache->uc_freebucket = bucket;
			goto zfree_start;
		}
		/*
		 * We lost the race, start over.  We have to drop our
		 * critical section to free the bucket.
		 */
		critical_exit();
		bucket_free(zone, bucket, udata);
		goto zfree_restart;
	}

	/*
	 * If nothing else caught this, we'll just do an internal free.
	 */
zfree_item:
	zone_free_item(zone, item, udata, SKIP_DTOR);

	return;
}
2903
2904static void
2905slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
2906{
2907        uint8_t freei;
2908
2909        mtx_assert(&keg->uk_lock, MA_OWNED);
2910        MPASS(keg == slab->us_keg);
2911
2912        /* Do we need to remove from any lists? */
2913        if (slab->us_freecount+1 == keg->uk_ipers) {
2914                LIST_REMOVE(slab, us_link);
2915                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2916        } else if (slab->us_freecount == 0) {
2917                LIST_REMOVE(slab, us_link);
2918                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2919        }
2920
2921        /* Slab management. */
2922        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2923        BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
2924        slab->us_freecount++;
2925
2926        /* Keg statistics. */
2927        keg->uk_free++;
2928}
2929
/*
 * Return a bucket's worth of items to the keg layer: look up each
 * item's slab, mark the item free, and clear any keg/zone FULL state
 * that the frees have relieved.
 */
static void
zone_release(uma_zone_t zone, void **bucket, int cnt)
{
	void *item;
	uma_slab_t slab;
	uma_keg_t keg;
	uint8_t *mem;
	int clearfull;
	int i;

	clearfull = 0;
	keg = zone_first_keg(zone);
	KEG_LOCK(keg);
	for (i = 0; i < cnt; i++) {
		item = bucket[i];
		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
			/* Slab header lives with (or is hashed by) the page run. */
			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
			if (zone->uz_flags & UMA_ZONE_HASH) {
				slab = hash_sfind(&keg->uk_hash, mem);
			} else {
				mem += keg->uk_pgoff;
				slab = (uma_slab_t)mem;
			}
		} else {
			slab = vtoslab((vm_offset_t)item);
			/*
			 * Multi-keg zones: items may belong to different
			 * kegs, so switch locks when the keg changes.
			 */
			if (slab->us_keg != keg) {
				KEG_UNLOCK(keg);
				keg = slab->us_keg;
				KEG_LOCK(keg);
			}
		}
		slab_free_item(keg, slab, item);
		if (keg->uk_flags & UMA_ZFLAG_FULL) {
			if (keg->uk_pages < keg->uk_maxpages) {
				keg->uk_flags &= ~UMA_ZFLAG_FULL;
				clearfull = 1;
			}

			/*
			 * We can handle one more allocation. Since we're
			 * clearing ZFLAG_FULL, wake up all procs blocked
			 * on pages. This should be uncommon, so keeping this
			 * simple for now (rather than adding count of blocked
			 * threads etc).
			 */
			wakeup(keg);
		}
	}
	KEG_UNLOCK(keg);
	/* Propagate the cleared FULL state to the zone and its waiters. */
	if (clearfull) {
		ZONE_LOCK(zone);
		zone->uz_flags &= ~UMA_ZFLAG_FULL;
		wakeup(zone);
		ZONE_UNLOCK(zone);
	}

}
2987
2988/*
2989 * Frees a single item to any zone.
2990 *
2991 * Arguments:
2992 *      zone   The zone to free to
2993 *      item   The item we're freeing
2994 *      udata  User supplied data for the dtor
2995 *      skip   Skip dtors and finis
2996 */
2997static void
2998zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2999{
3000
3001#ifdef INVARIANTS
3002        if (skip == SKIP_NONE) {
3003                if (zone->uz_flags & UMA_ZONE_MALLOC)
3004                        uma_dbg_free(zone, udata, item);
3005                else
3006                        uma_dbg_free(zone, NULL, item);
3007        }
3008#endif
3009        if (skip < SKIP_DTOR && zone->uz_dtor)
3010                zone->uz_dtor(item, zone->uz_size, udata);
3011
3012        if (skip < SKIP_FINI && zone->uz_fini)
3013                zone->uz_fini(item, zone->uz_size);
3014
3015        atomic_add_long(&zone->uz_frees, 1);
3016        zone->uz_release(zone->uz_arg, &item, 1);
3017}
3018
3019/* See uma.h */
3020int
3021uma_zone_set_max(uma_zone_t zone, int nitems)
3022{
3023        uma_keg_t keg;
3024
3025        keg = zone_first_keg(zone);
3026        if (keg == NULL)
3027                return (0);
3028        KEG_LOCK(keg);
3029#ifdef __rtems__
3030#ifdef SMP
3031        /*
3032         * Ensure we have enough items to fill the per-processor caches.  This
3033         * is a heuristic approach and works not under all conditions.
3034         */
3035        nitems += 2 * BUCKET_MAX * (mp_maxid + 1);
3036#endif
3037#endif /* __rtems__ */
3038        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
3039        if (keg->uk_maxpages * keg->uk_ipers < nitems)
3040                keg->uk_maxpages += keg->uk_ppera;
3041        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3042        KEG_UNLOCK(keg);
3043
3044        return (nitems);
3045}
3046
3047/* See uma.h */
3048int
3049uma_zone_get_max(uma_zone_t zone)
3050{
3051        int nitems;
3052        uma_keg_t keg;
3053
3054        keg = zone_first_keg(zone);
3055        if (keg == NULL)
3056                return (0);
3057        KEG_LOCK(keg);
3058        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3059        KEG_UNLOCK(keg);
3060
3061        return (nitems);
3062}
3063
/* See uma.h */
void
uma_zone_set_warning(uma_zone_t zone, const char *warning)
{

	/* The string pointer is stored, not copied; caller keeps it alive. */
	ZONE_LOCK(zone);
	zone->uz_warning = warning;
	ZONE_UNLOCK(zone);
}
3073
/* See uma.h */
void
uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
{

	/*
	 * Install a taskqueue task for the zone's limit-reached action;
	 * the callback is cast to task_fn_t and receives the zone as arg.
	 */
	ZONE_LOCK(zone);
	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
	ZONE_UNLOCK(zone);
}
3083
3084/* See uma.h */
3085int
3086uma_zone_get_cur(uma_zone_t zone)
3087{
3088        int64_t nitems;
3089        u_int i;
3090
3091        ZONE_LOCK(zone);
3092        nitems = zone->uz_allocs - zone->uz_frees;
3093        CPU_FOREACH(i) {
3094                /*
3095                 * See the comment in sysctl_vm_zone_stats() regarding the
3096                 * safety of accessing the per-cpu caches. With the zone lock
3097                 * held, it is safe, but can potentially result in stale data.
3098                 */
3099                nitems += zone->uz_cpu[i].uc_allocs -
3100                    zone->uz_cpu[i].uc_frees;
3101        }
3102        ZONE_UNLOCK(zone);
3103
3104        return (nitems < 0 ? 0 : nitems);
3105}
3106
/* See uma.h */
void
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
	KEG_LOCK(keg);
	/* Only legal before the keg has allocated any pages. */
	KASSERT(keg->uk_pages == 0,
	    ("uma_zone_set_init on non-empty keg"));
	keg->uk_init = uminit;
	KEG_UNLOCK(keg);
}
3121
/* See uma.h */
void
uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
	KEG_LOCK(keg);
	/* Only legal before the keg has allocated any pages. */
	KASSERT(keg->uk_pages == 0,
	    ("uma_zone_set_fini on non-empty keg"));
	keg->uk_fini = fini;
	KEG_UNLOCK(keg);
}
3136
/* See uma.h */
void
uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
{

	/* Zone-level (not keg-level) init; keg must still be empty. */
	ZONE_LOCK(zone);
	KASSERT(zone_first_keg(zone)->uk_pages == 0,
	    ("uma_zone_set_zinit on non-empty keg"));
	zone->uz_init = zinit;
	ZONE_UNLOCK(zone);
}
3148
/* See uma.h */
void
uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
{

	/* Zone-level (not keg-level) fini; keg must still be empty. */
	ZONE_LOCK(zone);
	KASSERT(zone_first_keg(zone)->uk_pages == 0,
	    ("uma_zone_set_zfini on non-empty keg"));
	zone->uz_fini = zfini;
	ZONE_UNLOCK(zone);
}
3160
/* See uma.h */
/* XXX uk_freef is not actually used with the zone locked */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
	KEG_LOCK(keg);
	/* Replace the keg's backing-page free routine. */
	keg->uk_freef = freef;
	KEG_UNLOCK(keg);
}
3174
3175/* See uma.h */
3176/* XXX uk_allocf is not actually used with the zone locked */
3177void
3178uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3179{
3180        uma_keg_t keg;
3181
3182        keg = zone_first_keg(zone);
3183        KEG_LOCK(keg);
3184        keg->uk_allocf = allocf;
3185        KEG_UNLOCK(keg);
3186}
3187
3188/* See uma.h */
3189void
3190uma_zone_reserve(uma_zone_t zone, int items)
3191{
3192        uma_keg_t keg;
3193
3194        keg = zone_first_keg(zone);
3195        if (keg == NULL)
3196                return;
3197        KEG_LOCK(keg);
3198        keg->uk_reserve = items;
3199        KEG_UNLOCK(keg);
3200
3201        return;
3202}
3203
3204#ifndef __rtems__
/* See uma.h */
/*
 * Pre-reserve a KVA range large enough for "count" items and switch
 * the keg to the noobj/small-alloc backends that carve pages out of
 * it.  Returns 1 on success, 0 on failure or for keg-less zones.
 */
int
uma_zone_reserve_kva(uma_zone_t zone, int count)
{
	uma_keg_t keg;
	vm_offset_t kva;
	u_int pages;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return (0);
	/* Round the item count up to whole slabs, then convert to pages. */
	pages = count / keg->uk_ipers;

	if (pages * keg->uk_ipers < count)
		pages++;
	pages *= keg->uk_ppera;

	/*
	 * With UMA_MD_SMALL_ALLOC, single-page slabs come from
	 * uma_small_alloc and need no KVA reservation; otherwise a KVA
	 * window is always required.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	if (keg->uk_ppera > 1) {
#else
	if (1) {
#endif
		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
		if (kva == 0)
			return (0);
	} else
		kva = 0;
	KEG_LOCK(keg);
	keg->uk_kva = kva;
	keg->uk_offset = 0;
	keg->uk_maxpages = pages;
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
#else
	keg->uk_allocf = noobj_alloc;
#endif
	/* Reserved pages are never given back to the VM. */
	keg->uk_flags |= UMA_ZONE_NOFREE;
	KEG_UNLOCK(keg);

	return (1);
}
3246
/* See uma.h */
/*
 * Pre-populate the zone's first keg with enough free slabs to satisfy
 * "items" allocations; stops early if slab allocation fails.
 */
void
uma_prealloc(uma_zone_t zone, int items)
{
	int slabs;
	uma_slab_t slab;
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return;
	KEG_LOCK(keg);
	/* Round the item count up to whole slabs. */
	slabs = items / keg->uk_ipers;
	if (slabs * keg->uk_ipers < items)
		slabs++;
	while (slabs > 0) {
		slab = keg_alloc_slab(keg, zone, M_WAITOK);
		if (slab == NULL)
			break;
		MPASS(slab->us_keg == keg);
		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
		slabs--;
	}
	KEG_UNLOCK(keg);
}
3272#endif /* __rtems__ */
3273
/* See uma.h */
/*
 * Drain all zones' caches and buckets back to the VM.  Caller holds
 * uma_drain_lock; kmem_danger additionally forces the safe per-CPU
 * cache drain on non-RTEMS builds.
 */
static void
uma_reclaim_locked(bool kmem_danger)
{

#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	sx_assert(&uma_drain_lock, SA_XLOCKED);
	bucket_enable();
	zone_foreach(zone_drain);
#ifndef __rtems__
	if (vm_page_count_min() || kmem_danger) {
		cache_drain_safe(NULL);
		zone_foreach(zone_drain);
	}
#endif /* __rtems__ */
	/*
	 * The slab zone is visited early in the walk above, but draining
	 * the other zones frees slabs into it; drain it again so those
	 * now-empty pages can be released.  The same applies to the
	 * bucket zones.
	 */
	zone_drain(slabzone);
	bucket_zone_drain();
}
3299
/*
 * Public entry point: take the drain lock and perform a normal
 * (non-danger) reclaim pass.
 */
void
uma_reclaim(void)
{

	sx_xlock(&uma_drain_lock);
	uma_reclaim_locked(false);
	sx_xunlock(&uma_drain_lock);
}
3308
/*
 * Set by uma_reclaim_wakeup(), consumed by uma_reclaim_worker().
 * NOTE(review): plain int shared between threads without atomics;
 * presumably the wakeup()/sx_sleep() pairing provides sufficient
 * ordering — confirm.
 */
static int uma_reclaim_needed;
3310
/* Request an asynchronous reclaim pass from uma_reclaim_worker(). */
void
uma_reclaim_wakeup(void)
{

	uma_reclaim_needed = 1;
	wakeup(&uma_reclaim_needed);
}
3318
/*
 * Kernel-thread body: sleep until uma_reclaim_wakeup() posts the
 * flag, then run lowmem handlers (non-RTEMS) and a danger-mode
 * reclaim.  Never returns.
 */
void
uma_reclaim_worker(void *arg __unused)
{

	sx_xlock(&uma_drain_lock);
	for (;;) {
		sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
		    "umarcl", 0);
		if (uma_reclaim_needed) {
			uma_reclaim_needed = 0;
#ifndef __rtems__
			/*
			 * Drop the drain lock around the event handlers —
			 * presumably so they may call back into UMA without
			 * deadlocking; confirm against handler contracts.
			 */
			sx_xunlock(&uma_drain_lock);
			EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
			sx_xlock(&uma_drain_lock);
#endif /* __rtems__ */
			uma_reclaim_locked(true);
		}
	}
}
3338
3339/* See uma.h */
3340int
3341uma_zone_exhausted(uma_zone_t zone)
3342{
3343        int full;
3344
3345        ZONE_LOCK(zone);
3346        full = (zone->uz_flags & UMA_ZFLAG_FULL);
3347        ZONE_UNLOCK(zone);
3348        return (full); 
3349}
3350
/* Racy variant of uma_zone_exhausted(): reads uz_flags unlocked. */
int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
	return (zone->uz_flags & UMA_ZFLAG_FULL);
}
3356
3357#ifndef __rtems__
/*
 * Back a large malloc(9) request: allocate a slab header from
 * slabzone plus the pages themselves, and link the two so that
 * uma_large_free() can find the header from the address.  Returns
 * NULL on failure.
 */
void *
uma_large_malloc(vm_size_t size, int wait)
{
	void *mem;
	uma_slab_t slab;
	uint8_t flags;

	slab = zone_alloc_item(slabzone, NULL, wait);
	if (slab == NULL)
		return (NULL);
	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		/* Record the header in the address-to-slab map. */
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;
	} else {
		/* Page allocation failed; give the slab header back. */
		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
	}

	return (mem);
}
3380
/* Undo uma_large_malloc(): release the pages, then the slab header. */
void
uma_large_free(uma_slab_t slab)
{

	page_free(slab->us_data, slab->us_size, slab->us_flags);
	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
3388#endif /* __rtems__ */
3389
3390static void
3391uma_zero_item(void *item, uma_zone_t zone)
3392{
3393        int i;
3394
3395        if (zone->uz_flags & UMA_ZONE_PCPU) {
3396                CPU_FOREACH(i)
3397                        bzero(zpcpu_get_cpu(item, i), zone->uz_size);
3398        } else
3399                bzero(item, zone->uz_size);
3400}
3401
/* Debug helper: dump every zone via uma_print_zone(). */
void
uma_print_stats(void)
{
	zone_foreach(uma_print_zone);
}
3407
/* Debug helper: one-line summary of a slab. */
static void
slab_print(uma_slab_t slab)
{
	printf("slab: keg %p, data %p, freecount %d\n",
		slab->us_keg, slab->us_data, slab->us_freecount);
}
3414
3415static void
3416cache_print(uma_cache_t cache)
3417{
3418        printf("alloc: %p(%d), free: %p(%d)\n",
3419                cache->uc_allocbucket,
3420                cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
3421                cache->uc_freebucket,
3422                cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
3423}
3424
/* Debug helper: dump a keg's parameters and all three slab lists. */
static void
uma_print_keg(uma_keg_t keg)
{
	uma_slab_t slab;

	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
	    "out %d free %d limit %d\n",
	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
	    keg->uk_ipers, keg->uk_ppera,
	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
	    keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
	printf("Part slabs:\n");
	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
		slab_print(slab);
	printf("Free slabs:\n");
	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
		slab_print(slab);
	printf("Full slabs:\n");
	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
		slab_print(slab);
}
3446
/* Debug helper: dump a zone, its kegs, and every CPU's cache. */
void
uma_print_zone(uma_zone_t zone)
{
	uma_cache_t cache;
	uma_klink_t kl;
	int i;

	printf("zone: %s(%p) size %d flags %#x\n",
	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
	LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
		uma_print_keg(kl->kl_keg);
	CPU_FOREACH(i) {
		cache = &zone->uz_cpu[i];
		printf("CPU %d Cache:\n", i);
		cache_print(cache);
	}
}
3464
3465#ifndef __rtems__
3466#ifdef DDB
3467/*
3468 * Generate statistics across both the zone and its per-cpu cache's.  Return
3469 * desired statistics if the pointer is non-NULL for that statistic.
3470 *
3471 * Note: does not update the zone statistics, as it can't safely clear the
3472 * per-CPU cache statistic.
3473 *
3474 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3475 * safe from off-CPU; we should modify the caches to track this information
3476 * directly so that we don't have to.
3477 */
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
    uint64_t *freesp, uint64_t *sleepsp)
{
	uma_cache_t cache;
	uint64_t allocs, frees, sleeps;
	int cachefree, cpu;

	allocs = frees = sleeps = 0;
	cachefree = 0;
	/* Accumulate the per-CPU cache counters and bucket contents. */
	CPU_FOREACH(cpu) {
		cache = &z->uz_cpu[cpu];
		if (cache->uc_allocbucket != NULL)
			cachefree += cache->uc_allocbucket->ub_cnt;
		if (cache->uc_freebucket != NULL)
			cachefree += cache->uc_freebucket->ub_cnt;
		allocs += cache->uc_allocs;
		frees += cache->uc_frees;
	}
	/* Fold in the zone-level totals. */
	allocs += z->uz_allocs;
	frees += z->uz_frees;
	sleeps += z->uz_sleeps;
	/* Each output pointer is optional. */
	if (cachefreep != NULL)
		*cachefreep = cachefree;
	if (allocsp != NULL)
		*allocsp = allocs;
	if (freesp != NULL)
		*freesp = frees;
	if (sleepsp != NULL)
		*sleepsp = sleeps;
}
3509#endif /* DDB */
3510#endif /* __rtems__ */
3511
3512static int
3513sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3514{
3515        uma_keg_t kz;
3516        uma_zone_t z;
3517        int count;
3518
3519        count = 0;
3520        rw_rlock(&uma_rwlock);
3521        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3522                LIST_FOREACH(z, &kz->uk_zones, uz_link)
3523                        count++;
3524        }
3525        rw_runlock(&uma_rwlock);
3526        return (sysctl_handle_int(oidp, &count, 0, req));
3527}
3528
/*
 * Sysctl handler: stream a uma_stream_header followed by, for each
 * zone, a uma_type_header and one uma_percpu_stat per possible CPU.
 */
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
	struct uma_stream_header ush;
	struct uma_type_header uth;
	struct uma_percpu_stat ups;
	uma_bucket_t bucket;
	struct sbuf sbuf;
	uma_cache_t cache;
	uma_klink_t kl;
	uma_keg_t kz;
	uma_zone_t z;
	uma_keg_t k;
	int count, error, i;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);

	/* Count zones so the header can carry the record count. */
	count = 0;
	rw_rlock(&uma_rwlock);
	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link)
			count++;
	}

	/*
	 * Insert stream header.
	 */
	bzero(&ush, sizeof(ush));
	ush.ush_version = UMA_STREAM_VERSION;
	ush.ush_maxcpus = (mp_maxid + 1);
	ush.ush_count = count;
	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));

	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
			bzero(&uth, sizeof(uth));
			ZONE_LOCK(z);
			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
			uth.uth_align = kz->uk_align;
			uth.uth_size = kz->uk_size;
			uth.uth_rsize = kz->uk_rsize;
			/* Sum page/limit figures over all of the zone's kegs. */
			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
				k = kl->kl_keg;
				uth.uth_maxpages += k->uk_maxpages;
				uth.uth_pages += k->uk_pages;
				uth.uth_keg_free += k->uk_free;
				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
				    * k->uk_ipers;
			}

			/*
			 * A zone is secondary if it is not the first entry
			 * on the keg's zone list.
			 */
			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
			    (LIST_FIRST(&kz->uk_zones) != z))
				uth.uth_zone_flags = UTH_ZONE_SECONDARY;

			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
				uth.uth_zone_free += bucket->ub_cnt;
			uth.uth_allocs = z->uz_allocs;
			uth.uth_frees = z->uz_frees;
			uth.uth_fails = z->uz_fails;
			uth.uth_sleeps = z->uz_sleeps;
			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
			/*
			 * While it is not normally safe to access the cache
			 * bucket pointers while not on the CPU that owns the
			 * cache, we only allow the pointers to be exchanged
			 * without the zone lock held, not invalidated, so
			 * accept the possible race associated with bucket
			 * exchange during monitoring.
			 */
			for (i = 0; i < (mp_maxid + 1); i++) {
				bzero(&ups, sizeof(ups));
				/* Internal zones and absent CPUs emit zeros. */
				if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
					goto skip;
				if (CPU_ABSENT(i))
					goto skip;
				cache = &z->uz_cpu[i];
				if (cache->uc_allocbucket != NULL)
					ups.ups_cache_free +=
					    cache->uc_allocbucket->ub_cnt;
				if (cache->uc_freebucket != NULL)
					ups.ups_cache_free +=
					    cache->uc_freebucket->ub_cnt;
				ups.ups_allocs = cache->uc_allocs;
				ups.ups_frees = cache->uc_frees;
skip:
				(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
			}
			ZONE_UNLOCK(z);
		}
	}
	rw_runlock(&uma_rwlock);
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
3632
3633int
3634sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
3635{
3636        uma_zone_t zone = *(uma_zone_t *)arg1;
3637        int error, max;
3638
3639        max = uma_zone_get_max(zone);
3640        error = sysctl_handle_int(oidp, &max, 0, req);
3641        if (error || !req->newptr)
3642                return (error);
3643
3644        uma_zone_set_max(zone, max);
3645
3646        return (0);
3647}
3648
/* Read-only sysctl handler: report the zone's current item count. */
int
sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
{
	uma_zone_t zone = *(uma_zone_t *)arg1;
	int cur;

	cur = uma_zone_get_cur(zone);
	return (sysctl_handle_int(oidp, &cur, 0, req));
}
3658
3659#ifdef INVARIANTS
/*
 * Locate the slab backing an item of the zone (INVARIANTS only).
 * Returns NULL when the item cannot be matched to a slab.
 */
static uma_slab_t
uma_dbg_getslab(uma_zone_t zone, void *item)
{
	uma_slab_t slab;
	uma_keg_t keg;
	uint8_t *mem;

	/* Round the item address down to the start of its page run. */
	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
	if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
		slab = vtoslab((vm_offset_t)mem);
	} else {
		/*
		 * It is safe to return the slab here even though the
		 * zone is unlocked because the item's allocation state
		 * essentially holds a reference.
		 */
		ZONE_LOCK(zone);
		keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
		if (keg->uk_flags & UMA_ZONE_HASH)
			slab = hash_sfind(&keg->uk_hash, mem);
		else
			slab = (uma_slab_t)(mem + keg->uk_pgoff);
		ZONE_UNLOCK(zone);
	}

	return (slab);
}
3687
/*
 * Set up the slab's freei data such that uma_dbg_free can function.
 * Panics on a duplicate allocation of the same slot (INVARIANTS only).
 */
static void
uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
{
	uma_keg_t keg;
	int freei;

	/* Keg-less zones carry no slab bookkeeping to update. */
	if (zone_first_keg(zone) == NULL)
		return;
	if (slab == NULL) {
		slab = uma_dbg_getslab(zone, item);
		if (slab == NULL)
			panic("uma: item %p did not belong to zone %s\n",
			    item, zone->uz_name);
	}
	keg = slab->us_keg;
	/* Compute the item's slot index within the slab. */
	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;

	if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);
	/* Mark the slot allocated in the debug set. */
	BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);

	return;
}
3716
/*
 * Verifies freed addresses.  Checks for alignment, valid slab membership
 * and duplicate frees.
 *
 */
static void
uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
{
	uma_keg_t keg;
	int freei;

	/* Cache-only zones have no keg; nothing to verify. */
	if (zone_first_keg(zone) == NULL)
		return;
	if (slab == NULL) {
		slab = uma_dbg_getslab(zone, item);
		if (slab == NULL)
			panic("uma: Freed item %p did not belong to zone %s\n",
			    item, zone->uz_name);
	}
	keg = slab->us_keg;
	/*
	 * Slot index of the item within the slab.  NOTE(review): freei is
	 * a signed int; an item address below us_data wraps through the
	 * unsigned subtraction before truncation, so the range check below
	 * may not flag it directly — the alignment check normally catches
	 * such cases.  Confirm against upstream before changing.
	 */
	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;

	/* The slot must fall within the slab's item count. */
	if (freei >= keg->uk_ipers)
		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	/* The item must start exactly on a slot boundary. */
	if (((freei * keg->uk_rsize) + slab->us_data) != item)
		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	/* The slot must currently be marked allocated by uma_dbg_alloc(). */
	if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
}
3753#endif /* INVARIANTS */
3754
3755#ifndef __rtems__
3756#ifdef DDB
3757DB_SHOW_COMMAND(uma, db_show_uma)
3758{
3759        uint64_t allocs, frees, sleeps;
3760        uma_bucket_t bucket;
3761        uma_keg_t kz;
3762        uma_zone_t z;
3763        int cachefree;
3764
3765        db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
3766            "Free", "Requests", "Sleeps", "Bucket");
3767        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3768                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3769                        if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3770                                allocs = z->uz_allocs;
3771                                frees = z->uz_frees;
3772                                sleeps = z->uz_sleeps;
3773                                cachefree = 0;
3774                        } else
3775                                uma_zone_sumstat(z, &cachefree, &allocs,
3776                                    &frees, &sleeps);
3777                        if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3778                            (LIST_FIRST(&kz->uk_zones) != z)))
3779                                cachefree += kz->uk_free;
3780                        LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3781                                cachefree += bucket->ub_cnt;
3782                        db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
3783                            z->uz_name, (uintmax_t)kz->uk_size,
3784                            (intmax_t)(allocs - frees), cachefree,
3785                            (uintmax_t)allocs, sleeps, z->uz_count);
3786                        if (db_pager_quit)
3787                                return;
3788                }
3789        }
3790}
3791
3792DB_SHOW_COMMAND(umacache, db_show_umacache)
3793{
3794        uint64_t allocs, frees;
3795        uma_bucket_t bucket;
3796        uma_zone_t z;
3797        int cachefree;
3798
3799        db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3800            "Requests", "Bucket");
3801        LIST_FOREACH(z, &uma_cachezones, uz_link) {
3802                uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
3803                LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3804                        cachefree += bucket->ub_cnt;
3805                db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
3806                    z->uz_name, (uintmax_t)z->uz_size,
3807                    (intmax_t)(allocs - frees), cachefree,
3808                    (uintmax_t)allocs, z->uz_count);
3809                if (db_pager_quit)
3810                        return;
3811        }
3812}
3813#endif  /* DDB */
3814#endif /* __rtems__ */
Note: See TracBrowser for help on using the repository browser.