source: rtems-libbsd/freebsd/sys/vm/uma_core.c @ 5ede682

Last change: 5ede682, checked in by Sebastian Huber <sebastian.huber@…> on 11/14/16 at 09:17:10

ZONE(9): Use recursive lock for the UMA drain

1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * uma_core.c  Implementation of the Universal Memory allocator
33 *
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36 * efficient.  A primary design goal is to return unused memory to the rest of
37 * the system.  This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
40 *
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
42 * are well known.
43 *
44 */
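/*
 * Illustrative example (not part of the original file): a typical consumer
 * of this allocator creates a zone once and then allocates and frees
 * fixed-size items through it.  The "foo" type and the M_WAITOK | M_ZERO
 * flags below are assumptions made only for this sketch.
 *
 *	struct foo { int f_refs; };
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	struct foo *f = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, f);
 *	uma_zdestroy(foo_zone);
 */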
45
46/*
47 * TODO:
48 *      - Improve memory usage for large allocations
49 *      - Investigate cache size adjustments
50 */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD$");
54
55/* I should really use ktr.. */
56/*
57#define UMA_DEBUG 1
58#define UMA_DEBUG_ALLOC 1
59#define UMA_DEBUG_ALLOC_1 1
60*/
61
62#include <rtems/bsd/local/opt_ddb.h>
63#include <rtems/bsd/local/opt_param.h>
64#include <rtems/bsd/local/opt_vm.h>
65
66#include <sys/param.h>
67#include <sys/systm.h>
68#include <sys/bitset.h>
69#include <sys/eventhandler.h>
70#include <sys/kernel.h>
71#include <sys/types.h>
72#include <sys/queue.h>
73#include <sys/malloc.h>
74#include <sys/ktr.h>
75#include <sys/lock.h>
76#include <sys/sysctl.h>
77#include <sys/mutex.h>
78#include <sys/proc.h>
79#include <sys/random.h>
80#include <sys/rwlock.h>
81#include <sys/sbuf.h>
82#include <sys/sched.h>
83#include <sys/smp.h>
84#include <sys/taskqueue.h>
85#include <sys/vmmeter.h>
86
87#include <vm/vm.h>
88#include <vm/vm_object.h>
89#include <vm/vm_page.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_param.h>
92#include <vm/vm_map.h>
93#include <vm/vm_kern.h>
94#include <vm/vm_extern.h>
95#include <vm/uma.h>
96#include <vm/uma_int.h>
97#include <vm/uma_dbg.h>
98
99#include <ddb/ddb.h>
100#ifdef __rtems__
101  #ifdef RTEMS_SMP
102    /*
103     * It is essential that we have a per-processor cache, otherwise the
104     * critical_enter()/critical_exit() protection would be insufficient.
105     */
106    #undef curcpu
107    #define curcpu rtems_get_current_processor()
108    #undef mp_maxid
109    #define mp_maxid (rtems_get_processor_count() - 1)
110    #define SMP
111  #endif
112#endif /* __rtems__ */
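/*
 * Illustrative sketch (assumption, not code taken from this file): the
 * per-CPU fast path that the remapping above is meant to keep safe looks
 * roughly like
 *
 *	critical_enter();
 *	cache = &zone->uz_cpu[curcpu];
 *	... operate on cache->uc_allocbucket / cache->uc_freebucket ...
 *	critical_exit();
 *
 * Disabling preemption pins the executing thread to one processor, so the
 * cache selected via curcpu cannot be accessed concurrently -- but only if
 * each processor really has its own cache slot, which is what the
 * definitions above guarantee on RTEMS SMP.
 */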
113
114#ifdef DEBUG_MEMGUARD
115#include <vm/memguard.h>
116#endif
117
118/*
119 * This is the zone and keg from which all zones are spawned.  The idea is that
120 * even the zone & keg heads are allocated from the allocator, so we use the
121 * bss section to bootstrap us.
122 */
123static struct uma_keg masterkeg;
124static struct uma_zone masterzone_k;
125static struct uma_zone masterzone_z;
126static uma_zone_t kegs = &masterzone_k;
127static uma_zone_t zones = &masterzone_z;
128
129/* This is the zone from which all of uma_slab_t's are allocated. */
130static uma_zone_t slabzone;
131
132/*
133 * The initial hash tables come out of this zone so they can be allocated
134 * prior to malloc coming up.
135 */
136static uma_zone_t hashzone;
137
138/* The boot-time adjusted value for cache line alignment. */
139int uma_align_cache = 64 - 1;
140
141static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
142
143#ifndef __rtems__
144/*
145 * Are we allowed to allocate buckets?
146 */
147static int bucketdisable = 1;
148#else /* __rtems__ */
149#define bucketdisable 0
150#endif /* __rtems__ */
151
152/* Linked list of all kegs in the system */
153static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
154
155/* Linked list of all cache-only zones in the system */
156static LIST_HEAD(,uma_zone) uma_cachezones =
157    LIST_HEAD_INITIALIZER(uma_cachezones);
158
159/* This RW lock protects the keg list */
160static struct rwlock_padalign uma_rwlock;
161
162#ifndef __rtems__
163/* Linked list of boot time pages */
164static LIST_HEAD(,uma_slab) uma_boot_pages =
165    LIST_HEAD_INITIALIZER(uma_boot_pages);
166
167/* This mutex protects the boot time pages list */
168static struct mtx_padalign uma_boot_pages_mtx;
169#endif /* __rtems__ */
170
171static struct sx uma_drain_lock;
172
173#ifndef __rtems__
174/* Is the VM done starting up? */
175static int booted = 0;
176#define UMA_STARTUP     1
177#define UMA_STARTUP2    2
178#endif /* __rtems__ */
179
180/*
181 * This is the handle used to schedule events that need to happen
182 * outside of the allocation fast path.
183 */
184static struct callout uma_callout;
185#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
186
187/*
188 * This structure is passed as the zone ctor arg so that I don't have to create
189 * a special allocation function just for zones.
190 */
191struct uma_zctor_args {
192        const char *name;
193        size_t size;
194        uma_ctor ctor;
195        uma_dtor dtor;
196        uma_init uminit;
197        uma_fini fini;
198        uma_import import;
199        uma_release release;
200        void *arg;
201        uma_keg_t keg;
202        int align;
203        uint32_t flags;
204};
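/*
 * Illustrative sketch (assumption, not code taken from this file): the
 * import/release members above are what back a keg-less cache zone.  A
 * consumer that manages its own backing store wires them up roughly like
 * this; my_import, my_release and my_arg are hypothetical names.
 *
 *	static int  my_import(void *arg, void **store, int count, int flags);
 *	static void my_release(void *arg, void **store, int count);
 *
 *	zone = uma_zcache_create("my cache", item_size, NULL, NULL, NULL,
 *	    NULL, my_import, my_release, my_arg, 0);
 */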
205
206struct uma_kctor_args {
207        uma_zone_t zone;
208        size_t size;
209        uma_init uminit;
210        uma_fini fini;
211        int align;
212        uint32_t flags;
213};
214
215struct uma_bucket_zone {
216        uma_zone_t      ubz_zone;
217        char            *ubz_name;
218        int             ubz_entries;    /* Number of items it can hold. */
219        int             ubz_maxsize;    /* Maximum allocation size per-item. */
220};
221
222/*
223 * Compute the actual number of bucket entries so that buckets pack into
224 * power-of-two allocation sizes for more efficient space utilization.
225 */
226#define BUCKET_SIZE(n)                                          \
227    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
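/*
 * Worked example (illustrative): BUCKET_SIZE(n) evaluates to
 * n - howmany(sizeof(struct uma_bucket), sizeof(void *)), so the bucket
 * header plus the item-pointer array fill exactly n pointer-sized slots.
 * bucket_init() below then requests precisely that amount of memory:
 *
 *	roundup(sizeof(struct uma_bucket), sizeof(void *)) +
 *	    sizeof(void *) * BUCKET_SIZE(n) == sizeof(void *) * n
 *
 * which is a power-of-two size whenever n is a power of two.
 */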
228
229#ifndef __rtems__
230#define BUCKET_MAX      BUCKET_SIZE(256)
231#else /* __rtems__ */
232#define BUCKET_MAX      BUCKET_SIZE(128)
233#endif /* __rtems__ */
234
235struct uma_bucket_zone bucket_zones[] = {
236        { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
237        { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
238        { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
239        { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
240        { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
241        { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
242        { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
243        { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
244#ifndef __rtems__
245        { NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
246#endif /* __rtems__ */
247        { NULL, NULL, 0}
248};
249
250/*
251 * Flags and enumerations to be passed to internal functions.
252 */
253enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
254
255/* Prototypes.. */
256
257#ifndef __rtems__
258static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
259#endif /* __rtems__ */
260static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
261#ifndef __rtems__
262static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
263#endif /* __rtems__ */
264static void page_free(void *, vm_size_t, uint8_t);
265static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
266static void cache_drain(uma_zone_t);
267static void bucket_drain(uma_zone_t, uma_bucket_t);
268static void bucket_cache_drain(uma_zone_t zone);
269static int keg_ctor(void *, int, void *, int);
270static void keg_dtor(void *, int, void *);
271static int zone_ctor(void *, int, void *, int);
272static void zone_dtor(void *, int, void *);
273static int zero_init(void *, int, int);
274static void keg_small_init(uma_keg_t keg);
275static void keg_large_init(uma_keg_t keg);
276static void zone_foreach(void (*zfunc)(uma_zone_t));
277static void zone_timeout(uma_zone_t zone);
278static int hash_alloc(struct uma_hash *);
279static int hash_expand(struct uma_hash *, struct uma_hash *);
280static void hash_free(struct uma_hash *hash);
281static void uma_timeout(void *);
282static void uma_startup3(void);
283static void *zone_alloc_item(uma_zone_t, void *, int);
284static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
285static void bucket_enable(void);
286static void bucket_init(void);
287static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
288static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
289static void bucket_zone_drain(void);
290static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
291static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
292#ifndef __rtems__
293static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
294#endif /* __rtems__ */
295static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
296static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
297static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
298    uma_fini fini, int align, uint32_t flags);
299static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
300static void zone_release(uma_zone_t zone, void **bucket, int cnt);
301static void uma_zero_item(void *item, uma_zone_t zone);
302
303void uma_print_zone(uma_zone_t);
304void uma_print_stats(void);
305static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
306static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
307
308#ifdef INVARIANTS
309static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
310static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
311#endif
312
313SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
314
315SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
316    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
317
318SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
319    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
320
321static int zone_warnings = 1;
322SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
323    "Warn when UMA zones become full");
324
325/*
326 * This routine checks to see whether or not it's safe to enable buckets.
327 */
328static void
329bucket_enable(void)
330{
331#ifndef __rtems__
332        bucketdisable = vm_page_count_min();
333#endif /* __rtems__ */
334}
335
336/*
337 * Initialize bucket_zones, the array of zones of buckets of various sizes.
338 *
339 * For each zone, calculate the memory required for each bucket, consisting
340 * of the header and an array of pointers.
341 */
342static void
343bucket_init(void)
344{
345        struct uma_bucket_zone *ubz;
346        int size;
347
348        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
349                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
350                size += sizeof(void *) * ubz->ubz_entries;
351                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
352                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
353                    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
354        }
355}
356
357/*
358 * Given a desired number of entries for a bucket, return the zone from which
359 * to allocate the bucket.
360 */
361static struct uma_bucket_zone *
362bucket_zone_lookup(int entries)
363{
364        struct uma_bucket_zone *ubz;
365
366        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
367                if (ubz->ubz_entries >= entries)
368                        return (ubz);
369        ubz--;
370        return (ubz);
371}
372
373static int
374bucket_select(int size)
375{
376        struct uma_bucket_zone *ubz;
377
378        ubz = &bucket_zones[0];
379        if (size > ubz->ubz_maxsize)
380                return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
381
382        for (; ubz->ubz_entries != 0; ubz++)
383                if (ubz->ubz_maxsize < size)
384                        break;
385        ubz--;
386        return (ubz->ubz_entries);
387}
388
389static uma_bucket_t
390bucket_alloc(uma_zone_t zone, void *udata, int flags)
391{
392        struct uma_bucket_zone *ubz;
393        uma_bucket_t bucket;
394
395#ifndef __rtems__
396        /*
397         * This is to stop us from allocating per cpu buckets while we're
398         * running out of vm.boot_pages.  Otherwise, we would exhaust the
399         * boot pages.  This also prevents us from allocating buckets in
400         * low memory situations.
401         */
402        if (bucketdisable)
403                return (NULL);
404#endif /* __rtems__ */
405        /*
406         * To limit bucket recursion we store the original zone flags
407         * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
408         * NOVM flag to persist even through deep recursions.  We also
409         * store ZFLAG_BUCKET once we have recursed attempting to allocate
410         * a bucket for a bucket zone so we do not allow infinite bucket
411         * recursion.  This cookie will even persist to frees of unused
412         * buckets via the allocation path or bucket allocations in the
413         * free path.
414         */
415        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
416                udata = (void *)(uintptr_t)zone->uz_flags;
417        else {
418                if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
419                        return (NULL);
420                udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
421        }
422        if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
423                flags |= M_NOVM;
424        ubz = bucket_zone_lookup(zone->uz_count);
425        if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
426                ubz++;
427        bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
428        if (bucket) {
429#ifdef INVARIANTS
430                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
431#endif
432                bucket->ub_cnt = 0;
433                bucket->ub_entries = ubz->ubz_entries;
434        }
435
436        return (bucket);
437}
438
439static void
440bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
441{
442        struct uma_bucket_zone *ubz;
443
444        KASSERT(bucket->ub_cnt == 0,
445            ("bucket_free: Freeing a non free bucket."));
446        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
447                udata = (void *)(uintptr_t)zone->uz_flags;
448        ubz = bucket_zone_lookup(bucket->ub_entries);
449        uma_zfree_arg(ubz->ubz_zone, bucket, udata);
450}
451
452static void
453bucket_zone_drain(void)
454{
455        struct uma_bucket_zone *ubz;
456
457        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
458                zone_drain(ubz->ubz_zone);
459}
460
461static void
462zone_log_warning(uma_zone_t zone)
463{
464        static const struct timeval warninterval = { 300, 0 };
465
466        if (!zone_warnings || zone->uz_warning == NULL)
467                return;
468
469        if (ratecheck(&zone->uz_ratecheck, &warninterval))
470                printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
471}
472
473static inline void
474zone_maxaction(uma_zone_t zone)
475{
476
477        if (zone->uz_maxaction.ta_func != NULL)
478                taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
479}
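/*
 * Illustrative sketch (assumption, not code taken from this file): the
 * warning and maxaction hooks used above are armed by consumers through the
 * public setters, e.g.
 *
 *	uma_zone_set_warning(zone, "out of foo items");
 *	uma_zone_set_maxaction(zone, my_maxaction);
 *
 * where my_maxaction is a hypothetical callback.  zone_log_warning() then
 * rate-limits the message and zone_maxaction() enqueues the task whenever
 * the zone hits its item limit.
 */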
480
481static void
482zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
483{
484        uma_klink_t klink;
485
486        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
487                kegfn(klink->kl_keg);
488}
489
490/*
491 * Routine called by timeout which is used to fire off some time interval
492 * based calculations.  (stats, hash size, etc.)
493 *
494 * Arguments:
495 *      arg   Unused
496 *
497 * Returns:
498 *      Nothing
499 */
500static void
501uma_timeout(void *unused)
502{
503        bucket_enable();
504        zone_foreach(zone_timeout);
505
506        /* Reschedule this event */
507        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
508}
509
510/*
511 * Routine to perform timeout driven calculations.  This expands the
512 * hashes and does per cpu statistics aggregation.
513 *
514 *  Returns nothing.
515 */
516static void
517keg_timeout(uma_keg_t keg)
518{
519
520        KEG_LOCK(keg);
521        /*
522         * Expand the keg hash table.
523         *
524         * This is done if the number of slabs is larger than the hash size.
525 * What I'm trying to do here is eliminate collisions entirely.  This
526         * may be a little aggressive.  Should I allow for two collisions max?
527         */
528        if (keg->uk_flags & UMA_ZONE_HASH &&
529            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
530                struct uma_hash newhash;
531                struct uma_hash oldhash;
532                int ret;
533
534                /*
535                 * This is so involved because allocating and freeing
536                 * while the keg lock is held will lead to deadlock.
537                 * I have to do everything in stages and check for
538                 * races.
539                 */
540                newhash = keg->uk_hash;
541                KEG_UNLOCK(keg);
542                ret = hash_alloc(&newhash);
543                KEG_LOCK(keg);
544                if (ret) {
545                        if (hash_expand(&keg->uk_hash, &newhash)) {
546                                oldhash = keg->uk_hash;
547                                keg->uk_hash = newhash;
548                        } else
549                                oldhash = newhash;
550
551                        KEG_UNLOCK(keg);
552                        hash_free(&oldhash);
553                        return;
554                }
555        }
556        KEG_UNLOCK(keg);
557}
558
559static void
560zone_timeout(uma_zone_t zone)
561{
562
563        zone_foreach_keg(zone, &keg_timeout);
564}
565
566/*
567 * Allocate and zero fill the next sized hash table from the appropriate
568 * backing store.
569 *
570 * Arguments:
571 *      hash  A new hash structure with the old hash size in uh_hashsize
572 *
573 * Returns:
574 *      1 on success and 0 on failure.
575 */
576static int
577hash_alloc(struct uma_hash *hash)
578{
579        int oldsize;
580        int alloc;
581
582        oldsize = hash->uh_hashsize;
583
584        /* We're just going to go to a power of two greater */
585        if (oldsize)  {
586                hash->uh_hashsize = oldsize * 2;
587                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
588                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
589                    M_UMAHASH, M_NOWAIT);
590        } else {
591                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
592                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
593                    M_WAITOK);
594                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
595        }
596        if (hash->uh_slab_hash) {
597                bzero(hash->uh_slab_hash, alloc);
598                hash->uh_hashmask = hash->uh_hashsize - 1;
599                return (1);
600        }
601
602        return (0);
603}
604
605/*
606 * Expands the hash table for HASH zones.  This is done from zone_timeout
607 * to reduce collisions.  This must not be done in the regular allocation
608 * path, otherwise, we can recurse on the vm while allocating pages.
609 *
610 * Arguments:
611 *      oldhash  The hash you want to expand
612 *      newhash  The hash structure for the new table
613 *
614 * Returns:
615 *      Nothing
616 *
617 * Discussion:
618 */
619static int
620hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
621{
622        uma_slab_t slab;
623        int hval;
624        int i;
625
626        if (!newhash->uh_slab_hash)
627                return (0);
628
629        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
630                return (0);
631
632        /*
633         * I need to investigate hash algorithms for resizing without a
634         * full rehash.
635         */
636
637        for (i = 0; i < oldhash->uh_hashsize; i++)
638                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
639                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
640                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
641                        hval = UMA_HASH(newhash, slab->us_data);
642                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
643                            slab, us_hlink);
644                }
645
646        return (1);
647}
648
649/*
650 * Free the hash bucket to the appropriate backing store.
651 *
652 * Arguments:
653 *      slab_hash  The hash bucket we're freeing
654 *      hashsize   The number of entries in that hash bucket
655 *
656 * Returns:
657 *      Nothing
658 */
659static void
660hash_free(struct uma_hash *hash)
661{
662        if (hash->uh_slab_hash == NULL)
663                return;
664        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
665                zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
666        else
667                free(hash->uh_slab_hash, M_UMAHASH);
668}
669
670/*
671 * Frees all outstanding items in a bucket
672 *
673 * Arguments:
674 *      zone   The zone to free to, must be unlocked.
675 *      bucket The free/alloc bucket with items, cpu queue must be locked.
676 *
677 * Returns:
678 *      Nothing
679 */
680
681static void
682bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
683{
684        int i;
685
686        if (bucket == NULL)
687                return;
688
689        if (zone->uz_fini)
690                for (i = 0; i < bucket->ub_cnt; i++)
691                        zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
692        zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
693        bucket->ub_cnt = 0;
694}
695
696/*
697 * Drains the per cpu caches for a zone.
698 *
699 * NOTE: This may only be called while the zone is being torn down, and not
700 * during normal operation.  This is necessary in order that we do not have
701 * to migrate CPUs to drain the per-CPU caches.
702 *
703 * Arguments:
704 *      zone     The zone to drain, must be unlocked.
705 *
706 * Returns:
707 *      Nothing
708 */
709static void
710cache_drain(uma_zone_t zone)
711{
712        uma_cache_t cache;
713        int cpu;
714
715        /*
716         * XXX: It is safe to not lock the per-CPU caches, because we're
717         * tearing down the zone anyway.  I.e., there will be no further use
718         * of the caches at this point.
719         *
720 * XXX: It would be good to be able to assert that the zone is being
721         * torn down to prevent improper use of cache_drain().
722         *
723         * XXX: We lock the zone before passing into bucket_cache_drain() as
724         * it is used elsewhere.  Should the tear-down path be made special
725         * there in some form?
726         */
727        CPU_FOREACH(cpu) {
728                cache = &zone->uz_cpu[cpu];
729                bucket_drain(zone, cache->uc_allocbucket);
730                bucket_drain(zone, cache->uc_freebucket);
731                if (cache->uc_allocbucket != NULL)
732                        bucket_free(zone, cache->uc_allocbucket, NULL);
733                if (cache->uc_freebucket != NULL)
734                        bucket_free(zone, cache->uc_freebucket, NULL);
735                cache->uc_allocbucket = cache->uc_freebucket = NULL;
736        }
737        ZONE_LOCK(zone);
738        bucket_cache_drain(zone);
739        ZONE_UNLOCK(zone);
740}
741
742#ifndef __rtems__
743static void
744cache_shrink(uma_zone_t zone)
745{
746
747        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
748                return;
749
750        ZONE_LOCK(zone);
751        zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
752        ZONE_UNLOCK(zone);
753}
754
755static void
756cache_drain_safe_cpu(uma_zone_t zone)
757{
758        uma_cache_t cache;
759        uma_bucket_t b1, b2;
760
761        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
762                return;
763
764        b1 = b2 = NULL;
765        ZONE_LOCK(zone);
766        critical_enter();
767        cache = &zone->uz_cpu[curcpu];
768        if (cache->uc_allocbucket) {
769                if (cache->uc_allocbucket->ub_cnt != 0)
770                        LIST_INSERT_HEAD(&zone->uz_buckets,
771                            cache->uc_allocbucket, ub_link);
772                else
773                        b1 = cache->uc_allocbucket;
774                cache->uc_allocbucket = NULL;
775        }
776        if (cache->uc_freebucket) {
777                if (cache->uc_freebucket->ub_cnt != 0)
778                        LIST_INSERT_HEAD(&zone->uz_buckets,
779                            cache->uc_freebucket, ub_link);
780                else
781                        b2 = cache->uc_freebucket;
782                cache->uc_freebucket = NULL;
783        }
784        critical_exit();
785        ZONE_UNLOCK(zone);
786        if (b1)
787                bucket_free(zone, b1, NULL);
788        if (b2)
789                bucket_free(zone, b2, NULL);
790}
791
792/*
793 * Safely drain the per-CPU caches of a zone (or of all zones) into the zone bucket cache.
794 * This is an expensive call because it needs to bind to all CPUs
795 * one by one and enter a critical section on each of them in order
796 * to safely access their cache buckets.
797 * The zone lock must not be held when calling this function.
798 */
799static void
800cache_drain_safe(uma_zone_t zone)
801{
802        int cpu;
803
804        /*
805         * Polite bucket size shrinking was not enough, shrink aggressively.
806         */
807        if (zone)
808                cache_shrink(zone);
809        else
810                zone_foreach(cache_shrink);
811
812        CPU_FOREACH(cpu) {
813                thread_lock(curthread);
814                sched_bind(curthread, cpu);
815                thread_unlock(curthread);
816
817                if (zone)
818                        cache_drain_safe_cpu(zone);
819                else
820                        zone_foreach(cache_drain_safe_cpu);
821        }
822        thread_lock(curthread);
823        sched_unbind(curthread);
824        thread_unlock(curthread);
825}
826#endif /* __rtems__ */
827
828/*
829 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
830 */
831static void
832bucket_cache_drain(uma_zone_t zone)
833{
834        uma_bucket_t bucket;
835
836        /*
837         * Drain the bucket queues and free the buckets, we just keep two per
838         * cpu (alloc/free).
839         */
840        while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
841                LIST_REMOVE(bucket, ub_link);
842                ZONE_UNLOCK(zone);
843                bucket_drain(zone, bucket);
844                bucket_free(zone, bucket, NULL);
845                ZONE_LOCK(zone);
846        }
847
848        /*
849         * Shrink further bucket sizes.  The price of a single zone lock collision
850         * is probably lower than the price of a global cache drain.
851         */
852        if (zone->uz_count > zone->uz_count_min)
853                zone->uz_count--;
854}
855
856static void
857keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
858{
859        uint8_t *mem;
860        int i;
861        uint8_t flags;
862
863        mem = slab->us_data;
864        flags = slab->us_flags;
865        i = start;
866        if (keg->uk_fini != NULL) {
867                for (i--; i > -1; i--)
868                        keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
869                            keg->uk_size);
870        }
871        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
872                zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
873#ifdef UMA_DEBUG
874        printf("%s: Returning %d bytes.\n", keg->uk_name,
875            PAGE_SIZE * keg->uk_ppera);
876#endif
877        keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
878}
879
880/*
881 * Frees pages from a keg back to the system.  This is done on demand from
882 * the pageout daemon.
883 *
884 * Returns nothing.
885 */
886static void
887keg_drain(uma_keg_t keg)
888{
889        struct slabhead freeslabs = { 0 };
890        uma_slab_t slab, tmp;
891
892        /*
893         * We don't want to take pages from statically allocated kegs at this
894         * time.
895         */
896        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
897                return;
898
899#ifdef UMA_DEBUG
900        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
901#endif
902        KEG_LOCK(keg);
903        if (keg->uk_free == 0)
904                goto finished;
905
906        LIST_FOREACH_SAFE(slab, &keg->uk_free_slab, us_link, tmp) {
907#ifndef __rtems__
908                /* We have nowhere to free these to. */
909                if (slab->us_flags & UMA_SLAB_BOOT)
910                        continue;
911#endif /* __rtems__ */
912
913                LIST_REMOVE(slab, us_link);
914                keg->uk_pages -= keg->uk_ppera;
915                keg->uk_free -= keg->uk_ipers;
916
917                if (keg->uk_flags & UMA_ZONE_HASH)
918                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
919
920                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
921        }
922finished:
923        KEG_UNLOCK(keg);
924
925        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
926                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
927                keg_free_slab(keg, slab, keg->uk_ipers);
928        }
929}
930
931static void
932zone_drain_wait(uma_zone_t zone, int waitok)
933{
934
935        /*
936         * Set draining to interlock with zone_dtor() so we can release our
937         * locks as we go.  Only dtor() should do a WAITOK call since it
938         * is the only call that knows the structure will still be available
939         * when it wakes up.
940         */
941        ZONE_LOCK(zone);
942        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
943                if (waitok == M_NOWAIT)
944                        goto out;
945                msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
946        }
947        zone->uz_flags |= UMA_ZFLAG_DRAINING;
948        bucket_cache_drain(zone);
949        ZONE_UNLOCK(zone);
950        /*
951         * The DRAINING flag protects us from being freed while
952         * we're running.  Normally the uma_rwlock would protect us but we
953         * must be able to release and acquire the right lock for each keg.
954         */
955        zone_foreach_keg(zone, &keg_drain);
956        ZONE_LOCK(zone);
957        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
958        wakeup(zone);
959out:
960        ZONE_UNLOCK(zone);
961}
962
963void
964zone_drain(uma_zone_t zone)
965{
966
967        zone_drain_wait(zone, M_NOWAIT);
968}
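/*
 * Illustrative note (assumption, not code taken from this file): system-wide
 * reclamation under memory pressure typically runs every zone through this
 * path, roughly
 *
 *	zone_foreach(zone_drain);
 *
 * which is what uma_reclaim() does when the page daemon reports a low-memory
 * condition.
 */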
969
970/*
971 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
972 *
973 * Arguments:
974 *      wait  Shall we wait?
975 *
976 * Returns:
977 *      The slab that was allocated or NULL if there is no memory and the
978 *      caller specified M_NOWAIT.
979 */
980static uma_slab_t
981keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
982{
983        uma_alloc allocf;
984        uma_slab_t slab;
985        uint8_t *mem;
986        uint8_t flags;
987        int i;
988
989        mtx_assert(&keg->uk_lock, MA_OWNED);
990        slab = NULL;
991        mem = NULL;
992
993#ifdef UMA_DEBUG
994        printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
995#endif
996        allocf = keg->uk_allocf;
997        KEG_UNLOCK(keg);
998
999        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1000                slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
1001                if (slab == NULL)
1002                        goto out;
1003        }
1004
1005        /*
1006         * This reproduces the old vm_zone behavior of zero filling pages the
1007         * first time they are added to a zone.
1008         *
1009         * Malloced items are zeroed in uma_zalloc.
1010         */
1011
1012        if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1013                wait |= M_ZERO;
1014        else
1015                wait &= ~M_ZERO;
1016
1017        if (keg->uk_flags & UMA_ZONE_NODUMP)
1018                wait |= M_NODUMP;
1019
1020        /* zone is passed for legacy reasons. */
1021        mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
1022        if (mem == NULL) {
1023                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1024                        zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1025                slab = NULL;
1026                goto out;
1027        }
1028
1029        /* Point the slab into the allocated memory */
1030        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
1031                slab = (uma_slab_t )(mem + keg->uk_pgoff);
1032
1033        if (keg->uk_flags & UMA_ZONE_VTOSLAB)
1034                for (i = 0; i < keg->uk_ppera; i++)
1035                        vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
1036
1037        slab->us_keg = keg;
1038        slab->us_data = mem;
1039        slab->us_freecount = keg->uk_ipers;
1040        slab->us_flags = flags;
1041        BIT_FILL(SLAB_SETSIZE, &slab->us_free);
1042#ifdef INVARIANTS
1043        BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
1044#endif
1045
1046        if (keg->uk_init != NULL) {
1047                for (i = 0; i < keg->uk_ipers; i++)
1048                        if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
1049                            keg->uk_size, wait) != 0)
1050                                break;
1051                if (i != keg->uk_ipers) {
1052                        keg_free_slab(keg, slab, i);
1053                        slab = NULL;
1054                        goto out;
1055                }
1056        }
1057out:
1058        KEG_LOCK(keg);
1059
1060        if (slab != NULL) {
1061                if (keg->uk_flags & UMA_ZONE_HASH)
1062                        UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
1063
1064                keg->uk_pages += keg->uk_ppera;
1065                keg->uk_free += keg->uk_ipers;
1066        }
1067
1068        return (slab);
1069}
1070
1071#ifndef __rtems__
1072/*
1073 * This function is intended to be used early on in place of page_alloc() so
1074 * that we may use the boot time page cache to satisfy allocations before
1075 * the VM is ready.
1076 */
1077static void *
1078startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
1079{
1080        uma_keg_t keg;
1081        uma_slab_t tmps;
1082        int pages, check_pages;
1083
1084        keg = zone_first_keg(zone);
1085        pages = howmany(bytes, PAGE_SIZE);
1086        check_pages = pages - 1;
1087        KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
1088
1089        /*
1090         * Check our small startup cache to see if it has pages remaining.
1091         */
1092        mtx_lock(&uma_boot_pages_mtx);
1093
1094        /* First check if we have enough room. */
1095        tmps = LIST_FIRST(&uma_boot_pages);
1096        while (tmps != NULL && check_pages-- > 0)
1097                tmps = LIST_NEXT(tmps, us_link);
1098        if (tmps != NULL) {
1099                /*
1100                 * It's ok to lose tmps references.  The last one will
1101                 * have tmps->us_data pointing to the start address of
1102                 * "pages" contiguous pages of memory.
1103                 */
1104                while (pages-- > 0) {
1105                        tmps = LIST_FIRST(&uma_boot_pages);
1106                        LIST_REMOVE(tmps, us_link);
1107                }
1108                mtx_unlock(&uma_boot_pages_mtx);
1109                *pflag = tmps->us_flags;
1110                return (tmps->us_data);
1111        }
1112        mtx_unlock(&uma_boot_pages_mtx);
1113        if (booted < UMA_STARTUP2)
1114                panic("UMA: Increase vm.boot_pages");
1115        /*
1116         * Now that we've booted reset these users to their real allocator.
1117         */
1118#ifdef UMA_MD_SMALL_ALLOC
1119        keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1120#else
1121        keg->uk_allocf = page_alloc;
1122#endif
1123        return keg->uk_allocf(zone, bytes, pflag, wait);
1124}
1125#endif /* __rtems__ */
1126
1127/*
1128 * Allocates a number of pages from the system
1129 *
1130 * Arguments:
1131 *      bytes  The number of bytes requested
1132 *      wait  Shall we wait?
1133 *
1134 * Returns:
1135 *      A pointer to the alloced memory or possibly
1136 *      NULL if M_NOWAIT is set.
1137 */
1138static void *
1139page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
1140{
1141        void *p;        /* Returned page */
1142
1143#ifndef __rtems__
1144        *pflag = UMA_SLAB_KMEM;
1145        p = (void *) kmem_malloc(kmem_arena, bytes, wait);
1146#else /* __rtems__ */
1147        *pflag = 0;
1148        p = rtems_bsd_page_alloc(bytes, wait);
1149#endif /* __rtems__ */
1150
1151        return (p);
1152}
1153
1154#ifndef __rtems__
1155/*
1156 * Allocates a number of pages from within an object
1157 *
1158 * Arguments:
1159 *      bytes  The number of bytes requested
1160 *      wait   Shall we wait?
1161 *
1162 * Returns:
1163 *      A pointer to the alloced memory or possibly
1164 *      NULL if M_NOWAIT is set.
1165 */
1166static void *
1167noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
1168{
1169        TAILQ_HEAD(, vm_page) alloctail;
1170        u_long npages;
1171        vm_offset_t retkva, zkva;
1172        vm_page_t p, p_next;
1173        uma_keg_t keg;
1174
1175        TAILQ_INIT(&alloctail);
1176        keg = zone_first_keg(zone);
1177
1178        npages = howmany(bytes, PAGE_SIZE);
1179        while (npages > 0) {
1180                p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
1181                    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
1182                if (p != NULL) {
1183                        /*
1184                         * Since the page does not belong to an object, its
1185                         * listq is unused.
1186                         */
1187                        TAILQ_INSERT_TAIL(&alloctail, p, listq);
1188                        npages--;
1189                        continue;
1190                }
1191                if (wait & M_WAITOK) {
1192                        VM_WAIT;
1193                        continue;
1194                }
1195
1196                /*
1197                 * Page allocation failed, free intermediate pages and
1198                 * exit.
1199                 */
1200                TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1201                        vm_page_unwire(p, PQ_NONE);
1202                        vm_page_free(p);
1203                }
1204                return (NULL);
1205        }
1206        *flags = UMA_SLAB_PRIV;
1207        zkva = keg->uk_kva +
1208            atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1209        retkva = zkva;
1210        TAILQ_FOREACH(p, &alloctail, listq) {
1211                pmap_qenter(zkva, &p, 1);
1212                zkva += PAGE_SIZE;
1213        }
1214
1215        return ((void *)retkva);
1216}
1217#endif /* __rtems__ */
1218
1219/*
1220 * Frees a number of pages to the system
1221 *
1222 * Arguments:
1223 *      mem   A pointer to the memory to be freed
1224 *      size  The size of the memory being freed
1225 *      flags The original p->us_flags field
1226 *
1227 * Returns:
1228 *      Nothing
1229 */
1230static void
1231page_free(void *mem, vm_size_t size, uint8_t flags)
1232{
1233#ifndef __rtems__
1234        struct vmem *vmem;
1235
1236        if (flags & UMA_SLAB_KMEM)
1237                vmem = kmem_arena;
1238        else if (flags & UMA_SLAB_KERNEL)
1239                vmem = kernel_arena;
1240        else
1241                panic("UMA: page_free used with invalid flags %x", flags);
1242
1243        kmem_free(vmem, (vm_offset_t)mem, size);
1244#else /* __rtems__ */
1245        if (flags & UMA_SLAB_KERNEL)
1246                free(mem, M_TEMP);
1247        else
1248                rtems_bsd_page_free(mem);
1249#endif /* __rtems__ */
1250}
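/*
 * Illustrative sketch (assumption, not code taken from this file): a zone
 * may substitute its own back-end for page_alloc()/page_free() via the
 * public setters.  my_alloc and my_free are hypothetical, but they must
 * match the signatures used by page_alloc() and page_free() above.
 *
 *	static void *my_alloc(uma_zone_t zone, vm_size_t bytes,
 *	    uint8_t *pflag, int wait);
 *	static void my_free(void *mem, vm_size_t bytes, uint8_t flags);
 *
 *	uma_zone_set_allocf(zone, my_alloc);
 *	uma_zone_set_freef(zone, my_free);
 */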
1251
1252/*
1253 * Zero fill initializer
1254 *
1255 * Arguments/Returns follow uma_init specifications
1256 */
1257static int
1258zero_init(void *mem, int size, int flags)
1259{
1260        bzero(mem, size);
1261        return (0);
1262}
1263
1264/*
1265 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
1266 *
1267 * Arguments
1268 *      keg  The zone we should initialize
1269 *
1270 * Returns
1271 *      Nothing
1272 */
1273static void
1274keg_small_init(uma_keg_t keg)
1275{
1276        u_int rsize;
1277        u_int memused;
1278        u_int wastedspace;
1279        u_int shsize;
1280        u_int slabsize;
1281
1282        if (keg->uk_flags & UMA_ZONE_PCPU) {
1283                u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
1284
1285                slabsize = sizeof(struct pcpu);
1286                keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
1287                    PAGE_SIZE);
1288        } else {
1289                slabsize = UMA_SLAB_SIZE;
1290                keg->uk_ppera = 1;
1291        }
1292
1293        /*
1294         * Calculate the size of each allocation (rsize) according to
1295         * alignment.  If the requested size is smaller than we have
1296         * allocation bits for we round it up.
1297         */
1298        rsize = keg->uk_size;
1299        if (rsize < slabsize / SLAB_SETSIZE)
1300                rsize = slabsize / SLAB_SETSIZE;
1301        if (rsize & keg->uk_align)
1302                rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1303        keg->uk_rsize = rsize;
1304
1305        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1306            keg->uk_rsize < sizeof(struct pcpu),
1307            ("%s: size %u too large", __func__, keg->uk_rsize));
1308
1309        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1310                shsize = 0;
1311        else
1312                shsize = sizeof(struct uma_slab);
1313
1314        keg->uk_ipers = (slabsize - shsize) / rsize;
1315        KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1316            ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1317
1318        memused = keg->uk_ipers * rsize + shsize;
1319        wastedspace = slabsize - memused;
1320
1321        /*
1322         * We can't do OFFPAGE if we're internal or if we've been
1323         * asked not to go to the VM for buckets.  If we do this we
1324         * may end up going to the VM for slabs, which we do not
1325         * want to do if we're UMA_ZFLAG_CACHEONLY as a result
1326         * of UMA_ZONE_VM, which clearly forbids it.
1327         */
1328        if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1329            (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1330                return;
1331
1332        /*
1333         * See if using an OFFPAGE slab will limit our waste.  Only do
1334         * this if it permits more items per-slab.
1335         *
1336         * XXX We could try growing slabsize to limit max waste as well.
1337         * Historically this was not done because the VM could not
1338         * efficiently handle contiguous allocations.
1339         */
1340        if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
1341            (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
1342                keg->uk_ipers = slabsize / keg->uk_rsize;
1343                KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1344                    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1345#ifdef UMA_DEBUG
1346                printf("UMA decided we need offpage slab headers for "
1347                    "keg: %s, calculated wastedspace = %d, "
1348                    "maximum wasted space allowed = %d, "
1349                    "calculated ipers = %d, "
1350                    "new wasted space = %d\n", keg->uk_name, wastedspace,
1351                    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1352                    slabsize - keg->uk_ipers * keg->uk_rsize);
1353#endif
1354                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1355        }
1356
1357        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1358            (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1359                keg->uk_flags |= UMA_ZONE_HASH;
1360}
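/*
 * Worked example (illustrative figures only; assume UMA_SLAB_SIZE == 4096,
 * an in-band slab header of 96 bytes and a 256-byte, pointer-aligned item):
 *
 *	rsize       = 256
 *	uk_ipers    = (4096 - 96) / 256 = 15
 *	memused     = 15 * 256 + 96     = 3936
 *	wastedspace = 4096 - 3936       = 160
 *
 * Assuming UMA_MAX_WASTE is 10, the OFFPAGE threshold is 4096 / 10 = 409
 * bytes, so this keg keeps its slab header in-band.
 */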
1361
1362/*
1363 * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
1364 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1365 * more complicated.
1366 *
1367 * Arguments
1368 *      keg  The keg we should initialize
1369 *
1370 * Returns
1371 *      Nothing
1372 */
1373static void
1374keg_large_init(uma_keg_t keg)
1375{
1376        u_int shsize;
1377
1378        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1379        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1380            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1381        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1382            ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1383
1384        keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1385        keg->uk_ipers = 1;
1386        keg->uk_rsize = keg->uk_size;
1387
1388        /* We can't do OFFPAGE if we're internal, bail out here. */
1389        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1390                return;
1391
1392        /* Check whether we have enough space to not do OFFPAGE. */
1393        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
1394                shsize = sizeof(struct uma_slab);
1395                if (shsize & UMA_ALIGN_PTR)
1396                        shsize = (shsize & ~UMA_ALIGN_PTR) +
1397                            (UMA_ALIGN_PTR + 1);
1398
1399                if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
1400                        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1401        }
1402
1403        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1404            (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1405                keg->uk_flags |= UMA_ZONE_HASH;
1406}
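/*
 * Worked example (illustrative; assume PAGE_SIZE == 4096): a keg whose
 * uk_size is 5000 bytes ends up with
 *
 *	uk_ppera = howmany(5000, 4096) = 2 pages per slab
 *	uk_ipers = 1, uk_rsize = 5000
 *
 * leaving 8192 - 5000 = 3192 bytes of slack, which comfortably holds the
 * in-band slab header, so the check above does not force OFFPAGE.
 */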
1407
1408static void
1409keg_cachespread_init(uma_keg_t keg)
1410{
1411        int alignsize;
1412        int trailer;
1413        int pages;
1414        int rsize;
1415
1416        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1417            ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1418
1419        alignsize = keg->uk_align + 1;
1420        rsize = keg->uk_size;
1421        /*
1422         * We want one item to start on every align boundary in a page.  To
1423         * do this we will span pages.  We will also extend the item by the
1424         * size of align if it is an even multiple of align.  Otherwise, it
1425         * would fall on the same boundary every time.
1426         */
1427        if (rsize & keg->uk_align)
1428                rsize = (rsize & ~keg->uk_align) + alignsize;
1429        if ((rsize & alignsize) == 0)
1430                rsize += alignsize;
1431        trailer = rsize - keg->uk_size;
1432        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1433        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1434        keg->uk_rsize = rsize;
1435        keg->uk_ppera = pages;
1436        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1437        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1438        KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
1439            ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1440            keg->uk_ipers));
1441}
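/*
 * Worked example (illustrative; assume PAGE_SIZE == 4096, a 128-byte item
 * and 64-byte cache lines, i.e. uk_align == 63):
 *
 *	alignsize = 64, rsize = 128 + 64 = 192
 *	    (128 is an even multiple of 64, so it is extended)
 *	pages     = (192 * (4096 / 64)) / 4096 = 3
 *	uk_ipers  = ((3 * 4096) + 64) / 192    = 64
 *
 * Because gcd(3, 64) == 1, the 64 item start offsets cycle through every
 * cache-line index within a page, which is the spreading effect this
 * initializer is after.
 */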
1442
1443/*
1444 * Keg header ctor.  This initializes all fields, locks, etc., and inserts
1445 * the keg onto the global keg list.
1446 *
1447 * Arguments/Returns follow uma_ctor specifications
1448 *      udata  Actually uma_kctor_args
1449 */
1450static int
1451keg_ctor(void *mem, int size, void *udata, int flags)
1452{
1453        struct uma_kctor_args *arg = udata;
1454        uma_keg_t keg = mem;
1455        uma_zone_t zone;
1456
1457        bzero(keg, size);
1458        keg->uk_size = arg->size;
1459        keg->uk_init = arg->uminit;
1460        keg->uk_fini = arg->fini;
1461        keg->uk_align = arg->align;
1462        keg->uk_free = 0;
1463        keg->uk_reserve = 0;
1464        keg->uk_pages = 0;
1465        keg->uk_flags = arg->flags;
1466        keg->uk_allocf = page_alloc;
1467        keg->uk_freef = page_free;
1468        keg->uk_slabzone = NULL;
1469
1470        /*
1471         * The master zone is passed to us at keg-creation time.
1472         */
1473        zone = arg->zone;
1474        keg->uk_name = zone->uz_name;
1475
1476        if (arg->flags & UMA_ZONE_VM)
1477                keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1478
1479        if (arg->flags & UMA_ZONE_ZINIT)
1480                keg->uk_init = zero_init;
1481
1482        if (arg->flags & UMA_ZONE_MALLOC)
1483                keg->uk_flags |= UMA_ZONE_VTOSLAB;
1484
1485        if (arg->flags & UMA_ZONE_PCPU)
1486#ifdef SMP
1487                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1488#else
1489                keg->uk_flags &= ~UMA_ZONE_PCPU;
1490#endif
1491
1492        if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1493                keg_cachespread_init(keg);
1494        } else {
1495                if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1496                        keg_large_init(keg);
1497                else
1498                        keg_small_init(keg);
1499        }
1500
1501        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1502                keg->uk_slabzone = slabzone;
1503
1504        /*
1505         * If we haven't booted yet we need allocations to go through the
1506         * startup cache until the vm is ready.
1507         */
1508        if (keg->uk_ppera == 1) {
1509#ifdef UMA_MD_SMALL_ALLOC
1510                keg->uk_allocf = uma_small_alloc;
1511                keg->uk_freef = uma_small_free;
1512
1513#ifndef __rtems__
1514                if (booted < UMA_STARTUP)
1515                        keg->uk_allocf = startup_alloc;
1516#endif /* __rtems__ */
1517#else
1518#ifndef __rtems__
1519                if (booted < UMA_STARTUP2)
1520                        keg->uk_allocf = startup_alloc;
1521#endif /* __rtems__ */
1522#endif
1523#ifndef __rtems__
1524        } else if (booted < UMA_STARTUP2 &&
1525            (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1526                keg->uk_allocf = startup_alloc;
1527#else /* __rtems__ */
1528        }
1529#endif /* __rtems__ */
1530
1531        /*
1532         * Initialize keg's lock
1533         */
1534        KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1535
1536        /*
1537         * If we're putting the slab header in the actual page we need to
1538         * figure out where in each page it goes.  This calculates a right
1539         * justified offset into the memory on an ALIGN_PTR boundary.
1540         */
1541        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1542                u_int totsize;
1543
1544                /* Size of the slab struct and free list */
1545                totsize = sizeof(struct uma_slab);
1546
1547                if (totsize & UMA_ALIGN_PTR)
1548                        totsize = (totsize & ~UMA_ALIGN_PTR) +
1549                            (UMA_ALIGN_PTR + 1);
1550                keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
1551
1552                /*
1553                 * The only way the following is possible is if our
1554                 * UMA_ALIGN_PTR adjustments have made us bigger than
1555                 * UMA_SLAB_SIZE.  I haven't checked whether this is
1556                 * mathematically possible for all cases, so we make
1557                 * sure here anyway.
1558                 */
1559                totsize = keg->uk_pgoff + sizeof(struct uma_slab);
1560                if (totsize > PAGE_SIZE * keg->uk_ppera) {
1561                        printf("zone %s ipers %d rsize %d size %d\n",
1562                            zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1563                            keg->uk_size);
1564                        panic("UMA slab won't fit.");
1565                }
1566        }
1567
1568        if (keg->uk_flags & UMA_ZONE_HASH)
1569                hash_alloc(&keg->uk_hash);
1570
1571#ifdef UMA_DEBUG
1572        printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1573            zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1574            keg->uk_ipers, keg->uk_ppera,
1575            (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
1576            keg->uk_free);
1577#endif
1578
1579        LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1580
1581        rw_wlock(&uma_rwlock);
1582        LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1583        rw_wunlock(&uma_rwlock);
1584        return (0);
1585}
1586
1587/*
1588 * Zone header ctor.  This initializes all fields, locks, etc.
1589 *
1590 * Arguments/Returns follow uma_ctor specifications
1591 *      udata  Actually uma_zctor_args
1592 */
1593static int
1594zone_ctor(void *mem, int size, void *udata, int flags)
1595{
1596        struct uma_zctor_args *arg = udata;
1597        uma_zone_t zone = mem;
1598        uma_zone_t z;
1599        uma_keg_t keg;
1600
1601        bzero(zone, size);
1602        zone->uz_name = arg->name;
1603        zone->uz_ctor = arg->ctor;
1604        zone->uz_dtor = arg->dtor;
1605        zone->uz_slab = zone_fetch_slab;
1606        zone->uz_init = NULL;
1607        zone->uz_fini = NULL;
1608        zone->uz_allocs = 0;
1609        zone->uz_frees = 0;
1610        zone->uz_fails = 0;
1611        zone->uz_sleeps = 0;
1612        zone->uz_count = 0;
1613        zone->uz_count_min = 0;
1614        zone->uz_flags = 0;
1615        zone->uz_warning = NULL;
1616        timevalclear(&zone->uz_ratecheck);
1617        keg = arg->keg;
1618
1619        ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
1620
1621        /*
1622         * This is a pure cache zone, no kegs.
1623         */
1624        if (arg->import) {
1625                if (arg->flags & UMA_ZONE_VM)
1626                        arg->flags |= UMA_ZFLAG_CACHEONLY;
1627                zone->uz_flags = arg->flags;
1628                zone->uz_size = arg->size;
1629                zone->uz_import = arg->import;
1630                zone->uz_release = arg->release;
1631                zone->uz_arg = arg->arg;
1632                zone->uz_lockptr = &zone->uz_lock;
1633                rw_wlock(&uma_rwlock);
1634                LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
1635                rw_wunlock(&uma_rwlock);
1636                goto out;
1637        }
1638
1639        /*
1640         * Use the regular zone/keg/slab allocator.
1641         */
1642        zone->uz_import = (uma_import)zone_import;
1643        zone->uz_release = (uma_release)zone_release;
1644        zone->uz_arg = zone;
1645
1646        if (arg->flags & UMA_ZONE_SECONDARY) {
1647                KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1648                zone->uz_init = arg->uminit;
1649                zone->uz_fini = arg->fini;
1650                zone->uz_lockptr = &keg->uk_lock;
1651                zone->uz_flags |= UMA_ZONE_SECONDARY;
1652                rw_wlock(&uma_rwlock);
1653                ZONE_LOCK(zone);
1654                LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1655                        if (LIST_NEXT(z, uz_link) == NULL) {
1656                                LIST_INSERT_AFTER(z, zone, uz_link);
1657                                break;
1658                        }
1659                }
1660                ZONE_UNLOCK(zone);
1661                rw_wunlock(&uma_rwlock);
1662        } else if (keg == NULL) {
1663                if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1664                    arg->align, arg->flags)) == NULL)
1665                        return (ENOMEM);
1666        } else {
1667                struct uma_kctor_args karg;
1668                int error;
1669
1670                /* We should only be here from uma_startup() */
1671                karg.size = arg->size;
1672                karg.uminit = arg->uminit;
1673                karg.fini = arg->fini;
1674                karg.align = arg->align;
1675                karg.flags = arg->flags;
1676                karg.zone = zone;
1677                error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1678                    flags);
1679                if (error)
1680                        return (error);
1681        }
1682
1683        /*
1684         * Link in the first keg.
1685         */
1686        zone->uz_klink.kl_keg = keg;
1687        LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1688        zone->uz_lockptr = &keg->uk_lock;
1689        zone->uz_size = keg->uk_size;
1690        zone->uz_flags |= (keg->uk_flags &
1691            (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1692
1693        /*
1694         * Some internal zones don't have room allocated for the per cpu
1695         * caches.  If we're internal, bail out here.
1696         */
1697        if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1698                KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1699                    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1700                return (0);
1701        }
1702
1703out:
1704        if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
1705                zone->uz_count = bucket_select(zone->uz_size);
1706        else
1707                zone->uz_count = BUCKET_MAX;
1708        zone->uz_count_min = zone->uz_count;
1709
1710        return (0);
1711}
1712
1713/*
1714 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1715 * table and removes the keg from the global list.
1716 *
1717 * Arguments/Returns follow uma_dtor specifications
1718 *      udata  unused
1719 */
1720static void
1721keg_dtor(void *arg, int size, void *udata)
1722{
1723        uma_keg_t keg;
1724
1725        keg = (uma_keg_t)arg;
1726        KEG_LOCK(keg);
1727        if (keg->uk_free != 0) {
1728                printf("Freed UMA keg (%s) was not empty (%d items).  "
1729                    "Lost %d pages of memory.\n",
1730                    keg->uk_name ? keg->uk_name : "",
1731                    keg->uk_free, keg->uk_pages);
1732        }
1733        KEG_UNLOCK(keg);
1734
1735        hash_free(&keg->uk_hash);
1736
1737        KEG_LOCK_FINI(keg);
1738}
1739
1740/*
1741 * Zone header dtor.
1742 *
1743 * Arguments/Returns follow uma_dtor specifications
1744 *      udata  unused
1745 */
1746static void
1747zone_dtor(void *arg, int size, void *udata)
1748{
1749        uma_klink_t klink;
1750        uma_zone_t zone;
1751        uma_keg_t keg;
1752
1753        zone = (uma_zone_t)arg;
1754        keg = zone_first_keg(zone);
1755
1756        if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1757                cache_drain(zone);
1758
1759        rw_wlock(&uma_rwlock);
1760        LIST_REMOVE(zone, uz_link);
1761        rw_wunlock(&uma_rwlock);
1762        /*
1763         * XXX there are some races here where
1764         * the zone can be drained but the zone lock
1765         * released and then refilled before we
1766         * remove it... we don't care for now.
1767         */
1768        zone_drain_wait(zone, M_WAITOK);
1769        /*
1770         * Unlink all of our kegs.
1771         */
1772        while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1773                klink->kl_keg = NULL;
1774                LIST_REMOVE(klink, kl_link);
1775                if (klink == &zone->uz_klink)
1776                        continue;
1777                free(klink, M_TEMP);
1778        }
1779        /*
1780         * We only destroy kegs from non secondary zones.
1781         */
1782        if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1783                rw_wlock(&uma_rwlock);
1784                LIST_REMOVE(keg, uk_link);
1785                rw_wunlock(&uma_rwlock);
1786                zone_free_item(kegs, keg, NULL, SKIP_NONE);
1787        }
1788        ZONE_LOCK_FINI(zone);
1789}
1790
1791/*
1792 * Traverses every zone in the system and calls a callback
1793 *
1794 * Arguments:
1795 *      zfunc  A pointer to a function which accepts a zone
1796 *              as an argument.
1797 *
1798 * Returns:
1799 *      Nothing
1800 */
1801static void
1802zone_foreach(void (*zfunc)(uma_zone_t))
1803{
1804        uma_keg_t keg;
1805        uma_zone_t zone;
1806
1807        rw_rlock(&uma_rwlock);
1808        LIST_FOREACH(keg, &uma_kegs, uk_link) {
1809                LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1810                        zfunc(zone);
1811        }
1812        rw_runlock(&uma_rwlock);
1813}
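
/*
 * Illustrative note: this is the walker used by uma_reclaim_locked() and
 * uma_print_stats() below, e.g.
 *
 *      zone_foreach(zone_drain);
 *
 * runs zone_drain() on every zone attached to every keg.
 */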
1814
1815/* Public functions */
1816/* See uma.h */
1817void
1818uma_startup(void *bootmem, int boot_pages)
1819{
1820        struct uma_zctor_args args;
1821#ifndef __rtems__
1822        uma_slab_t slab;
1823        int i;
1824#endif /* __rtems__ */
1825
1826#ifdef UMA_DEBUG
1827        printf("Creating uma keg headers zone and keg.\n");
1828#endif
1829        rw_init(&uma_rwlock, "UMA lock");
1830
1831        /* "manually" create the initial zone */
1832        memset(&args, 0, sizeof(args));
1833        args.name = "UMA Kegs";
1834        args.size = sizeof(struct uma_keg);
1835        args.ctor = keg_ctor;
1836        args.dtor = keg_dtor;
1837        args.uminit = zero_init;
1838        args.fini = NULL;
1839        args.keg = &masterkeg;
1840        args.align = 32 - 1;
1841        args.flags = UMA_ZFLAG_INTERNAL;
1842        /* The initial zone has no per-CPU queues so it's smaller */
1843        zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1844
1845#ifndef __rtems__
1846#ifdef UMA_DEBUG
1847        printf("Filling boot free list.\n");
1848#endif
1849        for (i = 0; i < boot_pages; i++) {
1850                slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
1851                slab->us_data = (uint8_t *)slab;
1852                slab->us_flags = UMA_SLAB_BOOT;
1853                LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1854        }
1855        mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1856#endif /* __rtems__ */
1857
1858#ifdef UMA_DEBUG
1859        printf("Creating uma zone headers zone and keg.\n");
1860#endif
1861        args.name = "UMA Zones";
1862        args.size = sizeof(struct uma_zone) +
1863            (sizeof(struct uma_cache) * (mp_maxid + 1));
1864        args.ctor = zone_ctor;
1865        args.dtor = zone_dtor;
1866        args.uminit = zero_init;
1867        args.fini = NULL;
1868        args.keg = NULL;
1869        args.align = 32 - 1;
1870        args.flags = UMA_ZFLAG_INTERNAL;
1871        /* The initial zone has no per-CPU queues so it's smaller */
1872        zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1873
1874#ifdef UMA_DEBUG
1875        printf("Creating slab and hash zones.\n");
1876#endif
1877
1878        /* Now make a zone for slab headers */
1879        slabzone = uma_zcreate("UMA Slabs",
1880                                sizeof(struct uma_slab),
1881                                NULL, NULL, NULL, NULL,
1882                                UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1883
1884        hashzone = uma_zcreate("UMA Hash",
1885            sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1886            NULL, NULL, NULL, NULL,
1887            UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1888
1889        bucket_init();
1890
1891#ifndef __rtems__
1892        booted = UMA_STARTUP;
1893#endif /* __rtems__ */
1894
1895#ifdef UMA_DEBUG
1896        printf("UMA startup complete.\n");
1897#endif
1898}
1899#ifdef __rtems__
1900static void
1901rtems_bsd_uma_startup(void *unused)
1902{
1903        (void) unused;
1904
1905        sx_init_flags(&uma_drain_lock, "umadrain", SX_RECURSE);
1906        uma_startup(NULL, 0);
1907}
1908
1909SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_SECOND,
1910    rtems_bsd_uma_startup, NULL);
1911#endif /* __rtems__ */
1912
1913#ifndef __rtems__
1914/* see uma.h */
1915void
1916uma_startup2(void)
1917{
1918        booted = UMA_STARTUP2;
1919        bucket_enable();
1920        sx_init(&uma_drain_lock, "umadrain");
1921#ifdef UMA_DEBUG
1922        printf("UMA startup2 complete.\n");
1923#endif
1924}
1925#endif /* __rtems__ */
1926
1927/*
1928 * Initialize our callout handle and start the periodic
1929 * zone timeout.
1930 */
1931
1932static void
1933uma_startup3(void)
1934{
1935#ifdef UMA_DEBUG
1936        printf("Starting callout.\n");
1937#endif
1938        callout_init(&uma_callout, 1);
1939        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1940#ifdef UMA_DEBUG
1941        printf("UMA startup3 complete.\n");
1942#endif
1943}
1944
1945static uma_keg_t
1946uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1947                int align, uint32_t flags)
1948{
1949        struct uma_kctor_args args;
1950
1951        args.size = size;
1952        args.uminit = uminit;
1953        args.fini = fini;
1954        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1955        args.flags = flags;
1956        args.zone = zone;
1957        return (zone_alloc_item(kegs, &args, M_WAITOK));
1958}
1959
1960/* See uma.h */
1961void
1962uma_set_align(int align)
1963{
1964
1965        if (align != UMA_ALIGN_CACHE)
1966                uma_align_cache = align;
1967}
1968
1969/* See uma.h */
1970uma_zone_t
1971uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1972                uma_init uminit, uma_fini fini, int align, uint32_t flags)
1973
1974{
1975        struct uma_zctor_args args;
1976        uma_zone_t res;
1977#ifndef __rtems__
1978        bool locked;
1979#endif /* __rtems__ */
1980
1981        /* This stuff is essential for the zone ctor */
1982        memset(&args, 0, sizeof(args));
1983        args.name = name;
1984        args.size = size;
1985        args.ctor = ctor;
1986        args.dtor = dtor;
1987        args.uminit = uminit;
1988        args.fini = fini;
1989#ifdef  INVARIANTS
1990        /*
1991         * If a zone is being created with an empty constructor and
1992         * destructor, pass UMA constructor/destructor which checks for
1993         * memory use after free.
1994         */
1995        if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
1996            ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
1997                args.ctor = trash_ctor;
1998                args.dtor = trash_dtor;
1999                args.uminit = trash_init;
2000                args.fini = trash_fini;
2001        }
2002#endif
2003        args.align = align;
2004        args.flags = flags;
2005        args.keg = NULL;
2006
2007#ifndef __rtems__
2008        if (booted < UMA_STARTUP2) {
2009                locked = false;
2010        } else {
2011#endif /* __rtems__ */
2012                sx_slock(&uma_drain_lock);
2013#ifndef __rtems__
2014                locked = true;
2015        }
2016#endif /* __rtems__ */
2017        res = zone_alloc_item(zones, &args, M_WAITOK);
2018#ifndef __rtems__
2019        if (locked)
2020#endif /* __rtems__ */
2021                sx_sunlock(&uma_drain_lock);
2022        return (res);
2023}
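
/*
 * Sketch of typical use (the "foo" names are hypothetical): a fixed-size
 * object cache is created once and then serviced through the
 * uma_zalloc()/uma_zfree() wrappers around uma_zalloc_arg() and
 * uma_zfree_arg() below:
 *
 *      static uma_zone_t foo_zone;
 *
 *      foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *          NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *
 *      struct foo *fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *      ...
 *      uma_zfree(foo_zone, fp);
 */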
2024
2025/* See uma.h */
2026uma_zone_t
2027uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
2028                    uma_init zinit, uma_fini zfini, uma_zone_t master)
2029{
2030        struct uma_zctor_args args;
2031        uma_keg_t keg;
2032        uma_zone_t res;
2033#ifndef __rtems__
2034        bool locked;
2035#endif /* __rtems__ */
2036
2037        keg = zone_first_keg(master);
2038        memset(&args, 0, sizeof(args));
2039        args.name = name;
2040        args.size = keg->uk_size;
2041        args.ctor = ctor;
2042        args.dtor = dtor;
2043        args.uminit = zinit;
2044        args.fini = zfini;
2045        args.align = keg->uk_align;
2046        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
2047        args.keg = keg;
2048
2049#ifndef __rtems__
2050        if (booted < UMA_STARTUP2) {
2051                locked = false;
2052        } else {
2053#endif /* __rtems__ */
2054                sx_slock(&uma_drain_lock);
2055#ifndef __rtems__
2056                locked = true;
2057        }
2058#endif /* __rtems__ */
2059        /* XXX Attaches only one keg of potentially many. */
2060        res = zone_alloc_item(zones, &args, M_WAITOK);
2061#ifndef __rtems__
2062        if (locked)
2063#endif /* __rtems__ */
2064                sx_sunlock(&uma_drain_lock);
2065        return (res);
2066}
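
/*
 * Sketch of typical use (names are hypothetical): a secondary zone shares
 * the master zone's keg, i.e. its slabs and item size, but layers its own
 * ctor/dtor and zinit/zfini on top:
 *
 *      uma_zone_t bar_zone;
 *
 *      bar_zone = uma_zsecond_create("bar", bar_ctor, bar_dtor,
 *          NULL, NULL, foo_zone);
 */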
2067
2068/* See uma.h */
2069uma_zone_t
2070uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2071                    uma_init zinit, uma_fini zfini, uma_import zimport,
2072                    uma_release zrelease, void *arg, int flags)
2073{
2074        struct uma_zctor_args args;
2075
2076        memset(&args, 0, sizeof(args));
2077        args.name = name;
2078        args.size = size;
2079        args.ctor = ctor;
2080        args.dtor = dtor;
2081        args.uminit = zinit;
2082        args.fini = zfini;
2083        args.import = zimport;
2084        args.release = zrelease;
2085        args.arg = arg;
2086        args.align = 0;
2087        args.flags = flags;
2088
2089        return (zone_alloc_item(zones, &args, M_WAITOK));
2090}
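
/*
 * Sketch of typical use (the callback and variable names are hypothetical):
 * a cache zone has no keg; items are imported from and released to a
 * caller-supplied backend instead of the slab layer:
 *
 *      static int  my_import(void *arg, void **store, int count, int flags);
 *      static void my_release(void *arg, void **store, int count);
 *
 *      cache_zone = uma_zcache_create("my cache", item_size, NULL, NULL,
 *          NULL, NULL, my_import, my_release, my_arg, 0);
 */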
2091
2092#ifndef __rtems__
2093static void
2094zone_lock_pair(uma_zone_t a, uma_zone_t b)
2095{
2096        if (a < b) {
2097                ZONE_LOCK(a);
2098                mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
2099        } else {
2100                ZONE_LOCK(b);
2101                mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
2102        }
2103}
2104
2105static void
2106zone_unlock_pair(uma_zone_t a, uma_zone_t b)
2107{
2108
2109        ZONE_UNLOCK(a);
2110        ZONE_UNLOCK(b);
2111}
2112
2113int
2114uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
2115{
2116        uma_klink_t klink;
2117        uma_klink_t kl;
2118        int error;
2119
2120        error = 0;
2121        klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
2122
2123        zone_lock_pair(zone, master);
2124        /*
2125         * zone must use vtoslab() to resolve objects and must already be
2126         * a secondary.
2127         */
2128        if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
2129            != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
2130                error = EINVAL;
2131                goto out;
2132        }
2133        /*
2134         * The new master must also use vtoslab().
2135         */
2136        if ((master->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2137                error = EINVAL;
2138                goto out;
2139        }
2140
2141        /*
2142         * The underlying object must be the same size.  rsize
2143         * may be different.
2144         */
2145        if (master->uz_size != zone->uz_size) {
2146                error = E2BIG;
2147                goto out;
2148        }
2149        /*
2150         * Put it at the end of the list.
2151         */
2152        klink->kl_keg = zone_first_keg(master);
2153        LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2154                if (LIST_NEXT(kl, kl_link) == NULL) {
2155                        LIST_INSERT_AFTER(kl, klink, kl_link);
2156                        break;
2157                }
2158        }
2159        klink = NULL;
2160        zone->uz_flags |= UMA_ZFLAG_MULTI;
2161        zone->uz_slab = zone_fetch_slab_multi;
2162
2163out:
2164        zone_unlock_pair(zone, master);
2165        if (klink != NULL)
2166                free(klink, M_TEMP);
2167
2168        return (error);
2169}
2170#endif /* __rtems__ */
2171
2172
2173/* See uma.h */
2174void
2175uma_zdestroy(uma_zone_t zone)
2176{
2177
2178        sx_slock(&uma_drain_lock);
2179        zone_free_item(zones, zone, NULL, SKIP_NONE);
2180        sx_sunlock(&uma_drain_lock);
2181}
2182
2183/* See uma.h */
2184void *
2185uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2186{
2187        void *item;
2188        uma_cache_t cache;
2189        uma_bucket_t bucket;
2190        int lockfail;
2191        int cpu;
2192
2193        /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2194        random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
2195
2196        /* This is the fast path allocation */
2197#ifdef UMA_DEBUG_ALLOC_1
2198        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
2199#endif
2200        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2201            zone->uz_name, flags);
2202
2203        if (flags & M_WAITOK) {
2204                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2205                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2206        }
2207        KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2208            ("uma_zalloc_arg: called with spinlock or critical section held"));
2209
2210#ifdef DEBUG_MEMGUARD
2211        if (memguard_cmp_zone(zone)) {
2212                item = memguard_alloc(zone->uz_size, flags);
2213                if (item != NULL) {
2214                        if (zone->uz_init != NULL &&
2215                            zone->uz_init(item, zone->uz_size, flags) != 0)
2216                                return (NULL);
2217                        if (zone->uz_ctor != NULL &&
2218                            zone->uz_ctor(item, zone->uz_size, udata,
2219                            flags) != 0) {
2220                                zone->uz_fini(item, zone->uz_size);
2221                                return (NULL);
2222                        }
2223                        return (item);
2224                }
2225                /* This is unfortunate but should not be fatal. */
2226        }
2227#endif
2228        /*
2229         * If possible, allocate from the per-CPU cache.  There are two
2230         * requirements for safe access to the per-CPU cache: (1) the thread
2231         * accessing the cache must not be preempted or yield during access,
2232         * and (2) the thread must not migrate CPUs without switching which
2233         * cache it accesses.  We rely on a critical section to prevent
2234         * preemption and migration.  We release the critical section in
2235         * order to acquire the zone mutex if we are unable to allocate from
2236         * the current cache; when we re-acquire the critical section, we
2237         * must detect and handle migration if it has occurred.
2238         */
2239        critical_enter();
2240        cpu = curcpu;
2241        cache = &zone->uz_cpu[cpu];
2242
2243zalloc_start:
2244        bucket = cache->uc_allocbucket;
2245        if (bucket != NULL && bucket->ub_cnt > 0) {
2246                bucket->ub_cnt--;
2247                item = bucket->ub_bucket[bucket->ub_cnt];
2248#ifdef INVARIANTS
2249                bucket->ub_bucket[bucket->ub_cnt] = NULL;
2250#endif
2251                KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2252                cache->uc_allocs++;
2253                critical_exit();
2254                if (zone->uz_ctor != NULL &&
2255                    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2256                        atomic_add_long(&zone->uz_fails, 1);
2257                        zone_free_item(zone, item, udata, SKIP_DTOR);
2258                        return (NULL);
2259                }
2260#ifdef INVARIANTS
2261                uma_dbg_alloc(zone, NULL, item);
2262#endif
2263                if (flags & M_ZERO)
2264                        uma_zero_item(item, zone);
2265                return (item);
2266        }
2267
2268        /*
2269         * We have run out of items in our alloc bucket.
2270         * See if we can switch with our free bucket.
2271         */
2272        bucket = cache->uc_freebucket;
2273        if (bucket != NULL && bucket->ub_cnt > 0) {
2274#ifdef UMA_DEBUG_ALLOC
2275                printf("uma_zalloc: Swapping empty with alloc.\n");
2276#endif
2277                cache->uc_freebucket = cache->uc_allocbucket;
2278                cache->uc_allocbucket = bucket;
2279                goto zalloc_start;
2280        }
2281
2282        /*
2283         * Discard any empty allocation bucket while we hold no locks.
2284         */
2285        bucket = cache->uc_allocbucket;
2286        cache->uc_allocbucket = NULL;
2287        critical_exit();
2288        if (bucket != NULL)
2289                bucket_free(zone, bucket, udata);
2290
2291        /* Short-circuit for zones without buckets and low memory. */
2292        if (zone->uz_count == 0 || bucketdisable)
2293                goto zalloc_item;
2294
2295        /*
2296         * Our attempt to retrieve the item from the per-CPU cache failed, so
2297         * we must go back to the zone.  This requires the zone lock, so we
2298         * must drop the critical section, then re-acquire it when we go back
2299         * to the cache.  Since the critical section is released, we may be
2300         * preempted or migrate.  As such, make sure not to maintain any
2301         * thread-local state specific to the cache from prior to releasing
2302         * the critical section.
2303         */
2304        lockfail = 0;
2305        if (ZONE_TRYLOCK(zone) == 0) {
2306                /* Record contention to size the buckets. */
2307                ZONE_LOCK(zone);
2308                lockfail = 1;
2309        }
2310        critical_enter();
2311        cpu = curcpu;
2312        cache = &zone->uz_cpu[cpu];
2313
2314        /*
2315         * Since we have locked the zone we may as well send back our stats.
2316         */
2317        atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2318        atomic_add_long(&zone->uz_frees, cache->uc_frees);
2319        cache->uc_allocs = 0;
2320        cache->uc_frees = 0;
2321
2322        /* See if we lost the race to fill the cache. */
2323        if (cache->uc_allocbucket != NULL) {
2324                ZONE_UNLOCK(zone);
2325                goto zalloc_start;
2326        }
2327
2328        /*
2329         * Check the zone's cache of buckets.
2330         */
2331        if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
2332                KASSERT(bucket->ub_cnt != 0,
2333                    ("uma_zalloc_arg: Returning an empty bucket."));
2334
2335                LIST_REMOVE(bucket, ub_link);
2336                cache->uc_allocbucket = bucket;
2337                ZONE_UNLOCK(zone);
2338                goto zalloc_start;
2339        }
2340        /* We are no longer associated with this CPU. */
2341        critical_exit();
2342
2343        /*
2344         * We bump the uz count when the cache size is insufficient to
2345         * handle the working set.
2346         */
2347        if (lockfail && zone->uz_count < BUCKET_MAX)
2348                zone->uz_count++;
2349        ZONE_UNLOCK(zone);
2350
2351        /*
2352         * Now let's just fill a bucket and put it on the free list.  If that
2353         * works we'll restart the allocation from the beginning and it
2354         * will use the just-filled bucket.
2355         */
2356        bucket = zone_alloc_bucket(zone, udata, flags);
2357        if (bucket != NULL) {
2358                ZONE_LOCK(zone);
2359                critical_enter();
2360                cpu = curcpu;
2361                cache = &zone->uz_cpu[cpu];
2362                /*
2363                 * See if we lost the race or were migrated.  Cache the
2364                 * initialized bucket to make this less likely or claim
2365                 * the memory directly.
2366                 */
2367                if (cache->uc_allocbucket == NULL)
2368                        cache->uc_allocbucket = bucket;
2369                else
2370                        LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2371                ZONE_UNLOCK(zone);
2372                goto zalloc_start;
2373        }
2374
2375        /*
2376         * We may not be able to get a bucket so return an actual item.
2377         */
2378#ifdef UMA_DEBUG
2379        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2380#endif
2381
2382zalloc_item:
2383        item = zone_alloc_item(zone, udata, flags);
2384
2385        return (item);
2386}
2387
2388static uma_slab_t
2389keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2390{
2391        uma_slab_t slab;
2392        int reserve;
2393
2394        mtx_assert(&keg->uk_lock, MA_OWNED);
2395        slab = NULL;
2396        reserve = 0;
2397        if ((flags & M_USE_RESERVE) == 0)
2398                reserve = keg->uk_reserve;
2399
2400        for (;;) {
2401                /*
2402                 * Find a slab with some space.  Prefer slabs that are partially
2403                 * used over those that are totally full.  This helps to reduce
2404                 * fragmentation.
2405                 */
2406                if (keg->uk_free > reserve) {
2407                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
2408                                slab = LIST_FIRST(&keg->uk_part_slab);
2409                        } else {
2410                                slab = LIST_FIRST(&keg->uk_free_slab);
2411                                LIST_REMOVE(slab, us_link);
2412                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2413                                    us_link);
2414                        }
2415                        MPASS(slab->us_keg == keg);
2416                        return (slab);
2417                }
2418
2419                /*
2420                 * M_NOVM means don't ask at all!
2421                 */
2422                if (flags & M_NOVM)
2423                        break;
2424
2425                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2426                        keg->uk_flags |= UMA_ZFLAG_FULL;
2427                        /*
2428                         * If this is not a multi-zone, set the FULL bit.
2429                         * Otherwise slab_multi() takes care of it.
2430                         */
2431                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
2432                                zone->uz_flags |= UMA_ZFLAG_FULL;
2433                                zone_log_warning(zone);
2434                                zone_maxaction(zone);
2435                        }
2436                        if (flags & M_NOWAIT)
2437                                break;
2438                        zone->uz_sleeps++;
2439                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2440                        continue;
2441                }
2442                slab = keg_alloc_slab(keg, zone, flags);
2443                /*
2444                 * If we got a slab here it's safe to mark it partially used
2445                 * and return.  We assume that the caller is going to remove
2446                 * at least one item.
2447                 */
2448                if (slab) {
2449                        MPASS(slab->us_keg == keg);
2450                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2451                        return (slab);
2452                }
2453                /*
2454                 * We might not have been able to get a slab but another cpu
2455                 * could have while we were unlocked.  Check again before we
2456                 * fail.
2457                 */
2458                flags |= M_NOVM;
2459        }
2460        return (slab);
2461}
2462
2463static uma_slab_t
2464zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2465{
2466        uma_slab_t slab;
2467
2468        if (keg == NULL) {
2469                keg = zone_first_keg(zone);
2470                KEG_LOCK(keg);
2471        }
2472
2473        for (;;) {
2474                slab = keg_fetch_slab(keg, zone, flags);
2475                if (slab)
2476                        return (slab);
2477                if (flags & (M_NOWAIT | M_NOVM))
2478                        break;
2479        }
2480        KEG_UNLOCK(keg);
2481        return (NULL);
2482}
2483
2484#ifndef __rtems__
2485/*
2486 * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2487 * with the keg locked.  On NULL no lock is held.
2488 *
2489 * The last pointer is used to seed the search.  It is not required.
2490 */
2491static uma_slab_t
2492zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2493{
2494        uma_klink_t klink;
2495        uma_slab_t slab;
2496        uma_keg_t keg;
2497        int flags;
2498        int empty;
2499        int full;
2500
2501        /*
2502         * Don't wait on the first pass.  This will skip limit tests
2503         * as well.  We don't want to block if we can find a provider
2504         * without blocking.
2505         */
2506        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2507        /*
2508         * Use the last slab allocated as a hint for where to start
2509         * the search.
2510         */
2511        if (last != NULL) {
2512                slab = keg_fetch_slab(last, zone, flags);
2513                if (slab)
2514                        return (slab);
2515                KEG_UNLOCK(last);
2516        }
2517        /*
2518         * Loop until we have a slab in case of transient failures
2519         * while M_WAITOK is specified.  It is not clear this is 100%
2520         * required, but it has been done this way for a long time.
2521         */
2522        for (;;) {
2523                empty = 0;
2524                full = 0;
2525                /*
2526                 * Search the available kegs for slabs.  Be careful to hold the
2527                 * correct lock while calling into the keg layer.
2528                 */
2529                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2530                        keg = klink->kl_keg;
2531                        KEG_LOCK(keg);
2532                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2533                                slab = keg_fetch_slab(keg, zone, flags);
2534                                if (slab)
2535                                        return (slab);
2536                        }
2537                        if (keg->uk_flags & UMA_ZFLAG_FULL)
2538                                full++;
2539                        else
2540                                empty++;
2541                        KEG_UNLOCK(keg);
2542                }
2543                if (rflags & (M_NOWAIT | M_NOVM))
2544                        break;
2545                flags = rflags;
2546                /*
2547                 * All kegs are full.  XXX We can't atomically check all kegs
2548                 * and sleep so just sleep for a short period and retry.
2549                 */
2550                if (full && !empty) {
2551                        ZONE_LOCK(zone);
2552                        zone->uz_flags |= UMA_ZFLAG_FULL;
2553                        zone->uz_sleeps++;
2554                        zone_log_warning(zone);
2555                        zone_maxaction(zone);
2556                        msleep(zone, zone->uz_lockptr, PVM,
2557                            "zonelimit", hz/100);
2558                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
2559                        ZONE_UNLOCK(zone);
2560                        continue;
2561                }
2562        }
2563        return (NULL);
2564}
2565#endif /* __rtems__ */
2566
2567static void *
2568slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2569{
2570        void *item;
2571        uint8_t freei;
2572
2573        MPASS(keg == slab->us_keg);
2574        mtx_assert(&keg->uk_lock, MA_OWNED);
2575
2576        freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2577        BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2578        item = slab->us_data + (keg->uk_rsize * freei);
2579        slab->us_freecount--;
2580        keg->uk_free--;
2581
2582        /* Move this slab to the full list */
2583        if (slab->us_freecount == 0) {
2584                LIST_REMOVE(slab, us_link);
2585                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2586        }
2587
2588        return (item);
2589}
2590
2591static int
2592zone_import(uma_zone_t zone, void **bucket, int max, int flags)
2593{
2594        uma_slab_t slab;
2595        uma_keg_t keg;
2596        int i;
2597
2598        slab = NULL;
2599        keg = NULL;
2600        /* Try to keep the buckets totally full */
2601        for (i = 0; i < max; ) {
2602                if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2603                        break;
2604                keg = slab->us_keg;
2605                while (slab->us_freecount && i < max) {
2606                        bucket[i++] = slab_alloc_item(keg, slab);
2607                        if (keg->uk_free <= keg->uk_reserve)
2608                                break;
2609                }
2610                /* Don't grab more than one slab at a time. */
2611                flags &= ~M_WAITOK;
2612                flags |= M_NOWAIT;
2613        }
2614        if (slab != NULL)
2615                KEG_UNLOCK(keg);
2616
2617        return (i);
2618}
2619
2620static uma_bucket_t
2621zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
2622{
2623        uma_bucket_t bucket;
2624        int max;
2625
2626        /* Don't wait for buckets, preserve caller's NOVM setting. */
2627        bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
2628        if (bucket == NULL)
2629                return (NULL);
2630
2631        max = MIN(bucket->ub_entries, zone->uz_count);
2632        bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
2633            max, flags);
2634
2635        /*
2636         * Initialize the memory if necessary.
2637         */
2638        if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
2639                int i;
2640
2641                for (i = 0; i < bucket->ub_cnt; i++)
2642                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2643                            flags) != 0)
2644                                break;
2645                /*
2646                 * If we couldn't initialize the whole bucket, put the
2647                 * rest back onto the freelist.
2648                 */
2649                if (i != bucket->ub_cnt) {
2650                        zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
2651                            bucket->ub_cnt - i);
2652#ifdef INVARIANTS
2653                        bzero(&bucket->ub_bucket[i],
2654                            sizeof(void *) * (bucket->ub_cnt - i));
2655#endif
2656                        bucket->ub_cnt = i;
2657                }
2658        }
2659
2660        if (bucket->ub_cnt == 0) {
2661                bucket_free(zone, bucket, udata);
2662                atomic_add_long(&zone->uz_fails, 1);
2663                return (NULL);
2664        }
2665
2666        return (bucket);
2667}
2668
2669/*
2670 * Allocates a single item from a zone.
2671 *
2672 * Arguments
2673 *      zone   The zone to alloc for.
2674 *      udata  The data to be passed to the constructor.
2675 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2676 *
2677 * Returns
2678 *      NULL if there is no memory and M_NOWAIT is set
2679 *      An item if successful
2680 */
2681
2682static void *
2683zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2684{
2685        void *item;
2686
2687        item = NULL;
2688
2689#ifdef UMA_DEBUG_ALLOC
2690        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2691#endif
2692        if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
2693                goto fail;
2694        atomic_add_long(&zone->uz_allocs, 1);
2695
2696        /*
2697         * We have to call both the zone's init (not the keg's init)
2698         * and the zone's ctor.  This is because the item is going from
2699         * a keg slab directly to the user, and the user is expecting it
2700         * to be both zone-init'd as well as zone-ctor'd.
2701         */
2702        if (zone->uz_init != NULL) {
2703                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2704                        zone_free_item(zone, item, udata, SKIP_FINI);
2705                        goto fail;
2706                }
2707        }
2708        if (zone->uz_ctor != NULL) {
2709                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2710                        zone_free_item(zone, item, udata, SKIP_DTOR);
2711                        goto fail;
2712                }
2713        }
2714#ifdef INVARIANTS
2715        uma_dbg_alloc(zone, NULL, item);
2716#endif
2717        if (flags & M_ZERO)
2718                uma_zero_item(item, zone);
2719
2720        return (item);
2721
2722fail:
2723        atomic_add_long(&zone->uz_fails, 1);
2724        return (NULL);
2725}
2726
2727/* See uma.h */
2728void
2729uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2730{
2731        uma_cache_t cache;
2732        uma_bucket_t bucket;
2733        int lockfail;
2734        int cpu;
2735
2736        /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2737        random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
2738
2739#ifdef UMA_DEBUG_ALLOC_1
2740        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2741#endif
2742        CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2743            zone->uz_name);
2744
2745        KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2746            ("uma_zfree_arg: called with spinlock or critical section held"));
2747
2748        /* uma_zfree(..., NULL) does nothing, to match free(9). */
2749        if (item == NULL)
2750                return;
2751#ifdef DEBUG_MEMGUARD
2752        if (is_memguard_addr(item)) {
2753                if (zone->uz_dtor != NULL)
2754                        zone->uz_dtor(item, zone->uz_size, udata);
2755                if (zone->uz_fini != NULL)
2756                        zone->uz_fini(item, zone->uz_size);
2757                memguard_free(item);
2758                return;
2759        }
2760#endif
2761#ifdef INVARIANTS
2762        if (zone->uz_flags & UMA_ZONE_MALLOC)
2763                uma_dbg_free(zone, udata, item);
2764        else
2765                uma_dbg_free(zone, NULL, item);
2766#endif
2767        if (zone->uz_dtor != NULL)
2768                zone->uz_dtor(item, zone->uz_size, udata);
2769
2770        /*
2771         * The race here is acceptable.  If we miss it we'll just have to wait
2772         * a little longer for the limits to be reset.
2773         */
2774        if (zone->uz_flags & UMA_ZFLAG_FULL)
2775                goto zfree_item;
2776
2777        /*
2778         * If possible, free to the per-CPU cache.  There are two
2779         * requirements for safe access to the per-CPU cache: (1) the thread
2780         * accessing the cache must not be preempted or yield during access,
2781         * and (2) the thread must not migrate CPUs without switching which
2782         * cache it accesses.  We rely on a critical section to prevent
2783         * preemption and migration.  We release the critical section in
2784         * order to acquire the zone mutex if we are unable to free to the
2785         * current cache; when we re-acquire the critical section, we must
2786         * detect and handle migration if it has occurred.
2787         */
2788zfree_restart:
2789        critical_enter();
2790        cpu = curcpu;
2791        cache = &zone->uz_cpu[cpu];
2792
2793zfree_start:
2794        /*
2795         * Try to free into the allocbucket first to give LIFO ordering
2796         * for cache-hot data structures.  Spill over into the freebucket
2797         * if necessary.  Alloc will swap them if one runs dry.
2798         */
2799        bucket = cache->uc_allocbucket;
2800        if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
2801                bucket = cache->uc_freebucket;
2802        if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2803                KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2804                    ("uma_zfree: Freeing to non free bucket index."));
2805                bucket->ub_bucket[bucket->ub_cnt] = item;
2806                bucket->ub_cnt++;
2807                cache->uc_frees++;
2808                critical_exit();
2809                return;
2810        }
2811
2812        /*
2813         * We must go back to the zone, which requires acquiring the zone lock,
2814         * which in turn means we must release and re-acquire the critical
2815         * section.  Since the critical section is released, we may be
2816         * preempted or migrate.  As such, make sure not to maintain any
2817         * thread-local state specific to the cache from prior to releasing
2818         * the critical section.
2819         */
2820        critical_exit();
2821        if (zone->uz_count == 0 || bucketdisable)
2822                goto zfree_item;
2823
2824        lockfail = 0;
2825        if (ZONE_TRYLOCK(zone) == 0) {
2826                /* Record contention to size the buckets. */
2827                ZONE_LOCK(zone);
2828                lockfail = 1;
2829        }
2830        critical_enter();
2831        cpu = curcpu;
2832        cache = &zone->uz_cpu[cpu];
2833
2834        /*
2835         * Since we have locked the zone we may as well send back our stats.
2836         */
2837        atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2838        atomic_add_long(&zone->uz_frees, cache->uc_frees);
2839        cache->uc_allocs = 0;
2840        cache->uc_frees = 0;
2841
2842        bucket = cache->uc_freebucket;
2843        if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2844                ZONE_UNLOCK(zone);
2845                goto zfree_start;
2846        }
2847        cache->uc_freebucket = NULL;
2848        /* We are no longer associated with this CPU. */
2849        critical_exit();
2850
2851        /* Can we throw this on the zone full list? */
2852        if (bucket != NULL) {
2853#ifdef UMA_DEBUG_ALLOC
2854                printf("uma_zfree: Putting old bucket on the free list.\n");
2855#endif
2856                /* ub_cnt is pointing to the last free item */
2857                KASSERT(bucket->ub_cnt != 0,
2858                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2859                LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2860        }
2861
2862        /*
2863         * We bump the uz count when the cache size is insufficient to
2864         * handle the working set.
2865         */
2866        if (lockfail && zone->uz_count < BUCKET_MAX)
2867                zone->uz_count++;
2868        ZONE_UNLOCK(zone);
2869
2870#ifdef UMA_DEBUG_ALLOC
2871        printf("uma_zfree: Allocating new free bucket.\n");
2872#endif
2873        bucket = bucket_alloc(zone, udata, M_NOWAIT);
2874        if (bucket) {
2875                critical_enter();
2876                cpu = curcpu;
2877                cache = &zone->uz_cpu[cpu];
2878                if (cache->uc_freebucket == NULL) {
2879                        cache->uc_freebucket = bucket;
2880                        goto zfree_start;
2881                }
2882                /*
2883                 * We lost the race, start over.  We have to drop our
2884                 * critical section to free the bucket.
2885                 */
2886                critical_exit();
2887                bucket_free(zone, bucket, udata);
2888                goto zfree_restart;
2889        }
2890
2891        /*
2892         * If nothing else caught this, we'll just do an internal free.
2893         */
2894zfree_item:
2895        zone_free_item(zone, item, udata, SKIP_DTOR);
2896
2897        return;
2898}
2899
2900static void
2901slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
2902{
2903        uint8_t freei;
2904
2905        mtx_assert(&keg->uk_lock, MA_OWNED);
2906        MPASS(keg == slab->us_keg);
2907
2908        /* Do we need to remove from any lists? */
2909        if (slab->us_freecount+1 == keg->uk_ipers) {
2910                LIST_REMOVE(slab, us_link);
2911                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2912        } else if (slab->us_freecount == 0) {
2913                LIST_REMOVE(slab, us_link);
2914                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2915        }
2916
2917        /* Slab management. */
2918        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2919        BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
2920        slab->us_freecount++;
2921
2922        /* Keg statistics. */
2923        keg->uk_free++;
2924}
2925
2926static void
2927zone_release(uma_zone_t zone, void **bucket, int cnt)
2928{
2929        void *item;
2930        uma_slab_t slab;
2931        uma_keg_t keg;
2932        uint8_t *mem;
2933        int clearfull;
2934        int i;
2935
2936        clearfull = 0;
2937        keg = zone_first_keg(zone);
2938        KEG_LOCK(keg);
2939        for (i = 0; i < cnt; i++) {
2940                item = bucket[i];
2941                if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2942                        mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
2943                        if (zone->uz_flags & UMA_ZONE_HASH) {
2944                                slab = hash_sfind(&keg->uk_hash, mem);
2945                        } else {
2946                                mem += keg->uk_pgoff;
2947                                slab = (uma_slab_t)mem;
2948                        }
2949                } else {
2950                        slab = vtoslab((vm_offset_t)item);
2951                        if (slab->us_keg != keg) {
2952                                KEG_UNLOCK(keg);
2953                                keg = slab->us_keg;
2954                                KEG_LOCK(keg);
2955                        }
2956                }
2957                slab_free_item(keg, slab, item);
2958                if (keg->uk_flags & UMA_ZFLAG_FULL) {
2959                        if (keg->uk_pages < keg->uk_maxpages) {
2960                                keg->uk_flags &= ~UMA_ZFLAG_FULL;
2961                                clearfull = 1;
2962                        }
2963
2964                        /*
2965                         * We can handle one more allocation. Since we're
2966                         * clearing ZFLAG_FULL, wake up all procs blocked
2967                         * on pages. This should be uncommon, so keeping this
2968                         * simple for now (rather than adding count of blocked
2969                         * threads etc).
2970                         */
2971                        wakeup(keg);
2972                }
2973        }
2974        KEG_UNLOCK(keg);
2975        if (clearfull) {
2976                ZONE_LOCK(zone);
2977                zone->uz_flags &= ~UMA_ZFLAG_FULL;
2978                wakeup(zone);
2979                ZONE_UNLOCK(zone);
2980        }
2981
2982}
2983
2984/*
2985 * Frees a single item to any zone.
2986 *
2987 * Arguments:
2988 *      zone   The zone to free to
2989 *      item   The item we're freeing
2990 *      udata  User supplied data for the dtor
2991 *      skip   Skip dtors and finis
2992 */
2993static void
2994zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2995{
2996
2997#ifdef INVARIANTS
2998        if (skip == SKIP_NONE) {
2999                if (zone->uz_flags & UMA_ZONE_MALLOC)
3000                        uma_dbg_free(zone, udata, item);
3001                else
3002                        uma_dbg_free(zone, NULL, item);
3003        }
3004#endif
3005        if (skip < SKIP_DTOR && zone->uz_dtor)
3006                zone->uz_dtor(item, zone->uz_size, udata);
3007
3008        if (skip < SKIP_FINI && zone->uz_fini)
3009                zone->uz_fini(item, zone->uz_size);
3010
3011        atomic_add_long(&zone->uz_frees, 1);
3012        zone->uz_release(zone->uz_arg, &item, 1);
3013}
3014
3015/* See uma.h */
3016int
3017uma_zone_set_max(uma_zone_t zone, int nitems)
3018{
3019        uma_keg_t keg;
3020
3021        keg = zone_first_keg(zone);
3022        if (keg == NULL)
3023                return (0);
3024        KEG_LOCK(keg);
3025#ifdef __rtems__
3026#ifdef SMP
3027        /*
3028         * Ensure we have enough items to fill the per-processor caches.  This
3029         * is a heuristic approach and does not work under all conditions.
3030         */
3031        nitems += 2 * BUCKET_MAX * (mp_maxid + 1);
3032#endif
3033#endif /* __rtems__ */
3034        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
3035        if (keg->uk_maxpages * keg->uk_ipers < nitems)
3036                keg->uk_maxpages += keg->uk_ppera;
3037        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3038        KEG_UNLOCK(keg);
3039
3040        return (nitems);
3041}
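
/*
 * Sketch of intended use (foo_zone is hypothetical): the requested limit
 * is rounded to whole slabs, so the return value, not the argument, is
 * the limit actually in effect:
 *
 *      int eff_limit;
 *
 *      eff_limit = uma_zone_set_max(foo_zone, 1024);
 */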
3042
3043/* See uma.h */
3044int
3045uma_zone_get_max(uma_zone_t zone)
3046{
3047        int nitems;
3048        uma_keg_t keg;
3049
3050        keg = zone_first_keg(zone);
3051        if (keg == NULL)
3052                return (0);
3053        KEG_LOCK(keg);
3054        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3055        KEG_UNLOCK(keg);
3056
3057        return (nitems);
3058}
3059
3060/* See uma.h */
3061void
3062uma_zone_set_warning(uma_zone_t zone, const char *warning)
3063{
3064
3065        ZONE_LOCK(zone);
3066        zone->uz_warning = warning;
3067        ZONE_UNLOCK(zone);
3068}
3069
3070/* See uma.h */
3071void
3072uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
3073{
3074
3075        ZONE_LOCK(zone);
3076        TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
3077        ZONE_UNLOCK(zone);
3078}
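
/*
 * Sketch of intended use (the warning text and handler are hypothetical):
 * both hooks fire from the slab layer when the zone runs into its limit
 * (see the zone_log_warning()/zone_maxaction() calls in keg_fetch_slab()
 * above); the warning itself is rate-limited via uz_ratecheck:
 *
 *      uma_zone_set_warning(foo_zone, "foo zone limit reached");
 *      uma_zone_set_maxaction(foo_zone, foo_maxaction);
 */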
3079
3080/* See uma.h */
3081int
3082uma_zone_get_cur(uma_zone_t zone)
3083{
3084        int64_t nitems;
3085        u_int i;
3086
3087        ZONE_LOCK(zone);
3088        nitems = zone->uz_allocs - zone->uz_frees;
3089        CPU_FOREACH(i) {
3090                /*
3091                 * See the comment in sysctl_vm_zone_stats() regarding the
3092                 * safety of accessing the per-cpu caches. With the zone lock
3093                 * held, it is safe, but can potentially result in stale data.
3094                 */
3095                nitems += zone->uz_cpu[i].uc_allocs -
3096                    zone->uz_cpu[i].uc_frees;
3097        }
3098        ZONE_UNLOCK(zone);
3099
3100        return (nitems < 0 ? 0 : nitems);
3101}
3102
3103/* See uma.h */
3104void
3105uma_zone_set_init(uma_zone_t zone, uma_init uminit)
3106{
3107        uma_keg_t keg;
3108
3109        keg = zone_first_keg(zone);
3110        KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
3111        KEG_LOCK(keg);
3112        KASSERT(keg->uk_pages == 0,
3113            ("uma_zone_set_init on non-empty keg"));
3114        keg->uk_init = uminit;
3115        KEG_UNLOCK(keg);
3116}
3117
3118/* See uma.h */
3119void
3120uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3121{
3122        uma_keg_t keg;
3123
3124        keg = zone_first_keg(zone);
3125        KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
3126        KEG_LOCK(keg);
3127        KASSERT(keg->uk_pages == 0,
3128            ("uma_zone_set_fini on non-empty keg"));
3129        keg->uk_fini = fini;
3130        KEG_UNLOCK(keg);
3131}
3132
3133/* See uma.h */
3134void
3135uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3136{
3137
3138        ZONE_LOCK(zone);
3139        KASSERT(zone_first_keg(zone)->uk_pages == 0,
3140            ("uma_zone_set_zinit on non-empty keg"));
3141        zone->uz_init = zinit;
3142        ZONE_UNLOCK(zone);
3143}
3144
3145/* See uma.h */
3146void
3147uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3148{
3149
3150        ZONE_LOCK(zone);
3151        KASSERT(zone_first_keg(zone)->uk_pages == 0,
3152            ("uma_zone_set_zfini on non-empty keg"));
3153        zone->uz_fini = zfini;
3154        ZONE_UNLOCK(zone);
3155}
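
/*
 * Sketch of intended use (foo_zone, foo_init and foo_fini are
 * hypothetical): these hooks may only be attached while the backing keg
 * is still empty, as the uk_pages == 0 assertions above enforce:
 *
 *      foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *          NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *      uma_zone_set_init(foo_zone, foo_init);
 *      uma_zone_set_fini(foo_zone, foo_fini);
 */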
3156
3157/* See uma.h */
3158/* XXX uk_freef is not actually used with the zone locked */
3159void
3160uma_zone_set_freef(uma_zone_t zone, uma_free freef)
3161{
3162        uma_keg_t keg;
3163
3164        keg = zone_first_keg(zone);
3165        KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3166        KEG_LOCK(keg);
3167        keg->uk_freef = freef;
3168        KEG_UNLOCK(keg);
3169}
3170
3171/* See uma.h */
3172/* XXX uk_allocf is not actually used with the zone locked */
3173void
3174uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3175{
3176        uma_keg_t keg;
3177
3178        keg = zone_first_keg(zone);
3179        KEG_LOCK(keg);
3180        keg->uk_allocf = allocf;
3181        KEG_UNLOCK(keg);
3182}
3183
3184/* See uma.h */
3185void
3186uma_zone_reserve(uma_zone_t zone, int items)
3187{
3188        uma_keg_t keg;
3189
3190        keg = zone_first_keg(zone);
3191        if (keg == NULL)
3192                return;
3193        KEG_LOCK(keg);
3194        keg->uk_reserve = items;
3195        KEG_UNLOCK(keg);
3196
3197        return;
3198}
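
/*
 * Sketch of intended use (foo_zone is hypothetical): keg_fetch_slab()
 * keeps the reserved items back from ordinary requests; only allocations
 * that pass M_USE_RESERVE may consume them:
 *
 *      uma_zone_reserve(foo_zone, 8);
 *      ...
 *      fp = uma_zalloc(foo_zone, M_NOWAIT | M_USE_RESERVE);
 */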
3199
3200#ifndef __rtems__
3201/* See uma.h */
3202int
3203uma_zone_reserve_kva(uma_zone_t zone, int count)
3204{
3205        uma_keg_t keg;
3206        vm_offset_t kva;
3207        u_int pages;
3208
3209        keg = zone_first_keg(zone);
3210        if (keg == NULL)
3211                return (0);
3212        pages = count / keg->uk_ipers;
3213
3214        if (pages * keg->uk_ipers < count)
3215                pages++;
3216        pages *= keg->uk_ppera;
3217
3218#ifdef UMA_MD_SMALL_ALLOC
3219        if (keg->uk_ppera > 1) {
3220#else
3221        if (1) {
3222#endif
3223                kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
3224                if (kva == 0)
3225                        return (0);
3226        } else
3227                kva = 0;
3228        KEG_LOCK(keg);
3229        keg->uk_kva = kva;
3230        keg->uk_offset = 0;
3231        keg->uk_maxpages = pages;
3232#ifdef UMA_MD_SMALL_ALLOC
3233        keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3234#else
3235        keg->uk_allocf = noobj_alloc;
3236#endif
3237        keg->uk_flags |= UMA_ZONE_NOFREE;
3238        KEG_UNLOCK(keg);
3239
3240        return (1);
3241}
3242
3243/* See uma.h */
3244void
3245uma_prealloc(uma_zone_t zone, int items)
3246{
3247        int slabs;
3248        uma_slab_t slab;
3249        uma_keg_t keg;
3250
3251        keg = zone_first_keg(zone);
3252        if (keg == NULL)
3253                return;
3254        KEG_LOCK(keg);
3255        slabs = items / keg->uk_ipers;
3256        if (slabs * keg->uk_ipers < items)
3257                slabs++;
3258        while (slabs > 0) {
3259                slab = keg_alloc_slab(keg, zone, M_WAITOK);
3260                if (slab == NULL)
3261                        break;
3262                MPASS(slab->us_keg == keg);
3263                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3264                slabs--;
3265        }
3266        KEG_UNLOCK(keg);
3267}
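
/*
 * Sketch of intended use (foo_zone is hypothetical): populate the keg
 * with enough free slabs to satisfy at least the given number of items
 * up front, so that later allocations need not go to the VM:
 *
 *      uma_prealloc(foo_zone, 128);
 */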
3268#endif /* __rtems__ */
3269
3270/* See uma.h */
3271static void
3272uma_reclaim_locked(bool kmem_danger)
3273{
3274
3275#ifdef UMA_DEBUG
3276        printf("UMA: vm asked us to release pages!\n");
3277#endif
3278        sx_assert(&uma_drain_lock, SA_XLOCKED);
3279        bucket_enable();
3280        zone_foreach(zone_drain);
3281#ifndef __rtems__
3282        if (vm_page_count_min() || kmem_danger) {
3283                cache_drain_safe(NULL);
3284                zone_foreach(zone_drain);
3285        }
3286#endif /* __rtems__ */
3287        /*
3288         * Some slabs may have been freed, but this zone was visited early;
3289         * visit it again so that we can free pages that became empty once
3290         * the other zones were drained.  We have to do the same for buckets.
3291         */
3292        zone_drain(slabzone);
3293        bucket_zone_drain();
3294}
3295
3296void
3297uma_reclaim(void)
3298{
3299
3300        sx_xlock(&uma_drain_lock);
3301        uma_reclaim_locked(false);
3302        sx_xunlock(&uma_drain_lock);
3303}
3304
3305static int uma_reclaim_needed;
3306
3307void
3308uma_reclaim_wakeup(void)
3309{
3310
3311        uma_reclaim_needed = 1;
3312        wakeup(&uma_reclaim_needed);
3313}
3314
3315void
3316uma_reclaim_worker(void *arg __unused)
3317{
3318
3319        sx_xlock(&uma_drain_lock);
3320        for (;;) {
3321                sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
3322                    "umarcl", 0);
3323                if (uma_reclaim_needed) {
3324                        uma_reclaim_needed = 0;
3325#ifndef __rtems__
3326                        sx_xunlock(&uma_drain_lock);
3327                        EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
3328                        sx_xlock(&uma_drain_lock);
3329#endif /* __rtems__ */
3330                        uma_reclaim_locked(true);
3331                }
3332        }
3333}
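
/*
 * Usage sketch: a thread that may sleep can drain every zone synchronously
 * with uma_reclaim(); contexts that must not block should instead call
 * uma_reclaim_wakeup() and let the worker above do the work.  On FreeBSD the
 * worker also fires the vm_lowmem event first, so a subsystem that keeps a
 * private cache of zone items can hook that event and uma_zfree() the cached
 * items before the drain.  "mysubsys_flush_cache" is hypothetical.
 *
 *	static void
 *	mysubsys_lowmem(void *arg __unused, int flags __unused)
 *	{
 *		mysubsys_flush_cache();
 *	}
 *
 *	EVENTHANDLER_REGISTER(vm_lowmem, mysubsys_lowmem, NULL,
 *	    EVENTHANDLER_PRI_ANY);
 */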
3334
3335/* See uma.h */
3336int
3337uma_zone_exhausted(uma_zone_t zone)
3338{
3339        int full;
3340
3341        ZONE_LOCK(zone);
3342        full = (zone->uz_flags & UMA_ZFLAG_FULL);
3343        ZONE_UNLOCK(zone);
3344        return (full);
3345}
3346
3347int
3348uma_zone_exhausted_nolock(uma_zone_t zone)
3349{
3350        return (zone->uz_flags & UMA_ZFLAG_FULL);
3351}
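
/*
 * Usage sketch: a caller that would rather back off than sleep can check a
 * limited zone's UMA_ZFLAG_FULL state before queueing more work.  The zone
 * name and error handling are illustrative.
 *
 *	if (uma_zone_exhausted(limited_zone))
 *		return (ENOBUFS);
 *	item = uma_zalloc(limited_zone, M_NOWAIT);
 */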
3352
3353#ifndef __rtems__
3354void *
3355uma_large_malloc(vm_size_t size, int wait)
3356{
3357        void *mem;
3358        uma_slab_t slab;
3359        uint8_t flags;
3360
3361        slab = zone_alloc_item(slabzone, NULL, wait);
3362        if (slab == NULL)
3363                return (NULL);
3364        mem = page_alloc(NULL, size, &flags, wait);
3365        if (mem) {
3366                vsetslab((vm_offset_t)mem, slab);
3367                slab->us_data = mem;
3368                slab->us_flags = flags | UMA_SLAB_MALLOC;
3369                slab->us_size = size;
3370        } else {
3371                zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3372        }
3373
3374        return (mem);
3375}
3376
3377void
3378uma_large_free(uma_slab_t slab)
3379{
3380
3381        page_free(slab->us_data, slab->us_size, slab->us_flags);
3382        zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3383}
3384#endif /* __rtems__ */
3385
3386static void
3387uma_zero_item(void *item, uma_zone_t zone)
3388{
3389        int i;
3390
3391        if (zone->uz_flags & UMA_ZONE_PCPU) {
3392                CPU_FOREACH(i)
3393                        bzero(zpcpu_get_cpu(item, i), zone->uz_size);
3394        } else
3395                bzero(item, zone->uz_size);
3396}
3397
3398void
3399uma_print_stats(void)
3400{
3401        zone_foreach(uma_print_zone);
3402}
3403
3404static void
3405slab_print(uma_slab_t slab)
3406{
3407        printf("slab: keg %p, data %p, freecount %d\n",
3408                slab->us_keg, slab->us_data, slab->us_freecount);
3409}
3410
3411static void
3412cache_print(uma_cache_t cache)
3413{
3414        printf("alloc: %p(%d), free: %p(%d)\n",
3415                cache->uc_allocbucket,
3416                cache->uc_allocbucket ? cache->uc_allocbucket->ub_cnt : 0,
3417                cache->uc_freebucket,
3418                cache->uc_freebucket ? cache->uc_freebucket->ub_cnt : 0);
3419}
3420
3421static void
3422uma_print_keg(uma_keg_t keg)
3423{
3424        uma_slab_t slab;
3425
3426        printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
3427            "out %d free %d limit %d\n",
3428            keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
3429            keg->uk_ipers, keg->uk_ppera,
3430            (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
3431            keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
3432        printf("Part slabs:\n");
3433        LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
3434                slab_print(slab);
3435        printf("Free slabs:\n");
3436        LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
3437                slab_print(slab);
3438        printf("Full slabs:\n");
3439        LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
3440                slab_print(slab);
3441}
3442
3443void
3444uma_print_zone(uma_zone_t zone)
3445{
3446        uma_cache_t cache;
3447        uma_klink_t kl;
3448        int i;
3449
3450        printf("zone: %s(%p) size %d flags %#x\n",
3451            zone->uz_name, zone, zone->uz_size, zone->uz_flags);
3452        LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
3453                uma_print_keg(kl->kl_keg);
3454        CPU_FOREACH(i) {
3455                cache = &zone->uz_cpu[i];
3456                printf("CPU %d Cache:\n", i);
3457                cache_print(cache);
3458        }
3459}
3460
3461#ifndef __rtems__
3462#ifdef DDB
3463/*
3464 * Generate statistics across both the zone and its per-CPU caches.  Return
3465 * each requested statistic through the corresponding pointer if it is non-NULL.
3466 *
3467 * Note: does not update the zone statistics, as it can't safely clear the
3468 * per-CPU cache statistic.
3469 *
3470 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3471 * safe from off-CPU; we should modify the caches to track this information
3472 * directly so that we don't have to.
3473 */
3474static void
3475uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
3476    uint64_t *freesp, uint64_t *sleepsp)
3477{
3478        uma_cache_t cache;
3479        uint64_t allocs, frees, sleeps;
3480        int cachefree, cpu;
3481
3482        allocs = frees = sleeps = 0;
3483        cachefree = 0;
3484        CPU_FOREACH(cpu) {
3485                cache = &z->uz_cpu[cpu];
3486                if (cache->uc_allocbucket != NULL)
3487                        cachefree += cache->uc_allocbucket->ub_cnt;
3488                if (cache->uc_freebucket != NULL)
3489                        cachefree += cache->uc_freebucket->ub_cnt;
3490                allocs += cache->uc_allocs;
3491                frees += cache->uc_frees;
3492        }
3493        allocs += z->uz_allocs;
3494        frees += z->uz_frees;
3495        sleeps += z->uz_sleeps;
3496        if (cachefreep != NULL)
3497                *cachefreep = cachefree;
3498        if (allocsp != NULL)
3499                *allocsp = allocs;
3500        if (freesp != NULL)
3501                *freesp = frees;
3502        if (sleepsp != NULL)
3503                *sleepsp = sleeps;
3504}
3505#endif /* DDB */
3506#endif /* __rtems__ */
3507
3508static int
3509sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3510{
3511        uma_keg_t kz;
3512        uma_zone_t z;
3513        int count;
3514
3515        count = 0;
3516        rw_rlock(&uma_rwlock);
3517        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3518                LIST_FOREACH(z, &kz->uk_zones, uz_link)
3519                        count++;
3520        }
3521        rw_runlock(&uma_rwlock);
3522        return (sysctl_handle_int(oidp, &count, 0, req));
3523}
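
/*
 * Usage sketch (userland, illustrative): this handler reports how many zones
 * exist.  Assuming it is registered as "vm.zone_count", as in stock FreeBSD,
 * it can be read with sysctlbyname(3):
 *
 *	int count;
 *	size_t len = sizeof(count);
 *
 *	if (sysctlbyname("vm.zone_count", &count, &len, NULL, 0) == 0)
 *		printf("%d UMA zones\n", count);
 */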
3524
3525static int
3526sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
3527{
3528        struct uma_stream_header ush;
3529        struct uma_type_header uth;
3530        struct uma_percpu_stat ups;
3531        uma_bucket_t bucket;
3532        struct sbuf sbuf;
3533        uma_cache_t cache;
3534        uma_klink_t kl;
3535        uma_keg_t kz;
3536        uma_zone_t z;
3537        uma_keg_t k;
3538        int count, error, i;
3539
3540        error = sysctl_wire_old_buffer(req, 0);
3541        if (error != 0)
3542                return (error);
3543        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
3544        sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
3545
3546        count = 0;
3547        rw_rlock(&uma_rwlock);
3548        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3549                LIST_FOREACH(z, &kz->uk_zones, uz_link)
3550                        count++;
3551        }
3552
3553        /*
3554         * Insert stream header.
3555         */
3556        bzero(&ush, sizeof(ush));
3557        ush.ush_version = UMA_STREAM_VERSION;
3558        ush.ush_maxcpus = (mp_maxid + 1);
3559        ush.ush_count = count;
3560        (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
3561
3562        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3563                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3564                        bzero(&uth, sizeof(uth));
3565                        ZONE_LOCK(z);
3566                        strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
3567                        uth.uth_align = kz->uk_align;
3568                        uth.uth_size = kz->uk_size;
3569                        uth.uth_rsize = kz->uk_rsize;
3570                        LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
3571                                k = kl->kl_keg;
3572                                uth.uth_maxpages += k->uk_maxpages;
3573                                uth.uth_pages += k->uk_pages;
3574                                uth.uth_keg_free += k->uk_free;
3575                                uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
3576                                    * k->uk_ipers;
3577                        }
3578
3579                        /*
3580                         * A zone is secondary if it is not the first entry
3581                         * on the keg's zone list.
3582                         */
3583                        if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
3584                            (LIST_FIRST(&kz->uk_zones) != z))
3585                                uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3586
3587                        LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3588                                uth.uth_zone_free += bucket->ub_cnt;
3589                        uth.uth_allocs = z->uz_allocs;
3590                        uth.uth_frees = z->uz_frees;
3591                        uth.uth_fails = z->uz_fails;
3592                        uth.uth_sleeps = z->uz_sleeps;
3593                        (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
3594                        /*
3595                         * While it is not normally safe to access the cache
3596                         * bucket pointers while not on the CPU that owns the
3597                         * cache, we only allow the pointers to be exchanged
3598                         * without the zone lock held, not invalidated, so
3599                         * accept the possible race associated with bucket
3600                         * exchange during monitoring.
3601                         */
3602                        for (i = 0; i < (mp_maxid + 1); i++) {
3603                                bzero(&ups, sizeof(ups));
3604                                if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3605                                        goto skip;
3606                                if (CPU_ABSENT(i))
3607                                        goto skip;
3608                                cache = &z->uz_cpu[i];
3609                                if (cache->uc_allocbucket != NULL)
3610                                        ups.ups_cache_free +=
3611                                            cache->uc_allocbucket->ub_cnt;
3612                                if (cache->uc_freebucket != NULL)
3613                                        ups.ups_cache_free +=
3614                                            cache->uc_freebucket->ub_cnt;
3615                                ups.ups_allocs = cache->uc_allocs;
3616                                ups.ups_frees = cache->uc_frees;
3617skip:
3618                                (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
3619                        }
3620                        ZONE_UNLOCK(z);
3621                }
3622        }
3623        rw_runlock(&uma_rwlock);
3624        error = sbuf_finish(&sbuf);
3625        sbuf_delete(&sbuf);
3626        return (error);
3627}
3628
3629int
3630sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
3631{
3632        uma_zone_t zone = *(uma_zone_t *)arg1;
3633        int error, max;
3634
3635        max = uma_zone_get_max(zone);
3636        error = sysctl_handle_int(oidp, &max, 0, req);
3637        if (error || !req->newptr)
3638                return (error);
3639
3640        uma_zone_set_max(zone, max);
3641
3642        return (0);
3643}
3644
3645int
3646sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
3647{
3648        uma_zone_t zone = *(uma_zone_t *)arg1;
3649        int cur;
3650
3651        cur = uma_zone_get_cur(zone);
3652        return (sysctl_handle_int(oidp, &cur, 0, req));
3653}
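
/*
 * Usage sketch: these handlers let a subsystem export a zone's item limit and
 * current use as sysctls.  arg1 must be the address of the subsystem's
 * uma_zone_t variable, since the handlers dereference it (see above).  The
 * "foo" names and the _kern parent node are hypothetical.
 *
 *	static uma_zone_t foo_zone;
 *
 *	SYSCTL_PROC(_kern, OID_AUTO, foo_zone_max,
 *	    CTLTYPE_INT | CTLFLAG_RW, &foo_zone, 0,
 *	    sysctl_handle_uma_zone_max, "I", "Maximum number of foo items");
 *	SYSCTL_PROC(_kern, OID_AUTO, foo_zone_cur,
 *	    CTLTYPE_INT | CTLFLAG_RD, &foo_zone, 0,
 *	    sysctl_handle_uma_zone_cur, "I", "Current number of foo items");
 */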
3654
3655#ifdef INVARIANTS
3656static uma_slab_t
3657uma_dbg_getslab(uma_zone_t zone, void *item)
3658{
3659        uma_slab_t slab;
3660        uma_keg_t keg;
3661        uint8_t *mem;
3662
3663        mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
3664        if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
3665                slab = vtoslab((vm_offset_t)mem);
3666        } else {
3667                /*
3668                 * It is safe to return the slab here even though the
3669                 * zone is unlocked because the item's allocation state
3670                 * essentially holds a reference.
3671                 */
3672                ZONE_LOCK(zone);
3673                keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
3674                if (keg->uk_flags & UMA_ZONE_HASH)
3675                        slab = hash_sfind(&keg->uk_hash, mem);
3676                else
3677                        slab = (uma_slab_t)(mem + keg->uk_pgoff);
3678                ZONE_UNLOCK(zone);
3679        }
3680
3681        return (slab);
3682}
3683
3684/*
3685 * Set up the slab's freei data such that uma_dbg_free can function.
3686 *
3687 */
3688static void
3689uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
3690{
3691        uma_keg_t keg;
3692        int freei;
3693
3694        if (zone_first_keg(zone) == NULL)
3695                return;
3696        if (slab == NULL) {
3697                slab = uma_dbg_getslab(zone, item);
3698                if (slab == NULL)
3699                        panic("uma: item %p did not belong to zone %s\n",
3700                            item, zone->uz_name);
3701        }
3702        keg = slab->us_keg;
3703        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3704
3705        if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
3706                panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
3707                    item, zone, zone->uz_name, slab, freei);
3708        BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
3709
3710        return;
3711}
3712
3713/*
3714 * Verifies freed addresses.  Checks for alignment, valid slab membership
3715 * and duplicate frees.
3716 *
3717 */
3718static void
3719uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
3720{
3721        uma_keg_t keg;
3722        int freei;
3723
3724        if (zone_first_keg(zone) == NULL)
3725                return;
3726        if (slab == NULL) {
3727                slab = uma_dbg_getslab(zone, item);
3728                if (slab == NULL)
3729                        panic("uma: Freed item %p did not belong to zone %s\n",
3730                            item, zone->uz_name);
3731        }
3732        keg = slab->us_keg;
3733        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3734
3735        if (freei >= keg->uk_ipers)
3736                panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
3737                    item, zone, zone->uz_name, slab, freei);
3738
3739        if (((freei * keg->uk_rsize) + slab->us_data) != item)
3740                panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
3741                    item, zone, zone->uz_name, slab, freei);
3742
3743        if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
3744                panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
3745                    item, zone, zone->uz_name, slab, freei);
3746
3747        BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
3748}
3749#endif /* INVARIANTS */
3750
3751#ifndef __rtems__
3752#ifdef DDB
3753DB_SHOW_COMMAND(uma, db_show_uma)
3754{
3755        uint64_t allocs, frees, sleeps;
3756        uma_bucket_t bucket;
3757        uma_keg_t kz;
3758        uma_zone_t z;
3759        int cachefree;
3760
3761        db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
3762            "Free", "Requests", "Sleeps", "Bucket");
3763        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3764                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3765                        if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3766                                allocs = z->uz_allocs;
3767                                frees = z->uz_frees;
3768                                sleeps = z->uz_sleeps;
3769                                cachefree = 0;
3770                        } else
3771                                uma_zone_sumstat(z, &cachefree, &allocs,
3772                                    &frees, &sleeps);
3773                        if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3774                            (LIST_FIRST(&kz->uk_zones) != z)))
3775                                cachefree += kz->uk_free;
3776                        LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3777                                cachefree += bucket->ub_cnt;
3778                        db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
3779                            z->uz_name, (uintmax_t)kz->uk_size,
3780                            (intmax_t)(allocs - frees), cachefree,
3781                            (uintmax_t)allocs, sleeps, z->uz_count);
3782                        if (db_pager_quit)
3783                                return;
3784                }
3785        }
3786}
3787
3788DB_SHOW_COMMAND(umacache, db_show_umacache)
3789{
3790        uint64_t allocs, frees;
3791        uma_bucket_t bucket;
3792        uma_zone_t z;
3793        int cachefree;
3794
3795        db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3796            "Requests", "Bucket");
3797        LIST_FOREACH(z, &uma_cachezones, uz_link) {
3798                uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
3799                LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3800                        cachefree += bucket->ub_cnt;
3801                db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
3802                    z->uz_name, (uintmax_t)z->uz_size,
3803                    (intmax_t)(allocs - frees), cachefree,
3804                    (uintmax_t)allocs, z->uz_count);
3805                if (db_pager_quit)
3806                        return;
3807        }
3808}
3809#endif  /* DDB */
3810#endif /* __rtems__ */