source: rtems-libbsd/freebsd/sys/vm/uma_core.c @ e5db084

Last change on this file since e5db084 was e5db084, checked in by Sebastian Huber <sebastian.huber@…>, on 03/06/15 at 12:58:45

ZONE(9): Enable per-processor cache for SMP

This prevents a potential deadlock via the Giant lock and is a
performance benefit.

1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * uma_core.c  Implementation of the Universal Memory allocator
33 *
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36 * efficient.  A primary design goal is to return unused memory to the rest of
37 * the system.  This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
40 *
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
42 * are well known.
43 *
44 */
45
46/*
47 * TODO:
48 *      - Improve memory usage for large allocations
49 *      - Investigate cache size adjustments
50 */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD$");
54
55/* I should really use ktr.. */
56/*
57#define UMA_DEBUG 1
58#define UMA_DEBUG_ALLOC 1
59#define UMA_DEBUG_ALLOC_1 1
60*/
61
62#include <rtems/bsd/local/opt_ddb.h>
63#include <rtems/bsd/local/opt_param.h>
64
65#include <rtems/bsd/sys/param.h>
66#include <sys/systm.h>
67#include <sys/kernel.h>
68#include <rtems/bsd/sys/types.h>
69#include <sys/queue.h>
70#include <sys/malloc.h>
71#include <sys/ktr.h>
72#include <rtems/bsd/sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/mutex.h>
75#include <sys/proc.h>
76#include <sys/sbuf.h>
77#include <sys/smp.h>
78#include <sys/vmmeter.h>
79
80#include <vm/vm.h>
81#include <vm/vm_object.h>
82#include <vm/vm_page.h>
83#include <vm/vm_param.h>
84#include <vm/vm_map.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_extern.h>
87#include <vm/uma.h>
88#include <vm/uma_int.h>
89#include <vm/uma_dbg.h>
90
91#include <ddb/ddb.h>
92#ifdef __rtems__
93  #ifdef RTEMS_SMP
94    /*
95     * It is essential that we have a per-processor cache, otherwise the
96     * critical_enter()/critical_exit() protection would be insufficient.
97     */
98    #undef curcpu
99    #define curcpu rtems_get_current_processor()
100    #undef mp_maxid
101    #define mp_maxid rtems_get_processor_count()
102  #endif
103#endif /* __rtems__ */
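/*
 * A simplified sketch of why the remapping above matters: the allocation
 * fast path selects a cache purely by processor index and relies only on
 * critical_enter()/critical_exit() for protection, so on SMP each
 * processor really must see its own cache (compare uma_zalloc_arg()
 * further down in this file).
 */
#if 0
        critical_enter();               /* pin to the current processor */
        cpu = curcpu;                   /* rtems_get_current_processor() on SMP */
        cache = &zone->uz_cpu[cpu];     /* this processor's private buckets */
        bucket = cache->uc_allocbucket;
        if (bucket != NULL && bucket->ub_cnt > 0)
                item = bucket->ub_bucket[--bucket->ub_cnt];
        critical_exit();
#endif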
104
105/*
106 * This is the zone and keg from which all zones are spawned.  The idea is that
107 * even the zone & keg heads are allocated from the allocator, so we use the
108 * bss section to bootstrap us.
109 */
110static struct uma_keg masterkeg;
111static struct uma_zone masterzone_k;
112static struct uma_zone masterzone_z;
113static uma_zone_t kegs = &masterzone_k;
114static uma_zone_t zones = &masterzone_z;
115
116/* This is the zone from which all of uma_slab_t's are allocated. */
117static uma_zone_t slabzone;
118static uma_zone_t slabrefzone;  /* With refcounters (for UMA_ZONE_REFCNT) */
119
120/*
121 * The initial hash tables come out of this zone so they can be allocated
122 * prior to malloc coming up.
123 */
124static uma_zone_t hashzone;
125
126/* The boot-time adjusted value for cache line alignment. */
127int uma_align_cache = 64 - 1;
128
129static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
130
131#ifndef __rtems__
132/*
133 * Are we allowed to allocate buckets?
134 */
135static int bucketdisable = 1;
136#endif /* __rtems__ */
137
138/* Linked list of all kegs in the system */
139static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
140
141/* This mutex protects the keg list */
142static struct mtx uma_mtx;
143
144#ifndef __rtems__
145/* Linked list of boot time pages */
146static LIST_HEAD(,uma_slab) uma_boot_pages =
147    LIST_HEAD_INITIALIZER(uma_boot_pages);
148
149/* This mutex protects the boot time pages list */
150static struct mtx uma_boot_pages_mtx;
151
152/* Is the VM done starting up? */
153static int booted = 0;
154#define UMA_STARTUP     1
155#define UMA_STARTUP2    2
156#endif /* __rtems__ */
157
158/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
159static u_int uma_max_ipers;
160static u_int uma_max_ipers_ref;
161
162/*
163 * This is the handle used to schedule events that need to happen
164 * outside of the allocation fast path.
165 */
166static struct callout uma_callout;
167#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
168
169/*
170 * This structure is passed as the zone ctor arg so that I don't have to create
171 * a special allocation function just for zones.
172 */
173struct uma_zctor_args {
174        const char *name;
175        size_t size;
176        uma_ctor ctor;
177        uma_dtor dtor;
178        uma_init uminit;
179        uma_fini fini;
180        uma_keg_t keg;
181        int align;
182        u_int32_t flags;
183};
184
185struct uma_kctor_args {
186        uma_zone_t zone;
187        size_t size;
188        uma_init uminit;
189        uma_fini fini;
190        int align;
191        u_int32_t flags;
192};
193
194struct uma_bucket_zone {
195        uma_zone_t      ubz_zone;
196        char            *ubz_name;
197        int             ubz_entries;
198};
199
200#define BUCKET_MAX      128
201
202struct uma_bucket_zone bucket_zones[] = {
203        { NULL, "16 Bucket", 16 },
204        { NULL, "32 Bucket", 32 },
205        { NULL, "64 Bucket", 64 },
206        { NULL, "128 Bucket", 128 },
207        { NULL, NULL, 0}
208};
209
210#define BUCKET_SHIFT    4
211#define BUCKET_ZONES    ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
212
213/*
214 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
215 * of approximately the right size.
216 */
217static uint8_t bucket_size[BUCKET_ZONES];
218
219/*
220 * Flags and enumerations to be passed to internal functions.
221 */
222enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
223
224#define ZFREE_STATFAIL  0x00000001      /* Update zone failure statistic. */
225#define ZFREE_STATFREE  0x00000002      /* Update zone free statistic. */
226
227/* Prototypes.. */
228
229#ifndef __rtems__
230static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
231#endif /* __rtems__ */
232static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
233#ifndef __rtems__
234static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
235#endif /* __rtems__ */
236static void page_free(void *, int, u_int8_t);
237static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
238static void cache_drain(uma_zone_t);
239static void bucket_drain(uma_zone_t, uma_bucket_t);
240static void bucket_cache_drain(uma_zone_t zone);
241static int keg_ctor(void *, int, void *, int);
242static void keg_dtor(void *, int, void *);
243static int zone_ctor(void *, int, void *, int);
244static void zone_dtor(void *, int, void *);
245static int zero_init(void *, int, int);
246static void keg_small_init(uma_keg_t keg);
247static void keg_large_init(uma_keg_t keg);
248static void zone_foreach(void (*zfunc)(uma_zone_t));
249static void zone_timeout(uma_zone_t zone);
250static int hash_alloc(struct uma_hash *);
251static int hash_expand(struct uma_hash *, struct uma_hash *);
252static void hash_free(struct uma_hash *hash);
253static void uma_timeout(void *);
254static void uma_startup3(void);
255static void *zone_alloc_item(uma_zone_t, void *, int);
256static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
257    int);
258static void bucket_enable(void);
259static void bucket_init(void);
260static uma_bucket_t bucket_alloc(int, int);
261static void bucket_free(uma_bucket_t);
262static void bucket_zone_drain(void);
263static int zone_alloc_bucket(uma_zone_t zone, int flags);
264static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
265#ifndef __rtems__
266static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
267#endif /* __rtems__ */
268static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
269static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
270    uma_fini fini, int align, u_int32_t flags);
271static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
272static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
273
274void uma_print_zone(uma_zone_t);
275void uma_print_stats(void);
276static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
277static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
278
279SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
280
281SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
282    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
283
284SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
285    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
286
287/*
288 * This routine checks to see whether or not it's safe to enable buckets.
289 */
290
291static void
292bucket_enable(void)
293{
294#ifndef __rtems__
295        bucketdisable = vm_page_count_min();
296#endif /* __rtems__ */
297}
298
299/*
300 * Initialize bucket_zones, the array of zones of buckets of various sizes.
301 *
302 * For each zone, calculate the memory required for each bucket, consisting
303 * of the header and an array of pointers.  Initialize bucket_size[] to point
304 * the range of appropriate bucket sizes at the zone.
305 */
306static void
307bucket_init(void)
308{
309        struct uma_bucket_zone *ubz;
310        int i;
311        int j;
312
313        for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
314                int size;
315
316                ubz = &bucket_zones[j];
317                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
318                size += sizeof(void *) * ubz->ubz_entries;
319                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
320                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
321                    UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
322                for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
323                        bucket_size[i >> BUCKET_SHIFT] = j;
324        }
325}
326
327/*
328 * Given a desired number of entries for a bucket, return the zone from which
329 * to allocate the bucket.
330 */
331static struct uma_bucket_zone *
332bucket_zone_lookup(int entries)
333{
334        int idx;
335
336        idx = howmany(entries, 1 << BUCKET_SHIFT);
337        return (&bucket_zones[bucket_size[idx]]);
338}
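/*
 * Worked example of the table built by bucket_init() above (BUCKET_SHIFT
 * is 4, so each slot covers 16 entries): bucket_size[0..1] -> "16 Bucket",
 * bucket_size[2] -> "32 Bucket", bucket_size[3..4] -> "64 Bucket" and
 * bucket_size[5..8] -> "128 Bucket".  A request for, say, 20 entries gives
 * howmany(20, 16) == 2, so bucket_zone_lookup() returns the "32 Bucket"
 * zone, the smallest bucket zone that can hold the request.
 */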
339
340static uma_bucket_t
341bucket_alloc(int entries, int bflags)
342{
343        struct uma_bucket_zone *ubz;
344        uma_bucket_t bucket;
345
346#ifndef __rtems__
347        /*
348         * This is to stop us from allocating per cpu buckets while we're
349         * running out of vm.boot_pages.  Otherwise, we would exhaust the
350         * boot pages.  This also prevents us from allocating buckets in
351         * low memory situations.
352         */
353        if (bucketdisable)
354                return (NULL);
355#endif /* __rtems__ */
356
357        ubz = bucket_zone_lookup(entries);
358        bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
359        if (bucket) {
360#ifdef INVARIANTS
361                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
362#endif
363                bucket->ub_cnt = 0;
364                bucket->ub_entries = ubz->ubz_entries;
365        }
366
367        return (bucket);
368}
369
370static void
371bucket_free(uma_bucket_t bucket)
372{
373        struct uma_bucket_zone *ubz;
374
375        ubz = bucket_zone_lookup(bucket->ub_entries);
376        zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
377            ZFREE_STATFREE);
378}
379
380static void
381bucket_zone_drain(void)
382{
383        struct uma_bucket_zone *ubz;
384
385        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
386                zone_drain(ubz->ubz_zone);
387}
388
389static inline uma_keg_t
390zone_first_keg(uma_zone_t zone)
391{
392
393        return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
394}
395
396static void
397zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
398{
399        uma_klink_t klink;
400
401        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
402                kegfn(klink->kl_keg);
403}
404
405/*
406 * Routine called by timeout which is used to fire off some time interval
407 * based calculations.  (stats, hash size, etc.)
408 *
409 * Arguments:
410 *      arg   Unused
411 *
412 * Returns:
413 *      Nothing
414 */
415static void
416uma_timeout(void *unused)
417{
418        bucket_enable();
419        zone_foreach(zone_timeout);
420
421        /* Reschedule this event */
422        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
423}
424
425/*
426 * Routine to perform timeout driven calculations.  This expands the
427 * hashes and does per cpu statistics aggregation.
428 *
429 *  Returns nothing.
430 */
431static void
432keg_timeout(uma_keg_t keg)
433{
434
435        KEG_LOCK(keg);
436        /*
437         * Expand the keg hash table.
438         *
439         * This is done if the number of slabs is larger than the hash size.
440         * What I'm trying to do here is completely reduce collisions.  This
441         * may be a little aggressive.  Should I allow for two collisions max?
442         */
443        if (keg->uk_flags & UMA_ZONE_HASH &&
444            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
445                struct uma_hash newhash;
446                struct uma_hash oldhash;
447                int ret;
448
449                /*
450                 * This is so involved because allocating and freeing
451                 * while the keg lock is held will lead to deadlock.
452                 * I have to do everything in stages and check for
453                 * races.
454                 */
455                newhash = keg->uk_hash;
456                KEG_UNLOCK(keg);
457                ret = hash_alloc(&newhash);
458                KEG_LOCK(keg);
459                if (ret) {
460                        if (hash_expand(&keg->uk_hash, &newhash)) {
461                                oldhash = keg->uk_hash;
462                                keg->uk_hash = newhash;
463                        } else
464                                oldhash = newhash;
465
466                        KEG_UNLOCK(keg);
467                        hash_free(&oldhash);
468                        KEG_LOCK(keg);
469                }
470        }
471        KEG_UNLOCK(keg);
472}
473
474static void
475zone_timeout(uma_zone_t zone)
476{
477
478        zone_foreach_keg(zone, &keg_timeout);
479}
480
481/*
482 * Allocate and zero fill the next sized hash table from the appropriate
483 * backing store.
484 *
485 * Arguments:
486 *      hash  A new hash structure with the old hash size in uh_hashsize
487 *
488 * Returns:
489 *      1 on success and 0 on failure.
490 */
491static int
492hash_alloc(struct uma_hash *hash)
493{
494        int oldsize;
495        int alloc;
496
497        oldsize = hash->uh_hashsize;
498
499        /* We're just going to go to a power of two greater */
500        if (oldsize)  {
501                hash->uh_hashsize = oldsize * 2;
502                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
503                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
504                    M_UMAHASH, M_NOWAIT);
505        } else {
506                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
507                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
508                    M_WAITOK);
509                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
510        }
511        if (hash->uh_slab_hash) {
512                bzero(hash->uh_slab_hash, alloc);
513                hash->uh_hashmask = hash->uh_hashsize - 1;
514                return (1);
515        }
516
517        return (0);
518}
519
520/*
521 * Expands the hash table for HASH zones.  This is done from zone_timeout
522 * to reduce collisions.  This must not be done in the regular allocation
523 * path, otherwise, we can recurse on the vm while allocating pages.
524 *
525 * Arguments:
526 *      oldhash  The hash you want to expand
527 *      newhash  The hash structure for the new table
528 *
529 * Returns:
530 *      Nothing
531 *
532 * Discussion:
533 */
534static int
535hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
536{
537        uma_slab_t slab;
538        int hval;
539        int i;
540
541        if (!newhash->uh_slab_hash)
542                return (0);
543
544        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
545                return (0);
546
547        /*
548         * I need to investigate hash algorithms for resizing without a
549         * full rehash.
550         */
551
552        for (i = 0; i < oldhash->uh_hashsize; i++)
553                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
554                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
555                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
556                        hval = UMA_HASH(newhash, slab->us_data);
557                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
558                            slab, us_hlink);
559                }
560
561        return (1);
562}
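/*
 * Example of why a full rehash is needed: hash_alloc() always doubles the
 * table, so growing from 32 to 64 buckets changes uh_hashmask from 0x1f to
 * 0x3f.  A slab whose hash value has the new bit set lands in a different
 * bucket under the new mask, so every chain of the old table has to be
 * walked and each slab reinserted, which is what the loop above does.
 */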
563
564/*
565 * Free the hash bucket to the appropriate backing store.
566 *
567 * Arguments:
568 *      slab_hash  The hash bucket we're freeing
569 *      hashsize   The number of entries in that hash bucket
570 *
571 * Returns:
572 *      Nothing
573 */
574static void
575hash_free(struct uma_hash *hash)
576{
577        if (hash->uh_slab_hash == NULL)
578                return;
579        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
580                zone_free_item(hashzone,
581                    hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
582        else
583                free(hash->uh_slab_hash, M_UMAHASH);
584}
585
586/*
587 * Frees all outstanding items in a bucket
588 *
589 * Arguments:
590 *      zone   The zone to free to, must be unlocked.
591 *      bucket The free/alloc bucket with items, cpu queue must be locked.
592 *
593 * Returns:
594 *      Nothing
595 */
596
597static void
598bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
599{
600        void *item;
601
602        if (bucket == NULL)
603                return;
604
605        while (bucket->ub_cnt > 0)  {
606                bucket->ub_cnt--;
607                item = bucket->ub_bucket[bucket->ub_cnt];
608#ifdef INVARIANTS
609                bucket->ub_bucket[bucket->ub_cnt] = NULL;
610                KASSERT(item != NULL,
611                    ("bucket_drain: botched ptr, item is NULL"));
612#endif
613                zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
614        }
615}
616
617/*
618 * Drains the per cpu caches for a zone.
619 *
620 * NOTE: This may only be called while the zone is being torn down, and not
621 * during normal operation.  This is necessary in order that we do not have
622 * to migrate CPUs to drain the per-CPU caches.
623 *
624 * Arguments:
625 *      zone     The zone to drain, must be unlocked.
626 *
627 * Returns:
628 *      Nothing
629 */
630static void
631cache_drain(uma_zone_t zone)
632{
633        uma_cache_t cache;
634        int cpu;
635
636        /*
637         * XXX: It is safe to not lock the per-CPU caches, because we're
638         * tearing down the zone anyway.  I.e., there will be no further use
639         * of the caches at this point.
640         *
641 * XXX: It would be good to be able to assert that the zone is being
642         * torn down to prevent improper use of cache_drain().
643         *
644         * XXX: We lock the zone before passing into bucket_cache_drain() as
645         * it is used elsewhere.  Should the tear-down path be made special
646         * there in some form?
647         */
648        CPU_FOREACH(cpu) {
649                cache = &zone->uz_cpu[cpu];
650                bucket_drain(zone, cache->uc_allocbucket);
651                bucket_drain(zone, cache->uc_freebucket);
652                if (cache->uc_allocbucket != NULL)
653                        bucket_free(cache->uc_allocbucket);
654                if (cache->uc_freebucket != NULL)
655                        bucket_free(cache->uc_freebucket);
656                cache->uc_allocbucket = cache->uc_freebucket = NULL;
657        }
658        ZONE_LOCK(zone);
659        bucket_cache_drain(zone);
660        ZONE_UNLOCK(zone);
661}
662
663/*
664 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
665 */
666static void
667bucket_cache_drain(uma_zone_t zone)
668{
669        uma_bucket_t bucket;
670
671        /*
672         * Drain the bucket queues and free the buckets; we just keep two per
673         * cpu (alloc/free).
674         */
675        while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
676                LIST_REMOVE(bucket, ub_link);
677                ZONE_UNLOCK(zone);
678                bucket_drain(zone, bucket);
679                bucket_free(bucket);
680                ZONE_LOCK(zone);
681        }
682
683        /* Now we do the free queue.. */
684        while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
685                LIST_REMOVE(bucket, ub_link);
686                bucket_free(bucket);
687        }
688}
689
690/*
691 * Frees pages from a keg back to the system.  This is done on demand from
692 * the pageout daemon.
693 *
694 * Returns nothing.
695 */
696static void
697keg_drain(uma_keg_t keg)
698{
699        struct slabhead freeslabs = { 0 };
700        uma_slab_t slab;
701        uma_slab_t n;
702        u_int8_t flags;
703        u_int8_t *mem;
704        int i;
705
706        /*
707         * We don't want to take pages from statically allocated kegs at this
708         * time
709         */
710        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
711                return;
712
713#ifdef UMA_DEBUG
714        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
715#endif
716        KEG_LOCK(keg);
717        if (keg->uk_free == 0)
718                goto finished;
719
720        slab = LIST_FIRST(&keg->uk_free_slab);
721        while (slab) {
722                n = LIST_NEXT(slab, us_link);
723
724                /* We have nowhere to free these to */
725                if (slab->us_flags & UMA_SLAB_BOOT) {
726                        slab = n;
727                        continue;
728                }
729
730                LIST_REMOVE(slab, us_link);
731                keg->uk_pages -= keg->uk_ppera;
732                keg->uk_free -= keg->uk_ipers;
733
734                if (keg->uk_flags & UMA_ZONE_HASH)
735                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
736
737                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
738
739                slab = n;
740        }
741finished:
742        KEG_UNLOCK(keg);
743
744        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
745                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
746                if (keg->uk_fini)
747                        for (i = 0; i < keg->uk_ipers; i++)
748                                keg->uk_fini(
749                                    slab->us_data + (keg->uk_rsize * i),
750                                    keg->uk_size);
751                flags = slab->us_flags;
752                mem = slab->us_data;
753
754#ifndef __rtems__
755                if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
756                        vm_object_t obj;
757
758                        if (flags & UMA_SLAB_KMEM)
759                                obj = kmem_object;
760                        else if (flags & UMA_SLAB_KERNEL)
761                                obj = kernel_object;
762                        else
763                                obj = NULL;
764                        for (i = 0; i < keg->uk_ppera; i++)
765                                vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
766                                    obj);
767                }
768#endif /* __rtems__ */
769                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
770                        zone_free_item(keg->uk_slabzone, slab, NULL,
771                            SKIP_NONE, ZFREE_STATFREE);
772#ifdef UMA_DEBUG
773                printf("%s: Returning %d bytes.\n",
774                    keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
775#endif
776                keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
777        }
778}
779
780static void
781zone_drain_wait(uma_zone_t zone, int waitok)
782{
783
784        /*
785         * Set draining to interlock with zone_dtor() so we can release our
786         * locks as we go.  Only dtor() should do a WAITOK call since it
787         * is the only call that knows the structure will still be available
788         * when it wakes up.
789         */
790        ZONE_LOCK(zone);
791        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
792                if (waitok == M_NOWAIT)
793                        goto out;
794                mtx_unlock(&uma_mtx);
795                msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
796                mtx_lock(&uma_mtx);
797        }
798        zone->uz_flags |= UMA_ZFLAG_DRAINING;
799        bucket_cache_drain(zone);
800        ZONE_UNLOCK(zone);
801        /*
802         * The DRAINING flag protects us from being freed while
803         * we're running.  Normally the uma_mtx would protect us but we
804         * must be able to release and acquire the right lock for each keg.
805         */
806        zone_foreach_keg(zone, &keg_drain);
807        ZONE_LOCK(zone);
808        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
809        wakeup(zone);
810out:
811        ZONE_UNLOCK(zone);
812}
813
814void
815zone_drain(uma_zone_t zone)
816{
817
818        zone_drain_wait(zone, M_NOWAIT);
819}
820
821/*
822 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
823 *
824 * Arguments:
825 *      wait  Shall we wait?
826 *
827 * Returns:
828 *      The slab that was allocated or NULL if there is no memory and the
829 *      caller specified M_NOWAIT.
830 */
831static uma_slab_t
832keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
833{
834        uma_slabrefcnt_t slabref;
835        uma_alloc allocf;
836        uma_slab_t slab;
837        u_int8_t *mem;
838        u_int8_t flags;
839        int i;
840
841        mtx_assert(&keg->uk_lock, MA_OWNED);
842        slab = NULL;
843
844#ifdef UMA_DEBUG
845        printf("slab_zalloc:  Allocating a new slab for %s\n", keg->uk_name);
846#endif
847        allocf = keg->uk_allocf;
848        KEG_UNLOCK(keg);
849
850        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
851                slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
852                if (slab == NULL) {
853                        KEG_LOCK(keg);
854                        return NULL;
855                }
856        }
857
858        /*
859         * This reproduces the old vm_zone behavior of zero filling pages the
860         * first time they are added to a zone.
861         *
862         * Malloced items are zeroed in uma_zalloc.
863         */
864
865        if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
866                wait |= M_ZERO;
867        else
868                wait &= ~M_ZERO;
869
870        if (keg->uk_flags & UMA_ZONE_NODUMP)
871                wait |= M_NODUMP;
872
873        /* zone is passed for legacy reasons. */
874        mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
875        if (mem == NULL) {
876                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
877                        zone_free_item(keg->uk_slabzone, slab, NULL,
878                            SKIP_NONE, ZFREE_STATFREE);
879                KEG_LOCK(keg);
880                return (NULL);
881        }
882
883        /* Point the slab into the allocated memory */
884        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
885                slab = (uma_slab_t )(mem + keg->uk_pgoff);
886
887        if (keg->uk_flags & UMA_ZONE_VTOSLAB)
888                for (i = 0; i < keg->uk_ppera; i++)
889                        vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
890
891        slab->us_keg = keg;
892        slab->us_data = mem;
893        slab->us_freecount = keg->uk_ipers;
894        slab->us_firstfree = 0;
895        slab->us_flags = flags;
896
897        if (keg->uk_flags & UMA_ZONE_REFCNT) {
898                slabref = (uma_slabrefcnt_t)slab;
899                for (i = 0; i < keg->uk_ipers; i++) {
900                        slabref->us_freelist[i].us_refcnt = 0;
901                        slabref->us_freelist[i].us_item = i+1;
902                }
903        } else {
904                for (i = 0; i < keg->uk_ipers; i++)
905                        slab->us_freelist[i].us_item = i+1;
906        }
907
908        if (keg->uk_init != NULL) {
909                for (i = 0; i < keg->uk_ipers; i++)
910                        if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
911                            keg->uk_size, wait) != 0)
912                                break;
913                if (i != keg->uk_ipers) {
914                        if (keg->uk_fini != NULL) {
915                                for (i--; i > -1; i--)
916                                        keg->uk_fini(slab->us_data +
917                                            (keg->uk_rsize * i),
918                                            keg->uk_size);
919                        }
920#ifndef __rtems__
921                        if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
922                                vm_object_t obj;
923
924                                if (flags & UMA_SLAB_KMEM)
925                                        obj = kmem_object;
926                                else if (flags & UMA_SLAB_KERNEL)
927                                        obj = kernel_object;
928                                else
929                                        obj = NULL;
930                                for (i = 0; i < keg->uk_ppera; i++)
931                                        vsetobj((vm_offset_t)mem +
932                                            (i * PAGE_SIZE), obj);
933                        }
934#endif /* __rtems__ */
935                        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
936                                zone_free_item(keg->uk_slabzone, slab,
937                                    NULL, SKIP_NONE, ZFREE_STATFREE);
938                        keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
939                            flags);
940                        KEG_LOCK(keg);
941                        return (NULL);
942                }
943        }
944        KEG_LOCK(keg);
945
946        if (keg->uk_flags & UMA_ZONE_HASH)
947                UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
948
949        keg->uk_pages += keg->uk_ppera;
950        keg->uk_free += keg->uk_ipers;
951
952        return (slab);
953}
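/*
 * The us_freelist initialization above builds an index-linked free list
 * inside the slab: us_firstfree names the first free item and each entry's
 * us_item names the next one.  A rough sketch of how an item is popped from
 * that list (essentially what slab_alloc_item() does later in this file):
 */
#if 0
        freei = slab->us_firstfree;
        slab->us_firstfree = slab->us_freelist[freei].us_item;
        item = slab->us_data + (keg->uk_rsize * freei);
#endif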
954
955#ifndef __rtems__
956/*
957 * This function is intended to be used early on in place of page_alloc() so
958 * that we may use the boot time page cache to satisfy allocations before
959 * the VM is ready.
960 */
961static void *
962startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
963{
964        uma_keg_t keg;
965        uma_slab_t tmps;
966        int pages, check_pages;
967
968        keg = zone_first_keg(zone);
969        pages = howmany(bytes, PAGE_SIZE);
970        check_pages = pages - 1;
971        KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
972
973        /*
974         * Check our small startup cache to see if it has pages remaining.
975         */
976        mtx_lock(&uma_boot_pages_mtx);
977
978        /* First check if we have enough room. */
979        tmps = LIST_FIRST(&uma_boot_pages);
980        while (tmps != NULL && check_pages-- > 0)
981                tmps = LIST_NEXT(tmps, us_link);
982        if (tmps != NULL) {
983                /*
984                 * It's ok to lose tmps references.  The last one will
985                 * have tmps->us_data pointing to the start address of
986                 * "pages" contiguous pages of memory.
987                 */
988                while (pages-- > 0) {
989                        tmps = LIST_FIRST(&uma_boot_pages);
990                        LIST_REMOVE(tmps, us_link);
991                }
992                mtx_unlock(&uma_boot_pages_mtx);
993                *pflag = tmps->us_flags;
994                return (tmps->us_data);
995        }
996        mtx_unlock(&uma_boot_pages_mtx);
997        if (booted < UMA_STARTUP2)
998                panic("UMA: Increase vm.boot_pages");
999        /*
1000         * Now that we've booted reset these users to their real allocator.
1001         */
1002#ifdef UMA_MD_SMALL_ALLOC
1003        keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1004#else
1005        keg->uk_allocf = page_alloc;
1006#endif
1007        return keg->uk_allocf(zone, bytes, pflag, wait);
1008}
1009#endif /* __rtems__ */
1010
1011/*
1012 * Allocates a number of pages from the system
1013 *
1014 * Arguments:
1015 *      bytes  The number of bytes requested
1016 *      wait  Shall we wait?
1017 *
1018 * Returns:
1019 *      A pointer to the alloced memory or possibly
1020 *      NULL if M_NOWAIT is set.
1021 */
1022static void *
1023page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
1024{
1025        void *p;        /* Returned page */
1026
1027        *pflag = UMA_SLAB_KMEM;
1028#ifndef __rtems__
1029        p = (void *) kmem_malloc(kmem_map, bytes, wait);
1030#else /* __rtems__ */
1031        p = rtems_bsd_page_alloc(bytes, wait);
1032#endif /* __rtems__ */
1033
1034        return (p);
1035}
1036
1037#ifndef __rtems__
1038/*
1039 * Allocates a number of pages from within an object
1040 *
1041 * Arguments:
1042 *      bytes  The number of bytes requested
1043 *      wait   Shall we wait?
1044 *
1045 * Returns:
1046 *      A pointer to the alloced memory or possibly
1047 *      NULL if M_NOWAIT is set.
1048 */
1049static void *
1050obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
1051{
1052        vm_object_t object;
1053        vm_offset_t retkva, zkva;
1054        vm_page_t p;
1055        int pages, startpages;
1056        uma_keg_t keg;
1057
1058        keg = zone_first_keg(zone);
1059        object = keg->uk_obj;
1060        retkva = 0;
1061
1062        /*
1063         * This looks a little weird since we're getting one page at a time.
1064         */
1065        VM_OBJECT_LOCK(object);
1066        p = TAILQ_LAST(&object->memq, pglist);
1067        pages = p != NULL ? p->pindex + 1 : 0;
1068        startpages = pages;
1069        zkva = keg->uk_kva + pages * PAGE_SIZE;
1070        for (; bytes > 0; bytes -= PAGE_SIZE) {
1071                p = vm_page_alloc(object, pages,
1072                    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
1073                if (p == NULL) {
1074                        if (pages != startpages)
1075                                pmap_qremove(retkva, pages - startpages);
1076                        while (pages != startpages) {
1077                                pages--;
1078                                p = TAILQ_LAST(&object->memq, pglist);
1079                                vm_page_unwire(p, 0);
1080                                vm_page_free(p);
1081                        }
1082                        retkva = 0;
1083                        goto done;
1084                }
1085                pmap_qenter(zkva, &p, 1);
1086                if (retkva == 0)
1087                        retkva = zkva;
1088                zkva += PAGE_SIZE;
1089                pages += 1;
1090        }
1091done:
1092        VM_OBJECT_UNLOCK(object);
1093        *flags = UMA_SLAB_PRIV;
1094
1095        return ((void *)retkva);
1096}
1097#endif /* __rtems__ */
1098
1099/*
1100 * Frees a number of pages to the system
1101 *
1102 * Arguments:
1103 *      mem   A pointer to the memory to be freed
1104 *      size  The size of the memory being freed
1105 *      flags The original p->us_flags field
1106 *
1107 * Returns:
1108 *      Nothing
1109 */
1110static void
1111page_free(void *mem, int size, u_int8_t flags)
1112{
1113#ifndef __rtems__
1114        vm_map_t map;
1115
1116        if (flags & UMA_SLAB_KMEM)
1117                map = kmem_map;
1118        else if (flags & UMA_SLAB_KERNEL)
1119                map = kernel_map;
1120        else
1121                panic("UMA: page_free used with invalid flags %d", flags);
1122
1123        kmem_free(map, (vm_offset_t)mem, size);
1124#else /* __rtems__ */
1125        rtems_bsd_page_free(mem);
1126#endif /* __rtems__ */
1127}
1128
1129/*
1130 * Zero fill initializer
1131 *
1132 * Arguments/Returns follow uma_init specifications
1133 */
1134static int
1135zero_init(void *mem, int size, int flags)
1136{
1137        bzero(mem, size);
1138        return (0);
1139}
1140
1141/*
1142 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
1143 *
1144 * Arguments
1145 *      keg  The keg we should initialize
1146 *
1147 * Returns
1148 *      Nothing
1149 */
1150static void
1151keg_small_init(uma_keg_t keg)
1152{
1153        u_int rsize;
1154        u_int memused;
1155        u_int wastedspace;
1156        u_int shsize;
1157
1158        KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
1159        rsize = keg->uk_size;
1160
1161        if (rsize < UMA_SMALLEST_UNIT)
1162                rsize = UMA_SMALLEST_UNIT;
1163        if (rsize & keg->uk_align)
1164                rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1165
1166        keg->uk_rsize = rsize;
1167        keg->uk_ppera = 1;
1168
1169        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1170                shsize = 0;
1171        } else if (keg->uk_flags & UMA_ZONE_REFCNT) {
1172                rsize += UMA_FRITMREF_SZ;       /* linkage & refcnt */
1173                shsize = sizeof(struct uma_slab_refcnt);
1174        } else {
1175                rsize += UMA_FRITM_SZ;  /* Account for linkage */
1176                shsize = sizeof(struct uma_slab);
1177        }
1178
1179        keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1180        KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
1181        memused = keg->uk_ipers * rsize + shsize;
1182        wastedspace = UMA_SLAB_SIZE - memused;
1183
1184        /*
1185         * We can't do OFFPAGE if we're internal or if we've been
1186         * asked to not go to the VM for buckets.  If we do this we
1187         * may end up going to the VM (kmem_map) for slabs which we
1188         * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1189         * result of UMA_ZONE_VM, which clearly forbids it.
1190         */
1191        if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1192            (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1193                return;
1194
1195        if ((wastedspace >= UMA_MAX_WASTE) &&
1196            (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1197                keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1198                KASSERT(keg->uk_ipers <= 255,
1199                    ("keg_small_init: keg->uk_ipers too high!"));
1200#ifdef UMA_DEBUG
1201                printf("UMA decided we need offpage slab headers for "
1202                    "keg: %s, calculated wastedspace = %d, "
1203                    "maximum wasted space allowed = %d, "
1204                    "calculated ipers = %d, "
1205                    "new wasted space = %d\n", keg->uk_name, wastedspace,
1206                    UMA_MAX_WASTE, keg->uk_ipers,
1207                    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1208#endif
1209                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1210                if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1211                        keg->uk_flags |= UMA_ZONE_HASH;
1212        }
1213}
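/*
 * Worked example (assuming 4 KB slabs, an in-page slab header of roughly
 * 100 bytes and UMA_MAX_WASTE of about a tenth of a slab): a keg whose
 * items round up to rsize = 512 gets ipers = (4096 - 100) / 512 = 7, which
 * uses 7 * 512 + 100 = 3684 bytes and wastes 412.  That is over the waste
 * limit, while 4096 / 512 = 8 items would fit without the header, so the
 * code above switches to UMA_ZONE_OFFPAGE, keeps the header elsewhere and
 * packs 8 items per slab with no waste.
 */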
1214
1215/*
1216 * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
1217 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1218 * more complicated.
1219 *
1220 * Arguments
1221 *      keg  The keg we should initialize
1222 *
1223 * Returns
1224 *      Nothing
1225 */
1226static void
1227keg_large_init(uma_keg_t keg)
1228{
1229        int pages;
1230
1231        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1232        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1233            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1234
1235        pages = keg->uk_size / UMA_SLAB_SIZE;
1236
1237        /* Account for remainder */
1238        if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1239                pages++;
1240
1241        keg->uk_ppera = pages;
1242        keg->uk_ipers = 1;
1243        keg->uk_rsize = keg->uk_size;
1244
1245        /* We can't do OFFPAGE if we're internal, bail out here. */
1246        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1247                return;
1248
1249        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1250        if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1251                keg->uk_flags |= UMA_ZONE_HASH;
1252}
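/*
 * Example: with 4 KB slabs a 9000 byte item gives pages = 2 plus one more
 * for the remainder, so the keg uses a 3 page slab holding a single item
 * (ipers = 1, rsize = 9000), with the slab header kept OFFPAGE.
 */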
1253
1254static void
1255keg_cachespread_init(uma_keg_t keg)
1256{
1257        int alignsize;
1258        int trailer;
1259        int pages;
1260        int rsize;
1261
1262        alignsize = keg->uk_align + 1;
1263        rsize = keg->uk_size;
1264        /*
1265         * We want one item to start on every align boundary in a page.  To
1266         * do this we will span pages.  We will also extend the item by the
1267         * size of align if it is an even multiple of align.  Otherwise, it
1268         * would fall on the same boundary every time.
1269         */
1270        if (rsize & keg->uk_align)
1271                rsize = (rsize & ~keg->uk_align) + alignsize;
1272        if ((rsize & alignsize) == 0)
1273                rsize += alignsize;
1274        trailer = rsize - keg->uk_size;
1275        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1276        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1277        keg->uk_rsize = rsize;
1278        keg->uk_ppera = pages;
1279        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1280        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1281        KASSERT(keg->uk_ipers <= uma_max_ipers,
1282            ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1283            keg->uk_ipers));
1284}
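/*
 * Worked example (assuming 4 KB pages and a 64 byte alignment): a 128 byte
 * item is padded to rsize = 192 so that rsize is an odd multiple of the
 * alignment.  The slab then spans pages = (192 * 64) / 4096 = 3 pages and
 * holds ipers = (3 * 4096 + 64) / 192 = 64 items; because gcd(192, 4096)
 * is 64, successive items start on every 64 byte boundary of a page
 * exactly once before the pattern repeats, which is the cache-spreading
 * effect this keg type is after.
 */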
1285
1286/*
1287 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1288 * the keg onto the global keg list.
1289 *
1290 * Arguments/Returns follow uma_ctor specifications
1291 *      udata  Actually uma_kctor_args
1292 */
1293static int
1294keg_ctor(void *mem, int size, void *udata, int flags)
1295{
1296        struct uma_kctor_args *arg = udata;
1297        uma_keg_t keg = mem;
1298        uma_zone_t zone;
1299
1300        bzero(keg, size);
1301        keg->uk_size = arg->size;
1302        keg->uk_init = arg->uminit;
1303        keg->uk_fini = arg->fini;
1304        keg->uk_align = arg->align;
1305        keg->uk_free = 0;
1306        keg->uk_pages = 0;
1307        keg->uk_flags = arg->flags;
1308        keg->uk_allocf = page_alloc;
1309        keg->uk_freef = page_free;
1310        keg->uk_recurse = 0;
1311        keg->uk_slabzone = NULL;
1312
1313        /*
1314         * The master zone is passed to us at keg-creation time.
1315         */
1316        zone = arg->zone;
1317        keg->uk_name = zone->uz_name;
1318
1319        if (arg->flags & UMA_ZONE_VM)
1320                keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1321
1322        if (arg->flags & UMA_ZONE_ZINIT)
1323                keg->uk_init = zero_init;
1324
1325        if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
1326                keg->uk_flags |= UMA_ZONE_VTOSLAB;
1327
1328        /*
1329         * The +UMA_FRITM_SZ added to uk_size is to account for the
1330         * linkage that is added to the size in keg_small_init().  If
1331         * we don't account for this here then we may end up in
1332         * keg_small_init() with a calculated 'ipers' of 0.
1333         */
1334        if (keg->uk_flags & UMA_ZONE_REFCNT) {
1335                if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1336                        keg_cachespread_init(keg);
1337                else if ((keg->uk_size+UMA_FRITMREF_SZ) >
1338                    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1339                        keg_large_init(keg);
1340                else
1341                        keg_small_init(keg);
1342        } else {
1343                if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1344                        keg_cachespread_init(keg);
1345                else if ((keg->uk_size+UMA_FRITM_SZ) >
1346                    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1347                        keg_large_init(keg);
1348                else
1349                        keg_small_init(keg);
1350        }
1351
1352        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1353                if (keg->uk_flags & UMA_ZONE_REFCNT)
1354                        keg->uk_slabzone = slabrefzone;
1355                else
1356                        keg->uk_slabzone = slabzone;
1357        }
1358
1359        /*
1360         * If we haven't booted yet we need allocations to go through the
1361         * startup cache until the vm is ready.
1362         */
1363        if (keg->uk_ppera == 1) {
1364#ifdef UMA_MD_SMALL_ALLOC
1365                keg->uk_allocf = uma_small_alloc;
1366                keg->uk_freef = uma_small_free;
1367
1368#ifndef __rtems__
1369                if (booted < UMA_STARTUP)
1370                        keg->uk_allocf = startup_alloc;
1371#endif /* __rtems__ */
1372#else
1373#ifndef __rtems__
1374                if (booted < UMA_STARTUP2)
1375                        keg->uk_allocf = startup_alloc;
1376#endif /* __rtems__ */
1377#endif
1378#ifndef __rtems__
1379        } else if (booted < UMA_STARTUP2 &&
1380            (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1381                keg->uk_allocf = startup_alloc;
1382#else /* __rtems__ */
1383        }
1384#endif /* __rtems__ */
1385
1386        /*
1387         * Initialize keg's lock (shared among zones).
1388         */
1389        if (arg->flags & UMA_ZONE_MTXCLASS)
1390                KEG_LOCK_INIT(keg, 1);
1391        else
1392                KEG_LOCK_INIT(keg, 0);
1393
1394        /*
1395         * If we're putting the slab header in the actual page we need to
1396         * figure out where in each page it goes.  This calculates a right
1397         * justified offset into the memory on an ALIGN_PTR boundary.
1398         */
1399        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1400                u_int totsize;
1401
1402                /* Size of the slab struct and free list */
1403                if (keg->uk_flags & UMA_ZONE_REFCNT)
1404                        totsize = sizeof(struct uma_slab_refcnt) +
1405                            keg->uk_ipers * UMA_FRITMREF_SZ;
1406                else
1407                        totsize = sizeof(struct uma_slab) +
1408                            keg->uk_ipers * UMA_FRITM_SZ;
1409
1410                if (totsize & UMA_ALIGN_PTR)
1411                        totsize = (totsize & ~UMA_ALIGN_PTR) +
1412                            (UMA_ALIGN_PTR + 1);
1413                keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
1414
1415                if (keg->uk_flags & UMA_ZONE_REFCNT)
1416                        totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1417                            + keg->uk_ipers * UMA_FRITMREF_SZ;
1418                else
1419                        totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1420                            + keg->uk_ipers * UMA_FRITM_SZ;
1421
1422                /*
1423                 * The only way the following is possible is if with our
1424                 * UMA_ALIGN_PTR adjustments we are now bigger than
1425                 * UMA_SLAB_SIZE.  I haven't checked whether this is
1426                 * mathematically possible for all cases, so we make
1427                 * sure here anyway.
1428                 */
1429                if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
1430                        printf("zone %s ipers %d rsize %d size %d\n",
1431                            zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1432                            keg->uk_size);
1433                        panic("UMA slab won't fit.");
1434                }
1435        }
1436
1437        if (keg->uk_flags & UMA_ZONE_HASH)
1438                hash_alloc(&keg->uk_hash);
1439
1440#ifdef UMA_DEBUG
1441        printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1442            zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1443            keg->uk_ipers, keg->uk_ppera,
1444            (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
1445#endif
1446
1447        LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1448
1449        mtx_lock(&uma_mtx);
1450        LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1451        mtx_unlock(&uma_mtx);
1452        return (0);
1453}
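/*
 * Example of the uk_pgoff calculation in keg_ctor() above (assuming 4 KB
 * slabs and, say, a slab header plus free list totalling 200 bytes): the
 * total is first rounded up to the next UMA_ALIGN_PTR boundary and then
 * subtracted from the slab size, giving uk_pgoff = 4096 - 200 = 3896, so
 * the header sits flush against the end of the slab while the items occupy
 * the front of it.
 */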
1454
1455/*
1456 * Zone header ctor.  This initializes all fields, locks, etc.
1457 *
1458 * Arguments/Returns follow uma_ctor specifications
1459 *      udata  Actually uma_zctor_args
1460 */
1461static int
1462zone_ctor(void *mem, int size, void *udata, int flags)
1463{
1464        struct uma_zctor_args *arg = udata;
1465        uma_zone_t zone = mem;
1466        uma_zone_t z;
1467        uma_keg_t keg;
1468
1469        bzero(zone, size);
1470        zone->uz_name = arg->name;
1471        zone->uz_ctor = arg->ctor;
1472        zone->uz_dtor = arg->dtor;
1473        zone->uz_slab = zone_fetch_slab;
1474        zone->uz_init = NULL;
1475        zone->uz_fini = NULL;
1476        zone->uz_allocs = 0;
1477        zone->uz_frees = 0;
1478        zone->uz_fails = 0;
1479        zone->uz_sleeps = 0;
1480        zone->uz_fills = zone->uz_count = 0;
1481        zone->uz_flags = 0;
1482        keg = arg->keg;
1483
1484        if (arg->flags & UMA_ZONE_SECONDARY) {
1485                KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1486                zone->uz_init = arg->uminit;
1487                zone->uz_fini = arg->fini;
1488                zone->uz_lock = &keg->uk_lock;
1489                zone->uz_flags |= UMA_ZONE_SECONDARY;
1490                mtx_lock(&uma_mtx);
1491                ZONE_LOCK(zone);
1492                LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1493                        if (LIST_NEXT(z, uz_link) == NULL) {
1494                                LIST_INSERT_AFTER(z, zone, uz_link);
1495                                break;
1496                        }
1497                }
1498                ZONE_UNLOCK(zone);
1499                mtx_unlock(&uma_mtx);
1500        } else if (keg == NULL) {
1501                if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1502                    arg->align, arg->flags)) == NULL)
1503                        return (ENOMEM);
1504        } else {
1505                struct uma_kctor_args karg;
1506                int error;
1507
1508                /* We should only be here from uma_startup() */
1509                karg.size = arg->size;
1510                karg.uminit = arg->uminit;
1511                karg.fini = arg->fini;
1512                karg.align = arg->align;
1513                karg.flags = arg->flags;
1514                karg.zone = zone;
1515                error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1516                    flags);
1517                if (error)
1518                        return (error);
1519        }
1520        /*
1521         * Link in the first keg.
1522         */
1523        zone->uz_klink.kl_keg = keg;
1524        LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1525        zone->uz_lock = &keg->uk_lock;
1526        zone->uz_size = keg->uk_size;
1527        zone->uz_flags |= (keg->uk_flags &
1528            (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1529
1530        /*
1531         * Some internal zones don't have room allocated for the per cpu
1532         * caches.  If we're internal, bail out here.
1533         */
1534        if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1535                KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1536                    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1537                return (0);
1538        }
1539
1540        if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1541                zone->uz_count = BUCKET_MAX;
1542        else if (keg->uk_ipers <= BUCKET_MAX)
1543                zone->uz_count = keg->uk_ipers;
1544        else
1545                zone->uz_count = BUCKET_MAX;
1546        return (0);
1547}
1548
1549/*
1550 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1551 * table and removes the keg from the global list.
1552 *
1553 * Arguments/Returns follow uma_dtor specifications
1554 *      udata  unused
1555 */
1556static void
1557keg_dtor(void *arg, int size, void *udata)
1558{
1559        uma_keg_t keg;
1560
1561        keg = (uma_keg_t)arg;
1562        KEG_LOCK(keg);
1563        if (keg->uk_free != 0) {
1564                printf("Freed UMA keg (%s) was not empty (%d items). "
1565                    " Lost %d pages of memory.\n",
1566                    keg->uk_name ? keg->uk_name : "",
1567                    keg->uk_free, keg->uk_pages);
1568        }
1569        KEG_UNLOCK(keg);
1570
1571        hash_free(&keg->uk_hash);
1572
1573        KEG_LOCK_FINI(keg);
1574}
1575
1576/*
1577 * Zone header dtor.
1578 *
1579 * Arguments/Returns follow uma_dtor specifications
1580 *      udata  unused
1581 */
1582static void
1583zone_dtor(void *arg, int size, void *udata)
1584{
1585        uma_klink_t klink;
1586        uma_zone_t zone;
1587        uma_keg_t keg;
1588
1589        zone = (uma_zone_t)arg;
1590        keg = zone_first_keg(zone);
1591
1592        if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1593                cache_drain(zone);
1594
1595        mtx_lock(&uma_mtx);
1596        LIST_REMOVE(zone, uz_link);
1597        mtx_unlock(&uma_mtx);
1598        /*
1599         * XXX there are some races here where
1600         * the zone can be drained but zone lock
1601         * released and then refilled before we
1602         * remove it... we dont care for now
1603         * remove it... we don't care for now
1604        zone_drain_wait(zone, M_WAITOK);
1605        /*
1606         * Unlink all of our kegs.
1607         */
1608        while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1609                klink->kl_keg = NULL;
1610                LIST_REMOVE(klink, kl_link);
1611                if (klink == &zone->uz_klink)
1612                        continue;
1613                free(klink, M_TEMP);
1614        }
1615        /*
1616         * We only destroy kegs from non secondary zones.
1617         */
1618        if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1619                mtx_lock(&uma_mtx);
1620                LIST_REMOVE(keg, uk_link);
1621                mtx_unlock(&uma_mtx);
1622                zone_free_item(kegs, keg, NULL, SKIP_NONE,
1623                    ZFREE_STATFREE);
1624        }
1625}
1626
1627/*
1628 * Traverses every zone in the system and calls a callback
1629 *
1630 * Arguments:
1631 *      zfunc  A pointer to a function which accepts a zone
1632 *              as an argument.
1633 *
1634 * Returns:
1635 *      Nothing
1636 */
1637static void
1638zone_foreach(void (*zfunc)(uma_zone_t))
1639{
1640        uma_keg_t keg;
1641        uma_zone_t zone;
1642
1643        mtx_lock(&uma_mtx);
1644        LIST_FOREACH(keg, &uma_kegs, uk_link) {
1645                LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1646                        zfunc(zone);
1647        }
1648        mtx_unlock(&uma_mtx);
1649}
1650
1651/* Public functions */
1652/* See uma.h */
1653void
1654uma_startup(void *bootmem, int boot_pages)
1655{
1656        struct uma_zctor_args args;
1657#ifndef __rtems__
1658        uma_slab_t slab;
1659#endif /* __rtems__ */
1660        u_int slabsize;
1661        u_int objsize, totsize, wsize;
1662#ifndef __rtems__
1663        int i;
1664#endif /* __rtems__ */
1665
1666#ifdef UMA_DEBUG
1667        printf("Creating uma keg headers zone and keg.\n");
1668#endif
1669        mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1670
1671        /*
1672         * Figure out the maximum number of items-per-slab we'll have if
1673         * we're using the OFFPAGE slab header to track free items, given
1674         * all possible object sizes and the maximum desired wastage
1675         * (UMA_MAX_WASTE).
1676         *
1677         * We iterate until we find an object size for
1678         * which the calculated wastage in keg_small_init() will be
1679         * enough to warrant OFFPAGE.  Since wastedspace versus objsize
1680         * is an overall increasing see-saw function, we find the smallest
1681         * objsize such that the wastage is always acceptable for objects
1682         * with that objsize or smaller.  Since a smaller objsize always
1683         * generates a larger possible uma_max_ipers, we use this computed
1684         * objsize to calculate the largest ipers possible.  Since the
1685         * ipers calculated for OFFPAGE slab headers is always larger than
1686         * the ipers initially calculated in keg_small_init(), we use
1687         * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1688         * obtain the maximum ipers possible for offpage slab headers.
1689         *
1690         * It should be noted that ipers versus objsize is an inversely
1691         * proportional function which drops off rather quickly so as
1692         * long as our UMA_MAX_WASTE is such that the objsize we calculate
1693         * falls into the portion of the inverse relation AFTER the steep
1694         * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1695         *
1696         * Note that we have 8-bits (1 byte) to use as a freelist index
1697         * inside the actual slab header itself and this is enough to
1698         * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
1699         * object with offpage slab header would have ipers =
1700         * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1701         * 1 greater than what our byte-integer freelist index can
1702         * accommodate, but we know that this situation never occurs as
1703         * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1704         * that we need to go to offpage slab headers.  Or, if we do,
1705         * then we trap that condition below and panic in the INVARIANTS case.
1706         */
1707        wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1708        totsize = wsize;
1709        objsize = UMA_SMALLEST_UNIT;
1710        while (totsize >= wsize) {
1711                totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1712                    (objsize + UMA_FRITM_SZ);
1713                totsize *= (UMA_FRITM_SZ + objsize);
1714                objsize++;
1715        }
1716        if (objsize > UMA_SMALLEST_UNIT)
1717                objsize--;
1718        uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
1719
1720        wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1721        totsize = wsize;
1722        objsize = UMA_SMALLEST_UNIT;
1723        while (totsize >= wsize) {
1724                totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1725                    (objsize + UMA_FRITMREF_SZ);
1726                totsize *= (UMA_FRITMREF_SZ + objsize);
1727                objsize++;
1728        }
1729        if (objsize > UMA_SMALLEST_UNIT)
1730                objsize--;
1731        uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);
1732
1733        KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1734            ("uma_startup: calculated uma_max_ipers values too large!"));
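
        /*
         * A worked sketch of the computation above, using assumed example
         * numbers rather than the real constants: with a 4096-byte slab,
         * a 32-byte struct uma_slab and UMA_MAX_WASTE at roughly a tenth
         * of the slab (~409 bytes), wsize would be 4096 - 32 - 409 = 3655.
         * Each pass of the loop packs the slab for the current objsize:
         *
         *      ipers   = (4096 - 32) / (objsize + UMA_FRITM_SZ)
         *      totsize = ipers * (objsize + UMA_FRITM_SZ)
         *
         * and stops at the smallest objsize for which the space left over
         * after packing exceeds UMA_MAX_WASTE (totsize < wsize).  That
         * objsize then bounds uma_max_ipers via UMA_SLAB_SIZE / objsize,
         * clamped to no less than 64; the second loop repeats the exercise
         * with the larger refcnt slab header for uma_max_ipers_ref.
         */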
1735
1736#ifdef UMA_DEBUG
1737        printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1738        printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
1739            uma_max_ipers_ref);
1740#endif
1741
1742        /* "manually" create the initial zone */
1743        args.name = "UMA Kegs";
1744        args.size = sizeof(struct uma_keg);
1745        args.ctor = keg_ctor;
1746        args.dtor = keg_dtor;
1747        args.uminit = zero_init;
1748        args.fini = NULL;
1749        args.keg = &masterkeg;
1750        args.align = 32 - 1;
1751        args.flags = UMA_ZFLAG_INTERNAL;
1752        /* The initial zone has no per-CPU queues so it's smaller */
1753        zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1754
1755#ifndef __rtems__
1756#ifdef UMA_DEBUG
1757        printf("Filling boot free list.\n");
1758#endif
1759        for (i = 0; i < boot_pages; i++) {
1760                slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1761                slab->us_data = (u_int8_t *)slab;
1762                slab->us_flags = UMA_SLAB_BOOT;
1763                LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1764        }
1765        mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1766#endif /* __rtems__ */
1767
1768#ifdef UMA_DEBUG
1769        printf("Creating uma zone headers zone and keg.\n");
1770#endif
1771        args.name = "UMA Zones";
1772        args.size = sizeof(struct uma_zone) +
1773            (sizeof(struct uma_cache) * (mp_maxid + 1));
1774        args.ctor = zone_ctor;
1775        args.dtor = zone_dtor;
1776        args.uminit = zero_init;
1777        args.fini = NULL;
1778        args.keg = NULL;
1779        args.align = 32 - 1;
1780        args.flags = UMA_ZFLAG_INTERNAL;
1781        /* The initial zone has no per-CPU queues so it's smaller */
1782        zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1783
1784#ifdef UMA_DEBUG
1785        printf("Initializing pcpu cache locks.\n");
1786#endif
1787#ifdef UMA_DEBUG
1788        printf("Creating slab and hash zones.\n");
1789#endif
1790
1791        /*
1792         * This is the max number of free list items we'll have with
1793         * offpage slabs.
1794         */
1795        slabsize = uma_max_ipers * UMA_FRITM_SZ;
1796        slabsize += sizeof(struct uma_slab);
1797
1798        /* Now make a zone for slab headers */
1799        slabzone = uma_zcreate("UMA Slabs",
1800                                slabsize,
1801                                NULL, NULL, NULL, NULL,
1802                                UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1803
1804        /*
1805         * We also create a zone for the bigger slabs with reference
1806         * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1807         */
1808        slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1809        slabsize += sizeof(struct uma_slab_refcnt);
1810        slabrefzone = uma_zcreate("UMA RCntSlabs",
1811                                  slabsize,
1812                                  NULL, NULL, NULL, NULL,
1813                                  UMA_ALIGN_PTR,
1814                                  UMA_ZFLAG_INTERNAL);
1815
1816        hashzone = uma_zcreate("UMA Hash",
1817            sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1818            NULL, NULL, NULL, NULL,
1819            UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1820
1821        bucket_init();
1822
1823#ifndef __rtems__
1824        booted = UMA_STARTUP;
1825#endif /* __rtems__ */
1826
1827#ifdef UMA_DEBUG
1828        printf("UMA startup complete.\n");
1829#endif
1830}
1831#ifdef __rtems__
1832static void
1833rtems_bsd_uma_startup(void *unused)
1834{
1835        (void) unused;
1836
1837        uma_startup(NULL, 0);
1838}
1839
1840SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_SECOND,
1841    rtems_bsd_uma_startup, NULL);
1842#endif /* __rtems__ */
1843
1844#ifndef __rtems__
1845/* see uma.h */
1846void
1847uma_startup2(void)
1848{
1849        booted = UMA_STARTUP2;
1850        bucket_enable();
1851#ifdef UMA_DEBUG
1852        printf("UMA startup2 complete.\n");
1853#endif
1854}
1855#endif /* __rtems__ */
1856
1857/*
1858 * Initialize our callout handle.
1859 */
1860
1861
1862static void
1863uma_startup3(void)
1864{
1865#ifdef UMA_DEBUG
1866        printf("Starting callout.\n");
1867#endif
1868        callout_init(&uma_callout, CALLOUT_MPSAFE);
1869        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1870#ifdef UMA_DEBUG
1871        printf("UMA startup3 complete.\n");
1872#endif
1873}
1874
1875static uma_keg_t
1876uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1877                int align, u_int32_t flags)
1878{
1879        struct uma_kctor_args args;
1880
1881        args.size = size;
1882        args.uminit = uminit;
1883        args.fini = fini;
1884        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1885        args.flags = flags;
1886        args.zone = zone;
1887        return (zone_alloc_item(kegs, &args, M_WAITOK));
1888}
1889
1890/* See uma.h */
1891void
1892uma_set_align(int align)
1893{
1894
1895        if (align != UMA_ALIGN_CACHE)
1896                uma_align_cache = align;
1897}
1898
1899/* See uma.h */
1900uma_zone_t
1901uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1902                uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1903
1904{
1905        struct uma_zctor_args args;
1906
1907        /* This stuff is essential for the zone ctor */
1908        args.name = name;
1909        args.size = size;
1910        args.ctor = ctor;
1911        args.dtor = dtor;
1912        args.uminit = uminit;
1913        args.fini = fini;
1914        args.align = align;
1915        args.flags = flags;
1916        args.keg = NULL;
1917
1918        return (zone_alloc_item(zones, &args, M_WAITOK));
1919}
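
/*
 * Illustrative usage sketch (not part of this file; "struct foo" and
 * "foo_zone" are hypothetical names): consumers normally pair uma_zcreate()
 * with the uma_zalloc()/uma_zfree() wrappers from uma.h:
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *
 *	struct foo *fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, fp);
 *
 * All items should be freed back before uma_zdestroy(foo_zone); otherwise
 * keg_dtor() above reports the keg as not empty and its pages are lost.
 */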
1920
1921/* See uma.h */
1922uma_zone_t
1923uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1924                    uma_init zinit, uma_fini zfini, uma_zone_t master)
1925{
1926        struct uma_zctor_args args;
1927        uma_keg_t keg;
1928
1929        keg = zone_first_keg(master);
1930        args.name = name;
1931        args.size = keg->uk_size;
1932        args.ctor = ctor;
1933        args.dtor = dtor;
1934        args.uminit = zinit;
1935        args.fini = zfini;
1936        args.align = keg->uk_align;
1937        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1938        args.keg = keg;
1939
1940        /* XXX Attaches only one keg of potentially many. */
1941        return (zone_alloc_item(zones, &args, M_WAITOK));
1942}
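
/*
 * Illustrative sketch (hypothetical names): a secondary zone layers its own
 * ctor/dtor on top of the master zone's keg, so both zones share the same
 * backing slabs:
 *
 *	foo_cache_zone = uma_zsecond_create("foo cache",
 *	    foo_cache_ctor, foo_cache_dtor, NULL, NULL, foo_zone);
 *
 * The stock FreeBSD tree uses this arrangement for the mbuf packet zone,
 * which sits on top of the plain mbuf zone.
 */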
1943
1944#ifndef __rtems__
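/*
 * Lock two zones in a consistent (address-based) order so that two threads
 * locking the same pair from opposite directions cannot deadlock; MTX_DUPOK
 * tells WITNESS that holding two locks of this type at once is intentional.
 */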
1945static void
1946zone_lock_pair(uma_zone_t a, uma_zone_t b)
1947{
1948        if (a < b) {
1949                ZONE_LOCK(a);
1950                mtx_lock_flags(b->uz_lock, MTX_DUPOK);
1951        } else {
1952                ZONE_LOCK(b);
1953                mtx_lock_flags(a->uz_lock, MTX_DUPOK);
1954        }
1955}
1956
1957static void
1958zone_unlock_pair(uma_zone_t a, uma_zone_t b)
1959{
1960
1961        ZONE_UNLOCK(a);
1962        ZONE_UNLOCK(b);
1963}
1964
1965int
1966uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
1967{
1968        uma_klink_t klink;
1969        uma_klink_t kl;
1970        int error;
1971
1972        error = 0;
1973        klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
1974
1975        zone_lock_pair(zone, master);
1976        /*
1977         * zone must use vtoslab() to resolve objects and must already be
1978         * a secondary.
1979         */
1980        if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
1981            != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
1982                error = EINVAL;
1983                goto out;
1984        }
1985        /*
1986         * The new master must also use vtoslab().
1987         */
1988        if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
1989                error = EINVAL;
1990                goto out;
1991        }
1992        /*
1993         * Both must either be refcnt, or not be refcnt.
1994         */
1995        if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
1996            (master->uz_flags & UMA_ZONE_REFCNT)) {
1997                error = EINVAL;
1998                goto out;
1999        }
2000        /*
2001         * The underlying object must be the same size.  rsize
2002         * may be different.
2003         */
2004        if (master->uz_size != zone->uz_size) {
2005                error = E2BIG;
2006                goto out;
2007        }
2008        /*
2009         * Put it at the end of the list.
2010         */
2011        klink->kl_keg = zone_first_keg(master);
2012        LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2013                if (LIST_NEXT(kl, kl_link) == NULL) {
2014                        LIST_INSERT_AFTER(kl, klink, kl_link);
2015                        break;
2016                }
2017        }
2018        klink = NULL;
2019        zone->uz_flags |= UMA_ZFLAG_MULTI;
2020        zone->uz_slab = zone_fetch_slab_multi;
2021
2022out:
2023        zone_unlock_pair(zone, master);
2024        if (klink != NULL)
2025                free(klink, M_TEMP);
2026
2027        return (error);
2028}
2029#endif /* __rtems__ */
2030
2031
2032/* See uma.h */
2033void
2034uma_zdestroy(uma_zone_t zone)
2035{
2036
2037        zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
2038}
2039
2040/* See uma.h */
2041void *
2042uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2043{
2044        void *item;
2045        uma_cache_t cache;
2046        uma_bucket_t bucket;
2047        int cpu;
2048
2049        /* This is the fast path allocation */
2050#ifdef UMA_DEBUG_ALLOC_1
2051        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
2052#endif
2053        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2054            zone->uz_name, flags);
2055
2056        if (flags & M_WAITOK) {
2057                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2058                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2059        }
2060
2061        /*
2062         * If possible, allocate from the per-CPU cache.  There are two
2063         * requirements for safe access to the per-CPU cache: (1) the thread
2064         * accessing the cache must not be preempted or yield during access,
2065         * and (2) the thread must not migrate CPUs without switching which
2066         * cache it accesses.  We rely on a critical section to prevent
2067         * preemption and migration.  We release the critical section in
2068         * order to acquire the zone mutex if we are unable to allocate from
2069         * the current cache; when we re-acquire the critical section, we
2070         * must detect and handle migration if it has occurred.
2071         */
2072zalloc_restart:
2073        critical_enter();
2074        cpu = curcpu;
2075        cache = &zone->uz_cpu[cpu];
2076
2077zalloc_start:
2078        bucket = cache->uc_allocbucket;
2079
2080        if (bucket) {
2081                if (bucket->ub_cnt > 0) {
2082                        bucket->ub_cnt--;
2083                        item = bucket->ub_bucket[bucket->ub_cnt];
2084#ifdef INVARIANTS
2085                        bucket->ub_bucket[bucket->ub_cnt] = NULL;
2086#endif
2087                        KASSERT(item != NULL,
2088                            ("uma_zalloc: Bucket pointer mangled."));
2089                        cache->uc_allocs++;
2090                        critical_exit();
2091#ifdef INVARIANTS
2092                        ZONE_LOCK(zone);
2093                        uma_dbg_alloc(zone, NULL, item);
2094                        ZONE_UNLOCK(zone);
2095#endif
2096                        if (zone->uz_ctor != NULL) {
2097                                if (zone->uz_ctor(item, zone->uz_size,
2098                                    udata, flags) != 0) {
2099                                        zone_free_item(zone, item, udata,
2100                                            SKIP_DTOR, ZFREE_STATFAIL |
2101                                            ZFREE_STATFREE);
2102                                        return (NULL);
2103                                }
2104                        }
2105                        if (flags & M_ZERO)
2106                                bzero(item, zone->uz_size);
2107                        return (item);
2108                } else if (cache->uc_freebucket) {
2109                        /*
2110                         * We have run out of items in our allocbucket.
2111                         * See if we can switch with our free bucket.
2112                         */
2113                        if (cache->uc_freebucket->ub_cnt > 0) {
2114#ifdef UMA_DEBUG_ALLOC
2115                                printf("uma_zalloc: Swapping empty with"
2116                                    " alloc.\n");
2117#endif
2118                                bucket = cache->uc_freebucket;
2119                                cache->uc_freebucket = cache->uc_allocbucket;
2120                                cache->uc_allocbucket = bucket;
2121
2122                                goto zalloc_start;
2123                        }
2124                }
2125        }
2126        /*
2127         * The attempt to retrieve the item from the per-CPU cache has failed, so
2128         * we must go back to the zone.  This requires the zone lock, so we
2129         * must drop the critical section, then re-acquire it when we go back
2130         * to the cache.  Since the critical section is released, we may be
2131         * preempted or migrate.  As such, make sure not to maintain any
2132         * thread-local state specific to the cache from prior to releasing
2133         * the critical section.
2134         */
2135        critical_exit();
2136        ZONE_LOCK(zone);
2137        critical_enter();
2138        cpu = curcpu;
2139        cache = &zone->uz_cpu[cpu];
2140        bucket = cache->uc_allocbucket;
2141        if (bucket != NULL) {
2142                if (bucket->ub_cnt > 0) {
2143                        ZONE_UNLOCK(zone);
2144                        goto zalloc_start;
2145                }
2146                bucket = cache->uc_freebucket;
2147                if (bucket != NULL && bucket->ub_cnt > 0) {
2148                        ZONE_UNLOCK(zone);
2149                        goto zalloc_start;
2150                }
2151        }
2152
2153        /* Since we have locked the zone we may as well send back our stats */
2154        zone->uz_allocs += cache->uc_allocs;
2155        cache->uc_allocs = 0;
2156        zone->uz_frees += cache->uc_frees;
2157        cache->uc_frees = 0;
2158
2159        /* Our old one is now a free bucket */
2160        if (cache->uc_allocbucket) {
2161                KASSERT(cache->uc_allocbucket->ub_cnt == 0,
2162                    ("uma_zalloc_arg: Freeing a non free bucket."));
2163                LIST_INSERT_HEAD(&zone->uz_free_bucket,
2164                    cache->uc_allocbucket, ub_link);
2165                cache->uc_allocbucket = NULL;
2166        }
2167
2168        /* Check the free list for a new alloc bucket */
2169        if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
2170                KASSERT(bucket->ub_cnt != 0,
2171                    ("uma_zalloc_arg: Returning an empty bucket."));
2172
2173                LIST_REMOVE(bucket, ub_link);
2174                cache->uc_allocbucket = bucket;
2175                ZONE_UNLOCK(zone);
2176                goto zalloc_start;
2177        }
2178        /* We are no longer associated with this CPU. */
2179        critical_exit();
2180
2181        /* Bump up our uz_count so we get here less */
2182        if (zone->uz_count < BUCKET_MAX)
2183                zone->uz_count++;
2184
2185        /*
2186         * Now let's just fill a bucket and put it on the free list.  If that
2187         * works we'll restart the allocation from the beginning.
2188         */
2189        if (zone_alloc_bucket(zone, flags)) {
2190                ZONE_UNLOCK(zone);
2191                goto zalloc_restart;
2192        }
2193        ZONE_UNLOCK(zone);
2194        /*
2195         * We may not be able to get a bucket so return an actual item.
2196         */
2197#ifdef UMA_DEBUG
2198        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2199#endif
2200
2201        item = zone_alloc_item(zone, udata, flags);
2202        return (item);
2203}
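
/*
 * Illustrative sketch (hypothetical names): callers that cannot sleep pass
 * M_NOWAIT and must handle a NULL return; M_WAITOK callers may sleep here,
 * which is what the WITNESS_WARN() check above flags:
 *
 *	fp = uma_zalloc(foo_zone, M_NOWAIT);
 *	if (fp == NULL)
 *		return (ENOMEM);
 */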
2204
2205static uma_slab_t
2206keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2207{
2208        uma_slab_t slab;
2209
2210        mtx_assert(&keg->uk_lock, MA_OWNED);
2211        slab = NULL;
2212
2213        for (;;) {
2214                /*
2215                 * Find a slab with some space.  Prefer slabs that are partially
2216                 * used over those that are totally full.  This helps to reduce
2217                 * fragmentation.
2218                 */
2219                if (keg->uk_free != 0) {
2220                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
2221                                slab = LIST_FIRST(&keg->uk_part_slab);
2222                        } else {
2223                                slab = LIST_FIRST(&keg->uk_free_slab);
2224                                LIST_REMOVE(slab, us_link);
2225                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2226                                    us_link);
2227                        }
2228                        MPASS(slab->us_keg == keg);
2229                        return (slab);
2230                }
2231
2232                /*
2233                 * M_NOVM means don't ask at all!
2234                 */
2235                if (flags & M_NOVM)
2236                        break;
2237
2238                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2239                        keg->uk_flags |= UMA_ZFLAG_FULL;
2240                        /*
2241                         * If this is not a multi-zone, set the FULL bit.
2242                         * Otherwise slab_multi() takes care of it.
2243                         */
2244                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0)
2245                                zone->uz_flags |= UMA_ZFLAG_FULL;
2246                        if (flags & M_NOWAIT)
2247                                break;
2248                        zone->uz_sleeps++;
2249                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2250                        continue;
2251                }
2252                keg->uk_recurse++;
2253                slab = keg_alloc_slab(keg, zone, flags);
2254                keg->uk_recurse--;
2255                /*
2256                 * If we got a slab here it's safe to mark it partially used
2257                 * and return.  We assume that the caller is going to remove
2258                 * at least one item.
2259                 */
2260                if (slab) {
2261                        MPASS(slab->us_keg == keg);
2262                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2263                        return (slab);
2264                }
2265                /*
2266                 * We might not have been able to get a slab but another cpu
2267                 * could have while we were unlocked.  Check again before we
2268                 * fail.
2269                 */
2270                flags |= M_NOVM;
2271        }
2272        return (slab);
2273}
2274
2275static inline void
2276zone_relock(uma_zone_t zone, uma_keg_t keg)
2277{
2278        if (zone->uz_lock != &keg->uk_lock) {
2279                KEG_UNLOCK(keg);
2280                ZONE_LOCK(zone);
2281        }
2282}
2283
2284static inline void
2285keg_relock(uma_keg_t keg, uma_zone_t zone)
2286{
2287        if (zone->uz_lock != &keg->uk_lock) {
2288                ZONE_UNLOCK(zone);
2289                KEG_LOCK(keg);
2290        }
2291}
2292
2293static uma_slab_t
2294zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2295{
2296        uma_slab_t slab;
2297
2298        if (keg == NULL)
2299                keg = zone_first_keg(zone);
2300        /*
2301         * This is to prevent us from recursively trying to allocate
2302         * buckets.  The problem is that if an allocation forces us to
2303         * grab a new bucket we will call page_alloc, which will go off
2304         * and cause the vm to allocate vm_map_entries.  If we need new
2305         * buckets there too we will recurse in kmem_alloc and bad
2306         * things happen.  So instead we return a NULL bucket, and make
2307         * the code that allocates buckets smart enough to deal with it
2308         */
2309        if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0)
2310                return (NULL);
2311
2312        for (;;) {
2313                slab = keg_fetch_slab(keg, zone, flags);
2314                if (slab)
2315                        return (slab);
2316                if (flags & (M_NOWAIT | M_NOVM))
2317                        break;
2318        }
2319        return (NULL);
2320}
2321
2322#ifndef __rtems__
2323/*
2324 * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2325 * with the keg locked.  Caller must call zone_relock() afterwards if the
2326 * zone lock is required.  If NULL is returned, the zone lock is held instead.
2327 *
2328 * The last pointer is used to seed the search.  It is not required.
2329 */
2330static uma_slab_t
2331zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2332{
2333        uma_klink_t klink;
2334        uma_slab_t slab;
2335        uma_keg_t keg;
2336        int flags;
2337        int empty;
2338        int full;
2339
2340        /*
2341         * Don't wait on the first pass.  This will skip limit tests
2342         * as well.  We don't want to block if we can find a provider
2343         * without blocking.
2344         */
2345        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2346        /*
2347         * Use the last slab allocated as a hint for where to start
2348         * the search.
2349         */
2350        if (last) {
2351                slab = keg_fetch_slab(last, zone, flags);
2352                if (slab)
2353                        return (slab);
2354                zone_relock(zone, last);
2355                last = NULL;
2356        }
2357        /*
2358         * Loop until we have a slab in case of transient failures
2359         * while M_WAITOK is specified.  I'm not sure this is 100%
2360         * required but we've done it for so long now.
2361         */
2362        for (;;) {
2363                empty = 0;
2364                full = 0;
2365                /*
2366                 * Search the available kegs for slabs.  Be careful to hold the
2367                 * correct lock while calling into the keg layer.
2368                 */
2369                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2370                        keg = klink->kl_keg;
2371                        keg_relock(keg, zone);
2372                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2373                                slab = keg_fetch_slab(keg, zone, flags);
2374                                if (slab)
2375                                        return (slab);
2376                        }
2377                        if (keg->uk_flags & UMA_ZFLAG_FULL)
2378                                full++;
2379                        else
2380                                empty++;
2381                        zone_relock(zone, keg);
2382                }
2383                if (rflags & (M_NOWAIT | M_NOVM))
2384                        break;
2385                flags = rflags;
2386                /*
2387                 * All kegs are full.  XXX We can't atomically check all kegs
2388                 * and sleep so just sleep for a short period and retry.
2389                 */
2390                if (full && !empty) {
2391                        zone->uz_flags |= UMA_ZFLAG_FULL;
2392                        zone->uz_sleeps++;
2393                        msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
2394                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
2395                        continue;
2396                }
2397        }
2398        return (NULL);
2399}
2400#endif /* __rtems__ */
2401
2402static void *
2403slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
2404{
2405        uma_keg_t keg;
2406        uma_slabrefcnt_t slabref;
2407        void *item;
2408        u_int8_t freei;
2409
2410        keg = slab->us_keg;
2411        mtx_assert(&keg->uk_lock, MA_OWNED);
2412
2413        freei = slab->us_firstfree;
2414        if (keg->uk_flags & UMA_ZONE_REFCNT) {
2415                slabref = (uma_slabrefcnt_t)slab;
2416                slab->us_firstfree = slabref->us_freelist[freei].us_item;
2417        } else {
2418                slab->us_firstfree = slab->us_freelist[freei].us_item;
2419        }
2420        item = slab->us_data + (keg->uk_rsize * freei);
2421
2422        slab->us_freecount--;
2423        keg->uk_free--;
2424#ifdef INVARIANTS
2425        uma_dbg_alloc(zone, slab, item);
2426#endif
2427        /* Move this slab to the full list */
2428        if (slab->us_freecount == 0) {
2429                LIST_REMOVE(slab, us_link);
2430                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2431        }
2432
2433        return (item);
2434}
2435
2436static int
2437zone_alloc_bucket(uma_zone_t zone, int flags)
2438{
2439        uma_bucket_t bucket;
2440        uma_slab_t slab;
2441        uma_keg_t keg;
2442        int16_t saved;
2443        int max, origflags = flags;
2444
2445        /*
2446         * Try this zone's free list first so we don't allocate extra buckets.
2447         */
2448        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2449                KASSERT(bucket->ub_cnt == 0,
2450                    ("zone_alloc_bucket: Bucket on free list is not empty."));
2451                LIST_REMOVE(bucket, ub_link);
2452        } else {
2453                int bflags;
2454
2455                bflags = (flags & ~M_ZERO);
2456                if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2457                        bflags |= M_NOVM;
2458
2459                ZONE_UNLOCK(zone);
2460                bucket = bucket_alloc(zone->uz_count, bflags);
2461                ZONE_LOCK(zone);
2462        }
2463
2464        if (bucket == NULL) {
2465                return (0);
2466        }
2467
2468#ifdef SMP
2469        /*
2470         * This code is here to limit the number of simultaneous bucket fills
2471         * for any given zone to the number of per cpu caches in this zone. This
2472         * is done so that we don't allocate more memory than we really need.
2473         */
2474        if (zone->uz_fills >= mp_ncpus)
2475                goto done;
2476
2477#endif
2478        zone->uz_fills++;
2479
2480        max = MIN(bucket->ub_entries, zone->uz_count);
2481        /* Try to keep the buckets totally full */
2482        saved = bucket->ub_cnt;
2483        slab = NULL;
2484        keg = NULL;
2485        while (bucket->ub_cnt < max &&
2486            (slab = zone->uz_slab(zone, keg, flags)) != NULL) {
2487                keg = slab->us_keg;
2488                while (slab->us_freecount && bucket->ub_cnt < max) {
2489                        bucket->ub_bucket[bucket->ub_cnt++] =
2490                            slab_alloc_item(zone, slab);
2491                }
2492
2493                /* Don't block on the next fill */
2494                flags |= M_NOWAIT;
2495        }
2496        if (slab)
2497                zone_relock(zone, keg);
2498
2499        /*
2500         * We unlock here because we need to call the zone's init.
2501         * It should be safe to unlock because the slab dealt with
2502         * above is already on the appropriate list within the keg
2503         * and the bucket we filled is not yet on any list, so we
2504         * own it.
2505         */
2506        if (zone->uz_init != NULL) {
2507                int i;
2508
2509                ZONE_UNLOCK(zone);
2510                for (i = saved; i < bucket->ub_cnt; i++)
2511                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2512                            origflags) != 0)
2513                                break;
2514                /*
2515                 * If we couldn't initialize the whole bucket, put the
2516                 * rest back onto the freelist.
2517                 */
2518                if (i != bucket->ub_cnt) {
2519                        int j;
2520
2521                        for (j = i; j < bucket->ub_cnt; j++) {
2522                                zone_free_item(zone, bucket->ub_bucket[j],
2523                                    NULL, SKIP_FINI, 0);
2524#ifdef INVARIANTS
2525                                bucket->ub_bucket[j] = NULL;
2526#endif
2527                        }
2528                        bucket->ub_cnt = i;
2529                }
2530                ZONE_LOCK(zone);
2531        }
2532
2533        zone->uz_fills--;
2534        if (bucket->ub_cnt != 0) {
2535                LIST_INSERT_HEAD(&zone->uz_full_bucket,
2536                    bucket, ub_link);
2537                return (1);
2538        }
2539#ifdef SMP
2540done:
2541#endif
2542        bucket_free(bucket);
2543
2544        return (0);
2545}
2546/*
2547 * Allocates an item for an internal zone
2548 *
2549 * Arguments
2550 *      zone   The zone to alloc for.
2551 *      udata  The data to be passed to the constructor.
2552 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2553 *
2554 * Returns
2555 *      NULL if there is no memory and M_NOWAIT is set
2556 *      An item if successful
2557 */
2558
2559static void *
2560zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2561{
2562        uma_slab_t slab;
2563        void *item;
2564
2565        item = NULL;
2566
2567#ifdef UMA_DEBUG_ALLOC
2568        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2569#endif
2570        ZONE_LOCK(zone);
2571
2572        slab = zone->uz_slab(zone, NULL, flags);
2573        if (slab == NULL) {
2574                zone->uz_fails++;
2575                ZONE_UNLOCK(zone);
2576                return (NULL);
2577        }
2578
2579        item = slab_alloc_item(zone, slab);
2580
2581        zone_relock(zone, slab->us_keg);
2582        zone->uz_allocs++;
2583        ZONE_UNLOCK(zone);
2584
2585        /*
2586         * We have to call both the zone's init (not the keg's init)
2587         * and the zone's ctor.  This is because the item is going from
2588         * a keg slab directly to the user, and the user is expecting it
2589         * to be both zone-init'd as well as zone-ctor'd.
2590         */
2591        if (zone->uz_init != NULL) {
2592                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2593                        zone_free_item(zone, item, udata, SKIP_FINI,
2594                            ZFREE_STATFAIL | ZFREE_STATFREE);
2595                        return (NULL);
2596                }
2597        }
2598        if (zone->uz_ctor != NULL) {
2599                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2600                        zone_free_item(zone, item, udata, SKIP_DTOR,
2601                            ZFREE_STATFAIL | ZFREE_STATFREE);
2602                        return (NULL);
2603                }
2604        }
2605        if (flags & M_ZERO)
2606                bzero(item, zone->uz_size);
2607
2608        return (item);
2609}
2610
2611/* See uma.h */
2612void
2613uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2614{
2615        uma_cache_t cache;
2616        uma_bucket_t bucket;
2617        int bflags;
2618        int cpu;
2619
2620#ifdef UMA_DEBUG_ALLOC_1
2621        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2622#endif
2623        CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2624            zone->uz_name);
2625
2626        /* uma_zfree(..., NULL) does nothing, to match free(9). */
2627        if (item == NULL)
2628                return;
2629
2630        if (zone->uz_dtor)
2631                zone->uz_dtor(item, zone->uz_size, udata);
2632
2633#ifdef INVARIANTS
2634        ZONE_LOCK(zone);
2635        if (zone->uz_flags & UMA_ZONE_MALLOC)
2636                uma_dbg_free(zone, udata, item);
2637        else
2638                uma_dbg_free(zone, NULL, item);
2639        ZONE_UNLOCK(zone);
2640#endif
2641        /*
2642         * The race here is acceptable.  If we miss it we'll just have to wait
2643         * a little longer for the limits to be reset.
2644         */
2645        if (zone->uz_flags & UMA_ZFLAG_FULL)
2646                goto zfree_internal;
2647
2648        /*
2649         * If possible, free to the per-CPU cache.  There are two
2650         * requirements for safe access to the per-CPU cache: (1) the thread
2651         * accessing the cache must not be preempted or yield during access,
2652         * and (2) the thread must not migrate CPUs without switching which
2653         * cache it accesses.  We rely on a critical section to prevent
2654         * preemption and migration.  We release the critical section in
2655         * order to acquire the zone mutex if we are unable to free to the
2656         * current cache; when we re-acquire the critical section, we must
2657         * detect and handle migration if it has occurred.
2658         */
2659zfree_restart:
2660        critical_enter();
2661        cpu = curcpu;
2662        cache = &zone->uz_cpu[cpu];
2663
2664zfree_start:
2665        bucket = cache->uc_freebucket;
2666
2667        if (bucket) {
2668                /*
2669                 * Do we have room in our bucket? It is OK for this uz count
2670                 * check to be slightly out of sync.
2671                 */
2672
2673                if (bucket->ub_cnt < bucket->ub_entries) {
2674                        KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2675                            ("uma_zfree: Freeing to non free bucket index."));
2676                        bucket->ub_bucket[bucket->ub_cnt] = item;
2677                        bucket->ub_cnt++;
2678                        cache->uc_frees++;
2679                        critical_exit();
2680                        return;
2681                } else if (cache->uc_allocbucket) {
2682#ifdef UMA_DEBUG_ALLOC
2683                        printf("uma_zfree: Swapping buckets.\n");
2684#endif
2685                        /*
2686                         * We have run out of space in our freebucket.
2687                         * See if we can switch with our alloc bucket.
2688                         */
2689                        if (cache->uc_allocbucket->ub_cnt <
2690                            cache->uc_freebucket->ub_cnt) {
2691                                bucket = cache->uc_freebucket;
2692                                cache->uc_freebucket = cache->uc_allocbucket;
2693                                cache->uc_allocbucket = bucket;
2694                                goto zfree_start;
2695                        }
2696                }
2697        }
2698        /*
2699         * We can get here for two reasons:
2700         *
2701         * 1) The buckets are NULL
2702         * 2) The alloc and free buckets are both somewhat full.
2703         *
2704         * We must go back to the zone, which requires acquiring the zone lock,
2705         * which in turn means we must release and re-acquire the critical
2706         * section.  Since the critical section is released, we may be
2707         * preempted or migrate.  As such, make sure not to maintain any
2708         * thread-local state specific to the cache from prior to releasing
2709         * the critical section.
2710         */
2711        critical_exit();
2712        ZONE_LOCK(zone);
2713        critical_enter();
2714        cpu = curcpu;
2715        cache = &zone->uz_cpu[cpu];
2716        if (cache->uc_freebucket != NULL) {
2717                if (cache->uc_freebucket->ub_cnt <
2718                    cache->uc_freebucket->ub_entries) {
2719                        ZONE_UNLOCK(zone);
2720                        goto zfree_start;
2721                }
2722                if (cache->uc_allocbucket != NULL &&
2723                    (cache->uc_allocbucket->ub_cnt <
2724                    cache->uc_freebucket->ub_cnt)) {
2725                        ZONE_UNLOCK(zone);
2726                        goto zfree_start;
2727                }
2728        }
2729
2730        /* Since we have locked the zone we may as well send back our stats */
2731        zone->uz_allocs += cache->uc_allocs;
2732        cache->uc_allocs = 0;
2733        zone->uz_frees += cache->uc_frees;
2734        cache->uc_frees = 0;
2735
2736        bucket = cache->uc_freebucket;
2737        cache->uc_freebucket = NULL;
2738
2739        /* Can we throw this on the zone full list? */
2740        if (bucket != NULL) {
2741#ifdef UMA_DEBUG_ALLOC
2742                printf("uma_zfree: Putting old bucket on the free list.\n");
2743#endif
2744                /* ub_cnt is pointing to the last free item */
2745                KASSERT(bucket->ub_cnt != 0,
2746                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2747                LIST_INSERT_HEAD(&zone->uz_full_bucket,
2748                    bucket, ub_link);
2749        }
2750        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2751                LIST_REMOVE(bucket, ub_link);
2752                ZONE_UNLOCK(zone);
2753                cache->uc_freebucket = bucket;
2754                goto zfree_start;
2755        }
2756        /* We are no longer associated with this CPU. */
2757        critical_exit();
2758
2759        /* And the zone.. */
2760        ZONE_UNLOCK(zone);
2761
2762#ifdef UMA_DEBUG_ALLOC
2763        printf("uma_zfree: Allocating new free bucket.\n");
2764#endif
2765        bflags = M_NOWAIT;
2766
2767        if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2768                bflags |= M_NOVM;
2769        bucket = bucket_alloc(zone->uz_count, bflags);
2770        if (bucket) {
2771                ZONE_LOCK(zone);
2772                LIST_INSERT_HEAD(&zone->uz_free_bucket,
2773                    bucket, ub_link);
2774                ZONE_UNLOCK(zone);
2775                goto zfree_restart;
2776        }
2777
2778        /*
2779         * If nothing else caught this, we'll just do an internal free.
2780         */
2781zfree_internal:
2782        zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
2783
2784        return;
2785}
2786
2787/*
2788 * Frees an item to an INTERNAL zone or allocates a free bucket
2789 *
2790 * Arguments:
2791 *      zone   The zone to free to
2792 *      item   The item we're freeing
2793 *      udata  User supplied data for the dtor
2794 *      skip   Skip dtors and finis
2795 */
2796static void
2797zone_free_item(uma_zone_t zone, void *item, void *udata,
2798    enum zfreeskip skip, int flags)
2799{
2800        uma_slab_t slab;
2801        uma_slabrefcnt_t slabref;
2802        uma_keg_t keg;
2803        u_int8_t *mem;
2804        u_int8_t freei;
2805        int clearfull;
2806
2807        if (skip < SKIP_DTOR && zone->uz_dtor)
2808                zone->uz_dtor(item, zone->uz_size, udata);
2809
2810        if (skip < SKIP_FINI && zone->uz_fini)
2811                zone->uz_fini(item, zone->uz_size);
2812
2813        ZONE_LOCK(zone);
2814
2815        if (flags & ZFREE_STATFAIL)
2816                zone->uz_fails++;
2817        if (flags & ZFREE_STATFREE)
2818                zone->uz_frees++;
2819
2820        if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2821                mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2822                keg = zone_first_keg(zone); /* Must only be one. */
2823                if (zone->uz_flags & UMA_ZONE_HASH) {
2824                        slab = hash_sfind(&keg->uk_hash, mem);
2825                } else {
2826                        mem += keg->uk_pgoff;
2827                        slab = (uma_slab_t)mem;
2828                }
2829        } else {
2830                /* This prevents redundant lookups via free(). */
2831                if ((zone->uz_flags & UMA_ZONE_MALLOC) && udata != NULL)
2832                        slab = (uma_slab_t)udata;
2833                else
2834                        slab = vtoslab((vm_offset_t)item);
2835                keg = slab->us_keg;
2836                keg_relock(keg, zone);
2837        }
2838        MPASS(keg == slab->us_keg);
2839
2840        /* Do we need to remove from any lists? */
2841        if (slab->us_freecount+1 == keg->uk_ipers) {
2842                LIST_REMOVE(slab, us_link);
2843                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2844        } else if (slab->us_freecount == 0) {
2845                LIST_REMOVE(slab, us_link);
2846                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2847        }
2848
2849        /* Slab management stuff */
2850        freei = ((unsigned long)item - (unsigned long)slab->us_data)
2851                / keg->uk_rsize;
2852
2853#ifdef INVARIANTS
2854        if (!skip)
2855                uma_dbg_free(zone, slab, item);
2856#endif
2857
2858        if (keg->uk_flags & UMA_ZONE_REFCNT) {
2859                slabref = (uma_slabrefcnt_t)slab;
2860                slabref->us_freelist[freei].us_item = slab->us_firstfree;
2861        } else {
2862                slab->us_freelist[freei].us_item = slab->us_firstfree;
2863        }
2864        slab->us_firstfree = freei;
2865        slab->us_freecount++;
2866
2867        /* Zone statistics */
2868        keg->uk_free++;
2869
2870        clearfull = 0;
2871        if (keg->uk_flags & UMA_ZFLAG_FULL) {
2872                if (keg->uk_pages < keg->uk_maxpages) {
2873                        keg->uk_flags &= ~UMA_ZFLAG_FULL;
2874                        clearfull = 1;
2875                }
2876
2877                /*
2878                 * We can handle one more allocation. Since we're clearing ZFLAG_FULL,
2879                 * wake up all procs blocked on pages. This should be uncommon, so
2880                 * keeping this simple for now (rather than adding a count of blocked
2881                 * threads etc).
2882                 */
2883                wakeup(keg);
2884        }
2885        if (clearfull) {
2886                zone_relock(zone, keg);
2887                zone->uz_flags &= ~UMA_ZFLAG_FULL;
2888                wakeup(zone);
2889                ZONE_UNLOCK(zone);
2890        } else
2891                KEG_UNLOCK(keg);
2892}
2893
2894/* See uma.h */
2895int
2896uma_zone_set_max(uma_zone_t zone, int nitems)
2897{
2898        uma_keg_t keg;
2899
2900        ZONE_LOCK(zone);
2901        keg = zone_first_keg(zone);
2902        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2903        if (keg->uk_maxpages * keg->uk_ipers < nitems)
2904                keg->uk_maxpages += keg->uk_ppera;
2905        nitems = keg->uk_maxpages * keg->uk_ipers;
2906        ZONE_UNLOCK(zone);
2907
2908        return (nitems);
2909}
2910
2911/* See uma.h */
2912int
2913uma_zone_get_max(uma_zone_t zone)
2914{
2915        int nitems;
2916        uma_keg_t keg;
2917
2918        ZONE_LOCK(zone);
2919        keg = zone_first_keg(zone);
2920        nitems = keg->uk_maxpages * keg->uk_ipers;
2921        ZONE_UNLOCK(zone);
2922
2923        return (nitems);
2924}
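
/*
 * Illustrative sketch (hypothetical zone): the cap is kept internally in
 * whole slabs, so the value actually applied may differ from the request;
 * uma_zone_set_max() returns it and uma_zone_get_max() reads it back:
 *
 *	applied = uma_zone_set_max(foo_zone, 1000);
 *	limit = uma_zone_get_max(foo_zone);
 *
 * where limit will equal the (possibly rounded) applied value.
 */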
2925
2926/* See uma.h */
2927int
2928uma_zone_get_cur(uma_zone_t zone)
2929{
2930        int64_t nitems;
2931        u_int i;
2932
2933        ZONE_LOCK(zone);
2934        nitems = zone->uz_allocs - zone->uz_frees;
2935        CPU_FOREACH(i) {
2936                /*
2937                 * See the comment in sysctl_vm_zone_stats() regarding the
2938                 * safety of accessing the per-cpu caches. With the zone lock
2939                 * held, it is safe, but can potentially result in stale data.
2940                 */
2941                nitems += zone->uz_cpu[i].uc_allocs -
2942                    zone->uz_cpu[i].uc_frees;
2943        }
2944        ZONE_UNLOCK(zone);
2945
2946        return (nitems < 0 ? 0 : nitems);
2947}
2948
2949/* See uma.h */
2950void
2951uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2952{
2953        uma_keg_t keg;
2954
2955        ZONE_LOCK(zone);
2956        keg = zone_first_keg(zone);
2957        KASSERT(keg->uk_pages == 0,
2958            ("uma_zone_set_init on non-empty keg"));
2959        keg->uk_init = uminit;
2960        ZONE_UNLOCK(zone);
2961}
2962
2963/* See uma.h */
2964void
2965uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2966{
2967        uma_keg_t keg;
2968
2969        ZONE_LOCK(zone);
2970        keg = zone_first_keg(zone);
2971        KASSERT(keg->uk_pages == 0,
2972            ("uma_zone_set_fini on non-empty keg"));
2973        keg->uk_fini = fini;
2974        ZONE_UNLOCK(zone);
2975}
2976
2977/* See uma.h */
2978void
2979uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2980{
2981        ZONE_LOCK(zone);
2982        KASSERT(zone_first_keg(zone)->uk_pages == 0,
2983            ("uma_zone_set_zinit on non-empty keg"));
2984        zone->uz_init = zinit;
2985        ZONE_UNLOCK(zone);
2986}
2987
2988/* See uma.h */
2989void
2990uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2991{
2992        ZONE_LOCK(zone);
2993        KASSERT(zone_first_keg(zone)->uk_pages == 0,
2994            ("uma_zone_set_zfini on non-empty keg"));
2995        zone->uz_fini = zfini;
2996        ZONE_UNLOCK(zone);
2997}
2998
2999/* See uma.h */
3000/* XXX uk_freef is not actually used with the zone locked */
3001void
3002uma_zone_set_freef(uma_zone_t zone, uma_free freef)
3003{
3004
3005        ZONE_LOCK(zone);
3006        zone_first_keg(zone)->uk_freef = freef;
3007        ZONE_UNLOCK(zone);
3008}
3009
3010/* See uma.h */
3011/* XXX uk_allocf is not actually used with the zone locked */
3012void
3013uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3014{
3015        uma_keg_t keg;
3016
3017        ZONE_LOCK(zone);
3018        keg = zone_first_keg(zone);
3019        keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
3020        keg->uk_allocf = allocf;
3021        ZONE_UNLOCK(zone);
3022}
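
/*
 * Illustrative sketch (hypothetical foo_* names): a custom backend has the
 * same signature as page_alloc(), which uma_large_malloc() below calls:
 *
 *	static void *
 *	foo_pages_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 *	{
 *		*pflag = UMA_SLAB_PRIV;
 *		return (foo_pool_get(bytes, wait));
 *	}
 *
 *	uma_zone_set_allocf(foo_zone, foo_pages_alloc);
 */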
3023
3024#ifndef __rtems__
3025/* See uma.h */
3026int
3027uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
3028{
3029        uma_keg_t keg;
3030        vm_offset_t kva;
3031        int pages;
3032
3033        keg = zone_first_keg(zone);
3034        pages = count / keg->uk_ipers;
3035
3036        if (pages * keg->uk_ipers < count)
3037                pages++;
3038
3039        kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
3040
3041        if (kva == 0)
3042                return (0);
3043        if (obj == NULL)
3044                obj = vm_object_allocate(OBJT_PHYS, pages);
3045        else {
3046                VM_OBJECT_LOCK_INIT(obj, "uma object");
3047                _vm_object_allocate(OBJT_PHYS, pages, obj);
3048        }
3049        ZONE_LOCK(zone);
3050        keg->uk_kva = kva;
3051        keg->uk_obj = obj;
3052        keg->uk_maxpages = pages;
3053        keg->uk_allocf = obj_alloc;
3054        keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
3055        ZONE_UNLOCK(zone);
3056        return (1);
3057}
3058#endif /* __rtems__ */
3059
3060/* See uma.h */
3061void
3062uma_prealloc(uma_zone_t zone, int items)
3063{
3064        int slabs;
3065        uma_slab_t slab;
3066        uma_keg_t keg;
3067
3068        keg = zone_first_keg(zone);
3069        ZONE_LOCK(zone);
3070        slabs = items / keg->uk_ipers;
3071        if (slabs * keg->uk_ipers < items)
3072                slabs++;
3073        while (slabs > 0) {
3074                slab = keg_alloc_slab(keg, zone, M_WAITOK);
3075                if (slab == NULL)
3076                        break;
3077                MPASS(slab->us_keg == keg);
3078                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3079                slabs--;
3080        }
3081        ZONE_UNLOCK(zone);
3082}
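
/*
 * Illustrative sketch (hypothetical zone): callers that expect to allocate
 * without sleeping later can pre-fault slabs up front so that subsequent
 * M_NOWAIT allocations are more likely to succeed:
 *
 *	uma_zone_set_max(foo_zone, 256);
 *	uma_prealloc(foo_zone, 256);
 */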
3083
3084/* See uma.h */
3085u_int32_t *
3086uma_find_refcnt(uma_zone_t zone, void *item)
3087{
3088        uma_slabrefcnt_t slabref;
3089        uma_keg_t keg;
3090        u_int32_t *refcnt;
3091        int idx;
3092
3093        slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
3094            (~UMA_SLAB_MASK));
3095        KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
3096            ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
3097        keg = slabref->us_keg;
3098        idx = ((unsigned long)item - (unsigned long)slabref->us_data)
3099            / keg->uk_rsize;
3100        refcnt = &slabref->us_freelist[idx].us_refcnt;
3101        return (refcnt);
3102}
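
/*
 * Illustrative sketch (hypothetical names): UMA_ZONE_REFCNT zones keep a
 * per-item reference count in the slab, which callers look up like this:
 *
 *	zone = uma_zcreate("foo bufs", size, NULL, NULL, NULL, NULL,
 *	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
 *	buf = uma_zalloc(zone, M_WAITOK);
 *	refcnt = uma_find_refcnt(zone, buf);
 *	*refcnt = 1;
 */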
3103
3104/* See uma.h */
3105void
3106uma_reclaim(void)
3107{
3108#ifdef UMA_DEBUG
3109        printf("UMA: vm asked us to release pages!\n");
3110#endif
3111        bucket_enable();
3112        zone_foreach(zone_drain);
3113        /*
3114         * Some slabs may have been freed, but this zone was visited early in
3115         * the list; visit it again so that we can free pages that became empty
3116         * once the other zones were drained.  We have to do the same for buckets.
3117         */
3118        zone_drain(slabzone);
3119        zone_drain(slabrefzone);
3120        bucket_zone_drain();
3121}
3122
3123/* See uma.h */
3124int
3125uma_zone_exhausted(uma_zone_t zone)
3126{
3127        int full;
3128
3129        ZONE_LOCK(zone);
3130        full = (zone->uz_flags & UMA_ZFLAG_FULL);
3131        ZONE_UNLOCK(zone);
3132        return (full); 
3133}
3134
3135int
3136uma_zone_exhausted_nolock(uma_zone_t zone)
3137{
3138        return (zone->uz_flags & UMA_ZFLAG_FULL);
3139}
3140
3141#ifndef __rtems__
3142void *
3143uma_large_malloc(int size, int wait)
3144{
3145        void *mem;
3146        uma_slab_t slab;
3147        u_int8_t flags;
3148
3149        slab = zone_alloc_item(slabzone, NULL, wait);
3150        if (slab == NULL)
3151                return (NULL);
3152        mem = page_alloc(NULL, size, &flags, wait);
3153        if (mem) {
3154                vsetslab((vm_offset_t)mem, slab);
3155                slab->us_data = mem;
3156                slab->us_flags = flags | UMA_SLAB_MALLOC;
3157                slab->us_size = size;
3158        } else {
3159                zone_free_item(slabzone, slab, NULL, SKIP_NONE,
3160                    ZFREE_STATFAIL | ZFREE_STATFREE);
3161        }
3162
3163        return (mem);
3164}
3165
3166void
3167uma_large_free(uma_slab_t slab)
3168{
3169        vsetobj((vm_offset_t)slab->us_data, kmem_object);
3170        page_free(slab->us_data, slab->us_size, slab->us_flags);
3171        zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
3172}
3173#endif /* __rtems__ */
3174
3175void
3176uma_print_stats(void)
3177{
3178        zone_foreach(uma_print_zone);
3179}
3180
static void
slab_print(uma_slab_t slab)
{
        printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
                slab->us_keg, slab->us_data, slab->us_freecount,
                slab->us_firstfree);
}

static void
cache_print(uma_cache_t cache)
{
        printf("alloc: %p(%d), free: %p(%d)\n",
                cache->uc_allocbucket,
                cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
                cache->uc_freebucket,
                cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
}

static void
uma_print_keg(uma_keg_t keg)
{
        uma_slab_t slab;

        printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
            "out %d free %d limit %d\n",
            keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
            keg->uk_ipers, keg->uk_ppera,
            (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
            (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
        printf("Part slabs:\n");
        LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
                slab_print(slab);
        printf("Free slabs:\n");
        LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
                slab_print(slab);
        printf("Full slabs:\n");
        LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
                slab_print(slab);
}

void
uma_print_zone(uma_zone_t zone)
{
        uma_cache_t cache;
        uma_klink_t kl;
        int i;

        printf("zone: %s(%p) size %d flags %#x\n",
            zone->uz_name, zone, zone->uz_size, zone->uz_flags);
        LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
                uma_print_keg(kl->kl_keg);
        CPU_FOREACH(i) {
                cache = &zone->uz_cpu[i];
                printf("CPU %d Cache:\n", i);
                cache_print(cache);
        }
}

#ifndef __rtems__
#ifdef DDB
/*
 * Generate statistics across both the zone and its per-CPU caches.  Each
 * statistic is returned through its pointer argument if that pointer is
 * non-NULL.
 *
 * Note: does not update the zone statistics, as it can't safely clear the
 * per-CPU cache statistics.
 *
 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
 * safe from off-CPU; we should modify the caches to track this information
 * directly so that we don't have to.
 */
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
    u_int64_t *freesp, u_int64_t *sleepsp)
{
        uma_cache_t cache;
        u_int64_t allocs, frees, sleeps;
        int cachefree, cpu;

        allocs = frees = sleeps = 0;
        cachefree = 0;
        CPU_FOREACH(cpu) {
                cache = &z->uz_cpu[cpu];
                if (cache->uc_allocbucket != NULL)
                        cachefree += cache->uc_allocbucket->ub_cnt;
                if (cache->uc_freebucket != NULL)
                        cachefree += cache->uc_freebucket->ub_cnt;
                allocs += cache->uc_allocs;
                frees += cache->uc_frees;
        }
        allocs += z->uz_allocs;
        frees += z->uz_frees;
        sleeps += z->uz_sleeps;
        if (cachefreep != NULL)
                *cachefreep = cachefree;
        if (allocsp != NULL)
                *allocsp = allocs;
        if (freesp != NULL)
                *freesp = frees;
        if (sleepsp != NULL)
                *sleepsp = sleeps;
}
#endif /* DDB */
#endif /* __rtems__ */

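/*
 * Sysctl handler reporting the total number of UMA zones (vm.zone_count
 * in stock FreeBSD).  Every keg's zone list is walked under uma_mtx.
 */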
static int
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
{
        uma_keg_t kz;
        uma_zone_t z;
        int count;

        count = 0;
        mtx_lock(&uma_mtx);
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link)
                        count++;
        }
        mtx_unlock(&uma_mtx);
        return (sysctl_handle_int(oidp, &count, 0, req));
}

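/*
 * Sysctl handler streaming binary zone statistics (vm.zone_stats in
 * stock FreeBSD): a uma_stream_header, then for each zone a
 * uma_type_header followed by one uma_percpu_stat per possible CPU.
 * This is the stream parsed by libmemstat(3) and vmstat -z.
 */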
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
        struct uma_stream_header ush;
        struct uma_type_header uth;
        struct uma_percpu_stat ups;
        uma_bucket_t bucket;
        struct sbuf sbuf;
        uma_cache_t cache;
        uma_klink_t kl;
        uma_keg_t kz;
        uma_zone_t z;
        uma_keg_t k;
        int count, error, i;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

        count = 0;
        mtx_lock(&uma_mtx);
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link)
                        count++;
        }

        /*
         * Insert stream header.
         */
        bzero(&ush, sizeof(ush));
        ush.ush_version = UMA_STREAM_VERSION;
        ush.ush_maxcpus = (mp_maxid + 1);
        ush.ush_count = count;
        (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));

        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
                        bzero(&uth, sizeof(uth));
                        ZONE_LOCK(z);
                        strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
                        uth.uth_align = kz->uk_align;
                        uth.uth_size = kz->uk_size;
                        uth.uth_rsize = kz->uk_rsize;
                        LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
                                k = kl->kl_keg;
                                uth.uth_maxpages += k->uk_maxpages;
                                uth.uth_pages += k->uk_pages;
                                uth.uth_keg_free += k->uk_free;
                                uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
                                    * k->uk_ipers;
                        }

                        /*
                         * A zone is secondary if it is not the first entry
                         * on the keg's zone list.
                         */
                        if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
                            (LIST_FIRST(&kz->uk_zones) != z))
                                uth.uth_zone_flags = UTH_ZONE_SECONDARY;

                        LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
                                uth.uth_zone_free += bucket->ub_cnt;
                        uth.uth_allocs = z->uz_allocs;
                        uth.uth_frees = z->uz_frees;
                        uth.uth_fails = z->uz_fails;
                        uth.uth_sleeps = z->uz_sleeps;
                        (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
                        /*
                         * It is normally unsafe to access the cache bucket
                         * pointers from a CPU that does not own the cache,
                         * but the pointers are only exchanged, never
                         * invalidated, without the zone lock held, so we
                         * accept the possible race associated with bucket
                         * exchange during monitoring.
                         */
                        for (i = 0; i < (mp_maxid + 1); i++) {
                                bzero(&ups, sizeof(ups));
                                if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
                                        goto skip;
                                if (CPU_ABSENT(i))
                                        goto skip;
                                cache = &z->uz_cpu[i];
                                if (cache->uc_allocbucket != NULL)
                                        ups.ups_cache_free +=
                                            cache->uc_allocbucket->ub_cnt;
                                if (cache->uc_freebucket != NULL)
                                        ups.ups_cache_free +=
                                            cache->uc_freebucket->ub_cnt;
                                ups.ups_allocs = cache->uc_allocs;
                                ups.ups_frees = cache->uc_frees;
skip:
                                (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
                        }
                        ZONE_UNLOCK(z);
                }
        }
        mtx_unlock(&uma_mtx);
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

#ifndef __rtems__
#ifdef DDB
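/*
 * "show uma" DDB command: print one summary line per zone, e.g. from
 * the in-kernel debugger prompt:
 *
 *      db> show uma
 */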
DB_SHOW_COMMAND(uma, db_show_uma)
{
        u_int64_t allocs, frees, sleeps;
        uma_bucket_t bucket;
        uma_keg_t kz;
        uma_zone_t z;
        int cachefree;

        db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
            "Requests", "Sleeps");
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
                        if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
                                allocs = z->uz_allocs;
                                frees = z->uz_frees;
                                sleeps = z->uz_sleeps;
                                cachefree = 0;
                        } else
                                uma_zone_sumstat(z, &cachefree, &allocs,
                                    &frees, &sleeps);
                        if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
                            (LIST_FIRST(&kz->uk_zones) != z)))
                                cachefree += kz->uk_free;
                        LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
                                cachefree += bucket->ub_cnt;
                        db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
                            (uintmax_t)kz->uk_size,
                            (intmax_t)(allocs - frees), cachefree,
                            (uintmax_t)allocs, sleeps);
                        if (db_pager_quit)
                                return;
                }
        }
}
#endif
#endif /* __rtems__ */