Context Navigation

source: rtems-libbsd/rtemsbsd/src/rtems-bsd-uma.c @ cbffdb7f

4.1155-freebsd-126-freebsd-12freebsd-9.3

Last change on this file since cbffdb7f was cbffdb7f, checked in by Joel Sherrill <joel.sherrill@…>, on 03/07/12 at 22:14:13
Separate RTEMS Specific Files from Those Direct from FreeBSD
Property mode set to `100644`
File size: 68.7 KB

Line
1	/**
2	* @file
3	*
4	* @ingroup rtems_bsd_rtems
5	*
6	* @brief TODO.
7	*/
8
9	/*
10	* Copyright (c) 2009, 2010 embedded brains GmbH. All rights reserved.
11	*
12	* embedded brains GmbH
13	* Obere Lagerstr. 30
14	* 82178 Puchheim
15	* Germany
16	* <rtems@embedded-brains.de>
17	*
18	* The license and distribution terms for this file may be
19	* found in the file LICENSE in this distribution or at
20	* http://www.rtems.com/license/LICENSE.
21	*/
22
23	#include <rtems/freebsd/machine/rtems-bsd-config.h>
24
25	#include <rtems/freebsd/sys/param.h>
26	#include <rtems/freebsd/sys/types.h>
27	#include <rtems/freebsd/sys/systm.h>
28	#include <rtems/freebsd/sys/malloc.h>
29	#include <rtems/freebsd/sys/kernel.h>
30	#include <rtems/freebsd/sys/lock.h>
31	#include <rtems/freebsd/sys/mutex.h>
32	#include <rtems/freebsd/sys/ktr.h>
33	#include <rtems/freebsd/vm/uma.h>
34	#include <rtems/freebsd/vm/uma_int.h>
35	#include <rtems/freebsd/vm/uma_dbg.h>
36
37	/*
38	* This is the zone and keg from which all zones are spawned. The idea is that
39	* even the zone & keg heads are allocated from the allocator, so we use the
40	* bss section to bootstrap us.
41	*/
42	static struct uma_keg masterkeg;
43	static struct uma_zone masterzone_k;
44	static struct uma_zone masterzone_z;
45	static uma_zone_t kegs = &masterzone_k;
46	static uma_zone_t zones = &masterzone_z;
47
48	/* This is the zone from which all of uma_slab_t's are allocated. */
49	static uma_zone_t slabzone;
50	static uma_zone_t slabrefzone; /* With refcounters (for UMA_ZONE_REFCNT) */
51
52	static u_int mp_maxid = 0; /* simulate 1 CPU. This should really come from RTEMS SMP. AT this time, RTEMS SMP is not functional */
53	#define CPU_ABSENT(x_cpu) 0 /* force all cpus to be present. This should really come from RTEMS SMP. */
54	#define CPU_FOREACH(i) \
55	for ((i) = 0; (i) <= mp_maxid; (i)++) \
56	if (!CPU_ABSENT((i)))
57
58	/*
59	* The initial hash tables come out of this zone so they can be allocated
60	* prior to malloc coming up.
61	*/
62	static uma_zone_t hashzone;
63
64	/* The boot-time adjusted value for cache line alignment. */
65	static int uma_align_cache = 64 - 1;
66
67	static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
68
69	/*
70	* Are we allowed to allocate buckets?
71	*/
72	static int bucketdisable = 1;
73
74	/* Linked list of all kegs in the system */
75	static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
76
77	/* This mutex protects the keg list */
78	static struct mtx uma_mtx;
79
80	/* Linked list of boot time pages */
81	static LIST_HEAD(,uma_slab) uma_boot_pages =
82	LIST_HEAD_INITIALIZER(uma_boot_pages);
83
84	/* This mutex protects the boot time pages list */
85	static struct mtx uma_boot_pages_mtx;
86
87	/* Is the VM done starting up? */
88	static int booted = 0;
89
90	/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
91	static u_int uma_max_ipers;
92	static u_int uma_max_ipers_ref;
93
94	/*
95	* This is the handle used to schedule events that need to happen
96	* outside of the allocation fast path.
97	*/
98	static struct callout uma_callout;
99	#define UMA_TIMEOUT 20 /* Seconds for callout interval. */
100
101	/*
102	* This structure is passed as the zone ctor arg so that I don't have to create
103	* a special allocation function just for zones.
104	*/
105	struct uma_zctor_args {
106	char *name;
107	size_t size;
108	uma_ctor ctor;
109	uma_dtor dtor;
110	uma_init uminit;
111	uma_fini fini;
112	uma_keg_t keg;
113	int align;
114	u_int32_t flags;
115	};
116
117	struct uma_kctor_args {
118	uma_zone_t zone;
119	size_t size;
120	uma_init uminit;
121	uma_fini fini;
122	int align;
123	u_int32_t flags;
124	};
125
126	struct uma_bucket_zone {
127	uma_zone_t ubz_zone;
128	char *ubz_name;
129	int ubz_entries;
130	};
131
132	#define BUCKET_MAX 128
133
134	struct uma_bucket_zone bucket_zones[] = {
135	{ NULL, "16 Bucket", 16 },
136	{ NULL, "32 Bucket", 32 },
137	{ NULL, "64 Bucket", 64 },
138	{ NULL, "128 Bucket", 128 },
139	{ NULL, NULL, 0}
140	};
141
142	#define BUCKET_SHIFT 4
143	#define BUCKET_ZONES ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
144
145	/*
146	* bucket_size[] maps requested bucket sizes to zones that allocate a bucket
147	* of approximately the right size.
148	*/
149	static uint8_t bucket_size[BUCKET_ZONES];
150
151	/*
152	* Flags and enumerations to be passed to internal functions.
153	*/
154	enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
155
156	#define ZFREE_STATFAIL 0x00000001 /* Update zone failure statistic. */
157	#define ZFREE_STATFREE 0x00000002 /* Update zone free statistic. */
158
159	/* Prototypes.. */
160
161	static void page_alloc(uma_zone_t, int, u_int8_t , int);
162	static void startup_alloc(uma_zone_t, int, u_int8_t , int);
163	static void page_free(void *, int, u_int8_t);
164	static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
165	static void cache_drain(uma_zone_t);
166	static void bucket_drain(uma_zone_t, uma_bucket_t);
167	static void bucket_cache_drain(uma_zone_t zone);
168	static int keg_ctor(void , int, void , int);
169	static void keg_dtor(void , int, void );
170	static int zone_ctor(void , int, void , int);
171	static void zone_dtor(void , int, void );
172	static int zero_init(void *, int, int);
173	static void keg_small_init(uma_keg_t keg);
174	static void keg_large_init(uma_keg_t keg);
175	static void zone_foreach(void (*zfunc)(uma_zone_t));
176	static void zone_timeout(uma_zone_t zone);
177	static int hash_alloc(struct uma_hash *);
178	static int hash_expand(struct uma_hash , struct uma_hash );
179	static void hash_free(struct uma_hash *hash);
180	static void zone_alloc_item(uma_zone_t, void , int);
181	static void zone_free_item(uma_zone_t, void , void , enum zfreeskip,
182	int);
183	static void bucket_init(void);
184	static uma_bucket_t bucket_alloc(int, int);
185	static void bucket_free(uma_bucket_t);
186	static void bucket_zone_drain(void);
187	static int zone_alloc_bucket(uma_zone_t zone, int flags);
188	static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
189	static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
190	static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
191	static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
192	uma_fini fini, int align, u_int32_t flags);
193	static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
194	static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
195
196	void uma_print_zone(uma_zone_t);
197	void uma_print_stats(void);
198
199	/*
200	* Initialize bucket_zones, the array of zones of buckets of various sizes.
201	*
202	* For each zone, calculate the memory required for each bucket, consisting
203	* of the header and an array of pointers. Initialize bucket_size[] to point
204	* the range of appropriate bucket sizes at the zone.
205	*/
206	static void
207	bucket_init(void)
208	{
209	struct uma_bucket_zone *ubz;
210	int i;
211	int j;
212
213	for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
214	int size;
215
216	ubz = &bucket_zones[j];
217	size = roundup(sizeof(struct uma_bucket), sizeof(void *));
218	size += sizeof(void ) ubz->ubz_entries;
219	ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
220	NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
221	UMA_ZFLAG_INTERNAL \| UMA_ZFLAG_BUCKET);
222	for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
223	bucket_size[i >> BUCKET_SHIFT] = j;
224	}
225	}
226
227	/*
228	* Given a desired number of entries for a bucket, return the zone from which
229	* to allocate the bucket.
230	*/
231	static struct uma_bucket_zone *
232	bucket_zone_lookup(int entries)
233	{
234	int idx;
235
236	idx = howmany(entries, 1 << BUCKET_SHIFT);
237	return (&bucket_zones[bucket_size[idx]]);
238	}
239
240	static uma_bucket_t
241	bucket_alloc(int entries, int bflags)
242	{
243	struct uma_bucket_zone *ubz;
244	uma_bucket_t bucket;
245
246	/*
247	* This is to stop us from allocating per cpu buckets while we're
248	* running out of vm.boot_pages. Otherwise, we would exhaust the
249	* boot pages. This also prevents us from allocating buckets in
250	* low memory situations.
251	*/
252	if (bucketdisable)
253	return (NULL);
254
255	ubz = bucket_zone_lookup(entries);
256	bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
257	if (bucket) {
258	#ifdef INVARIANTS
259	bzero(bucket->ub_bucket, sizeof(void ) ubz->ubz_entries);
260	#endif
261	bucket->ub_cnt = 0;
262	bucket->ub_entries = ubz->ubz_entries;
263	}
264
265	return (bucket);
266	}
267
268	static void
269	bucket_free(uma_bucket_t bucket)
270	{
271	struct uma_bucket_zone *ubz;
272
273	ubz = bucket_zone_lookup(bucket->ub_entries);
274	zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
275	ZFREE_STATFREE);
276	}
277
278	static void
279	bucket_zone_drain(void)
280	{
281	struct uma_bucket_zone *ubz;
282
283	for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
284	zone_drain(ubz->ubz_zone);
285	}
286
287	static inline uma_keg_t
288	zone_first_keg(uma_zone_t zone)
289	{
290
291	return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
292	}
293
294	static void
295	zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
296	{
297	uma_klink_t klink;
298
299	LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
300	kegfn(klink->kl_keg);
301	}
302
303	/*
304	* Routine to perform timeout driven calculations. This expands the
305	* hashes and does per cpu statistics aggregation.
306	*
307	* Returns nothing.
308	*/
309	static void
310	keg_timeout(uma_keg_t keg)
311	{
312
313	KEG_LOCK(keg);
314	/*
315	* Expand the keg hash table.
316	*
317	* This is done if the number of slabs is larger than the hash size.
318	* What I'm trying to do here is completely reduce collisions. This
319	* may be a little aggressive. Should I allow for two collisions max?
320	*/
321	if (keg->uk_flags & UMA_ZONE_HASH &&
322	keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
323	struct uma_hash newhash;
324	struct uma_hash oldhash;
325	int ret;
326
327	/*
328	* This is so involved because allocating and freeing
329	* while the keg lock is held will lead to deadlock.
330	* I have to do everything in stages and check for
331	* races.
332	*/
333	newhash = keg->uk_hash;
334	KEG_UNLOCK(keg);
335	ret = hash_alloc(&newhash);
336	KEG_LOCK(keg);
337	if (ret) {
338	if (hash_expand(&keg->uk_hash, &newhash)) {
339	oldhash = keg->uk_hash;
340	keg->uk_hash = newhash;
341	} else
342	oldhash = newhash;
343
344	KEG_UNLOCK(keg);
345	hash_free(&oldhash);
346	KEG_LOCK(keg);
347	}
348	}
349	KEG_UNLOCK(keg);
350	}
351
352	static void
353	zone_timeout(uma_zone_t zone)
354	{
355
356	zone_foreach_keg(zone, &keg_timeout);
357	}
358
359	/*
360	* Allocate and zero fill the next sized hash table from the appropriate
361	* backing store.
362	*
363	* Arguments:
364	* hash A new hash structure with the old hash size in uh_hashsize
365	*
366	* Returns:
367	* 1 on sucess and 0 on failure.
368	*/
369	static int
370	hash_alloc(struct uma_hash *hash)
371	{
372	int oldsize;
373	int alloc;
374
375	oldsize = hash->uh_hashsize;
376
377	/* We're just going to go to a power of two greater */
378	if (oldsize) {
379	hash->uh_hashsize = oldsize * 2;
380	alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
381	hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
382	M_UMAHASH, M_NOWAIT);
383	} else {
384	alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
385	hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
386	M_WAITOK);
387	hash->uh_hashsize = UMA_HASH_SIZE_INIT;
388	}
389	if (hash->uh_slab_hash) {
390	bzero(hash->uh_slab_hash, alloc);
391	hash->uh_hashmask = hash->uh_hashsize - 1;
392	return (1);
393	}
394
395	return (0);
396	}
397
398	/*
399	* Expands the hash table for HASH zones. This is done from zone_timeout
400	* to reduce collisions. This must not be done in the regular allocation
401	* path, otherwise, we can recurse on the vm while allocating pages.
402	*
403	* Arguments:
404	* oldhash The hash you want to expand
405	* newhash The hash structure for the new table
406	*
407	* Returns:
408	* Nothing
409	*
410	* Discussion:
411	*/
412	static int
413	hash_expand(struct uma_hash oldhash, struct uma_hash newhash)
414	{
415	uma_slab_t slab;
416	int hval;
417	int i;
418
419	if (!newhash->uh_slab_hash)
420	return (0);
421
422	if (oldhash->uh_hashsize >= newhash->uh_hashsize)
423	return (0);
424
425	/*
426	* I need to investigate hash algorithms for resizing without a
427	* full rehash.
428	*/
429
430	for (i = 0; i < oldhash->uh_hashsize; i++)
431	while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
432	slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
433	SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
434	hval = UMA_HASH(newhash, slab->us_data);
435	SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
436	slab, us_hlink);
437	}
438
439	return (1);
440	}
441
442	/*
443	* Free the hash bucket to the appropriate backing store.
444	*
445	* Arguments:
446	* slab_hash The hash bucket we're freeing
447	* hashsize The number of entries in that hash bucket
448	*
449	* Returns:
450	* Nothing
451	*/
452	static void
453	hash_free(struct uma_hash *hash)
454	{
455	if (hash->uh_slab_hash == NULL)
456	return;
457	if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
458	zone_free_item(hashzone,
459	hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
460	else
461	free(hash->uh_slab_hash, M_UMAHASH);
462	}
463
464	/*
465	* Frees all outstanding items in a bucket
466	*
467	* Arguments:
468	* zone The zone to free to, must be unlocked.
469	* bucket The free/alloc bucket with items, cpu queue must be locked.
470	*
471	* Returns:
472	* Nothing
473	*/
474
475	static void
476	bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
477	{
478	void *item;
479
480	if (bucket == NULL)
481	return;
482
483	while (bucket->ub_cnt > 0) {
484	bucket->ub_cnt--;
485	item = bucket->ub_bucket[bucket->ub_cnt];
486	#ifdef INVARIANTS
487	bucket->ub_bucket[bucket->ub_cnt] = NULL;
488	KASSERT(item != NULL,
489	("bucket_drain: botched ptr, item is NULL"));
490	#endif
491	zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
492	}
493	}
494
495	/*
496	* Drains the per cpu caches for a zone.
497	*
498	* NOTE: This may only be called while the zone is being turn down, and not
499	* during normal operation. This is necessary in order that we do not have
500	* to migrate CPUs to drain the per-CPU caches.
501	*
502	* Arguments:
503	* zone The zone to drain, must be unlocked.
504	*
505	* Returns:
506	* Nothing
507	*/
508	static void
509	cache_drain(uma_zone_t zone)
510	{
511	uma_cache_t cache;
512	int cpu;
513
514	/*
515	* XXX: It is safe to not lock the per-CPU caches, because we're
516	* tearing down the zone anyway. I.e., there will be no further use
517	* of the caches at this point.
518	*
519	* XXX: It would good to be able to assert that the zone is being
520	* torn down to prevent improper use of cache_drain().
521	*
522	* XXX: We lock the zone before passing into bucket_cache_drain() as
523	* it is used elsewhere. Should the tear-down path be made special
524	* there in some form?
525	*/
526	for (cpu = 0; cpu <= mp_maxid; cpu++) {
527	if (CPU_ABSENT(cpu))
528	continue;
529	cache = &zone->uz_cpu[cpu];
530	bucket_drain(zone, cache->uc_allocbucket);
531	bucket_drain(zone, cache->uc_freebucket);
532	if (cache->uc_allocbucket != NULL)
533	bucket_free(cache->uc_allocbucket);
534	if (cache->uc_freebucket != NULL)
535	bucket_free(cache->uc_freebucket);
536	cache->uc_allocbucket = cache->uc_freebucket = NULL;
537	}
538	ZONE_LOCK(zone);
539	bucket_cache_drain(zone);
540	ZONE_UNLOCK(zone);
541	}
542
543	/*
544	* Drain the cached buckets from a zone. Expects a locked zone on entry.
545	*/
546	static void
547	bucket_cache_drain(uma_zone_t zone)
548	{
549	uma_bucket_t bucket;
550
551	/*
552	* Drain the bucket queues and free the buckets, we just keep two per
553	* cpu (alloc/free).
554	*/
555	while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
556	LIST_REMOVE(bucket, ub_link);
557	ZONE_UNLOCK(zone);
558	bucket_drain(zone, bucket);
559	bucket_free(bucket);
560	ZONE_LOCK(zone);
561	}
562
563	/* Now we do the free queue.. */
564	while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
565	LIST_REMOVE(bucket, ub_link);
566	bucket_free(bucket);
567	}
568	}
569
570	/*
571	* Frees pages from a keg back to the system. This is done on demand from
572	* the pageout daemon.
573	*
574	* Returns nothing.
575	*/
576	static void
577	keg_drain(uma_keg_t keg)
578	{
579	struct slabhead freeslabs = { 0 };
580	uma_slab_t slab;
581	uma_slab_t n;
582	u_int8_t flags;
583	u_int8_t *mem;
584	int i;
585
586	/*
587	* We don't want to take pages from statically allocated kegs at this
588	* time
589	*/
590	if (keg->uk_flags & UMA_ZONE_NOFREE \|\| keg->uk_freef == NULL)
591	return;
592
593	#ifdef UMA_DEBUG
594	printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
595	#endif
596	KEG_LOCK(keg);
597	if (keg->uk_free == 0)
598	goto finished;
599
600	slab = LIST_FIRST(&keg->uk_free_slab);
601	while (slab) {
602	n = LIST_NEXT(slab, us_link);
603
604	/* We have no where to free these to */
605	if (slab->us_flags & UMA_SLAB_BOOT) {
606	slab = n;
607	continue;
608	}
609
610	LIST_REMOVE(slab, us_link);
611	keg->uk_pages -= keg->uk_ppera;
612	keg->uk_free -= keg->uk_ipers;
613
614	if (keg->uk_flags & UMA_ZONE_HASH)
615	UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
616
617	SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
618
619	slab = n;
620	}
621	finished:
622	KEG_UNLOCK(keg);
623
624	while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
625	SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
626	if (keg->uk_fini)
627	for (i = 0; i < keg->uk_ipers; i++)
628	keg->uk_fini(
629	slab->us_data + (keg->uk_rsize * i),
630	keg->uk_size);
631	flags = slab->us_flags;
632	mem = slab->us_data;
633
634	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
635	zone_free_item(keg->uk_slabzone, slab, NULL,
636	SKIP_NONE, ZFREE_STATFREE);
637	#ifdef UMA_DEBUG
638	printf("%s: Returning %d bytes.\n",
639	keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
640	#endif
641	keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
642	}
643	}
644
645	static void
646	zone_drain_wait(uma_zone_t zone, int waitok)
647	{
648
649	/*
650	* Set draining to interlock with zone_dtor() so we can release our
651	* locks as we go. Only dtor() should do a WAITOK call since it
652	* is the only call that knows the structure will still be available
653	* when it wakes up.
654	*/
655	ZONE_LOCK(zone);
656	while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
657	if (waitok == M_NOWAIT)
658	goto out;
659	mtx_unlock(&uma_mtx);
660	msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
661	mtx_lock(&uma_mtx);
662	}
663	zone->uz_flags \|= UMA_ZFLAG_DRAINING;
664	bucket_cache_drain(zone);
665	ZONE_UNLOCK(zone);
666	/*
667	* The DRAINING flag protects us from being freed while
668	* we're running. Normally the uma_mtx would protect us but we
669	* must be able to release and acquire the right lock for each keg.
670	*/
671	zone_foreach_keg(zone, &keg_drain);
672	ZONE_LOCK(zone);
673	zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
674	wakeup(zone);
675	out:
676	ZONE_UNLOCK(zone);
677	}
678
679	void
680	zone_drain(uma_zone_t zone)
681	{
682
683	zone_drain_wait(zone, M_NOWAIT);
684	}
685
686	/*
687	* Allocate a new slab for a keg. This does not insert the slab onto a list.
688	*
689	* Arguments:
690	* wait Shall we wait?
691	*
692	* Returns:
693	* The slab that was allocated or NULL if there is no memory and the
694	* caller specified M_NOWAIT.
695	*/
696	static uma_slab_t
697	keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
698	{
699	uma_slabrefcnt_t slabref;
700	uma_alloc allocf;
701	uma_slab_t slab;
702	u_int8_t *mem;
703	u_int8_t flags;
704	int i;
705
706	mtx_assert(&keg->uk_lock, MA_OWNED);
707	slab = NULL;
708
709	#ifdef UMA_DEBUG
710	printf("slab_zalloc: Allocating a new slab for %s\n", keg->uk_name);
711	#endif
712	allocf = keg->uk_allocf;
713	KEG_UNLOCK(keg);
714
715	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
716	slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
717	if (slab == NULL) {
718	KEG_LOCK(keg);
719	return NULL;
720	}
721	}
722
723	/*
724	* This reproduces the old vm_zone behavior of zero filling pages the
725	* first time they are added to a zone.
726	*
727	* Malloced items are zeroed in uma_zalloc.
728	*/
729
730	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
731	wait \|= M_ZERO;
732	else
733	wait &= ~M_ZERO;
734
735	/* zone is passed for legacy reasons. */
736	mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
737	if (mem == NULL) {
738	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
739	zone_free_item(keg->uk_slabzone, slab, NULL,
740	SKIP_NONE, ZFREE_STATFREE);
741	KEG_LOCK(keg);
742	return (NULL);
743	}
744
745	/* Point the slab into the allocated memory */
746	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
747	slab = (uma_slab_t )(mem + keg->uk_pgoff);
748
749	slab->us_keg = keg;
750	slab->us_data = mem;
751	slab->us_freecount = keg->uk_ipers;
752	slab->us_firstfree = 0;
753	slab->us_flags = flags;
754
755	if (keg->uk_flags & UMA_ZONE_REFCNT) {
756	slabref = (uma_slabrefcnt_t)slab;
757	for (i = 0; i < keg->uk_ipers; i++) {
758	slabref->us_freelist[i].us_refcnt = 0;
759	slabref->us_freelist[i].us_item = i+1;
760	}
761	} else {
762	for (i = 0; i < keg->uk_ipers; i++)
763	slab->us_freelist[i].us_item = i+1;
764	}
765
766	if (keg->uk_init != NULL) {
767	for (i = 0; i < keg->uk_ipers; i++)
768	if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
769	keg->uk_size, wait) != 0)
770	break;
771	if (i != keg->uk_ipers) {
772	if (keg->uk_fini != NULL) {
773	for (i--; i > -1; i--)
774	keg->uk_fini(slab->us_data +
775	(keg->uk_rsize * i),
776	keg->uk_size);
777	}
778	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
779	zone_free_item(keg->uk_slabzone, slab,
780	NULL, SKIP_NONE, ZFREE_STATFREE);
781	keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
782	flags);
783	KEG_LOCK(keg);
784	return (NULL);
785	}
786	}
787	KEG_LOCK(keg);
788
789	if (keg->uk_flags & UMA_ZONE_HASH)
790	UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
791
792	keg->uk_pages += keg->uk_ppera;
793	keg->uk_free += keg->uk_ipers;
794
795	return (slab);
796	}
797
798	/*
799	* This function is intended to be used early on in place of page_alloc() so
800	* that we may use the boot time page cache to satisfy allocations before
801	* the VM is ready.
802	*/
803	static void *
804	startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
805	{
806	uma_keg_t keg;
807	uma_slab_t tmps;
808	int pages, check_pages;
809
810	keg = zone_first_keg(zone);
811	pages = howmany(bytes, PAGE_SIZE);
812	check_pages = pages - 1;
813	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
814
815	/*
816	* Check our small startup cache to see if it has pages remaining.
817	*/
818	mtx_lock(&uma_boot_pages_mtx);
819
820	/* First check if we have enough room. */
821	tmps = LIST_FIRST(&uma_boot_pages);
822	while (tmps != NULL && check_pages-- > 0)
823	tmps = LIST_NEXT(tmps, us_link);
824	if (tmps != NULL) {
825	/*
826	* It's ok to lose tmps references. The last one will
827	* have tmps->us_data pointing to the start address of
828	* "pages" contiguous pages of memory.
829	*/
830	while (pages-- > 0) {
831	tmps = LIST_FIRST(&uma_boot_pages);
832	LIST_REMOVE(tmps, us_link);
833	}
834	mtx_unlock(&uma_boot_pages_mtx);
835	*pflag = tmps->us_flags;
836	return (tmps->us_data);
837	}
838	mtx_unlock(&uma_boot_pages_mtx);
839	if (booted == 0)
840	panic("UMA: Increase vm.boot_pages");
841	/*
842	* Now that we've booted reset these users to their real allocator.
843	*/
844	#ifdef UMA_MD_SMALL_ALLOC
845	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
846	#else
847	keg->uk_allocf = page_alloc;
848	#endif
849	return keg->uk_allocf(zone, bytes, pflag, wait);
850	}
851
852	/*
853	* Allocates a number of pages from the system
854	*
855	* Arguments:
856	* bytes The number of bytes requested
857	* wait Shall we wait?
858	*
859	* Returns:
860	* A pointer to the alloced memory or possibly
861	* NULL if M_NOWAIT is set.
862	*/
863	static void *
864	page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
865	{
866	void p; / Returned page */
867
868	*pflag = UMA_SLAB_KMEM;
869	p = (void *) malloc(bytes, M_TEMP, wait);
870
871	return (p);
872	}
873
874	/*
875	* Frees a number of pages to the system
876	*
877	* Arguments:
878	* mem A pointer to the memory to be freed
879	* size The size of the memory being freed
880	* flags The original p->us_flags field
881	*
882	* Returns:
883	* Nothing
884	*/
885	static void
886	page_free(void *mem, int size, u_int8_t flags)
887	{
888	free( mem, M_TEMP );
889	}
890
891	/*
892	* Zero fill initializer
893	*
894	* Arguments/Returns follow uma_init specifications
895	*/
896	static int
897	zero_init(void *mem, int size, int flags)
898	{
899	bzero(mem, size);
900	return (0);
901	}
902
903	/*
904	* Finish creating a small uma keg. This calculates ipers, and the keg size.
905	*
906	* Arguments
907	* keg The zone we should initialize
908	*
909	* Returns
910	* Nothing
911	*/
912	static void
913	keg_small_init(uma_keg_t keg)
914	{
915	u_int rsize;
916	u_int memused;
917	u_int wastedspace;
918	u_int shsize;
919
920	KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
921	rsize = keg->uk_size;
922
923	if (rsize < UMA_SMALLEST_UNIT)
924	rsize = UMA_SMALLEST_UNIT;
925	if (rsize & keg->uk_align)
926	rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
927
928	keg->uk_rsize = rsize;
929	keg->uk_ppera = 1;
930
931	if (keg->uk_flags & UMA_ZONE_REFCNT) {
932	rsize += UMA_FRITMREF_SZ; /* linkage & refcnt */
933	shsize = sizeof(struct uma_slab_refcnt);
934	} else {
935	rsize += UMA_FRITM_SZ; /* Account for linkage */
936	shsize = sizeof(struct uma_slab);
937	}
938
939	keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
940	KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
941	memused = keg->uk_ipers * rsize + shsize;
942	wastedspace = UMA_SLAB_SIZE - memused;
943
944	/*
945	* We can't do OFFPAGE if we're internal or if we've been
946	* asked to not go to the VM for buckets. If we do this we
947	* may end up going to the VM (kmem_map) for slabs which we
948	* do not want to do if we're UMA_ZFLAG_CACHEONLY as a
949	* result of UMA_ZONE_VM, which clearly forbids it.
950	*/
951	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) \|\|
952	(keg->uk_flags & UMA_ZFLAG_CACHEONLY))
953	return;
954
955	if ((wastedspace >= UMA_MAX_WASTE) &&
956	(keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
957	keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
958	KASSERT(keg->uk_ipers <= 255,
959	("keg_small_init: keg->uk_ipers too high!"));
960	#ifdef UMA_DEBUG
961	printf("UMA decided we need offpage slab headers for "
962	"keg: %s, calculated wastedspace = %d, "
963	"maximum wasted space allowed = %d, "
964	"calculated ipers = %d, "
965	"new wasted space = %d\n", keg->uk_name, wastedspace,
966	UMA_MAX_WASTE, keg->uk_ipers,
967	UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
968	#endif
969	keg->uk_flags \|= UMA_ZONE_OFFPAGE;
970	if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
971	keg->uk_flags \|= UMA_ZONE_HASH;
972	}
973	}
974
975	/*
976	* Finish creating a large (> UMA_SLAB_SIZE) uma kegs. Just give in and do
977	* OFFPAGE for now. When I can allow for more dynamic slab sizes this will be
978	* more complicated.
979	*
980	* Arguments
981	* keg The keg we should initialize
982	*
983	* Returns
984	* Nothing
985	*/
986	static void
987	keg_large_init(uma_keg_t keg)
988	{
989	int pages;
990
991	KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
992	KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
993	("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
994
995	pages = keg->uk_size / UMA_SLAB_SIZE;
996
997	/* Account for remainder */
998	if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
999	pages++;
1000
1001	keg->uk_ppera = pages;
1002	keg->uk_ipers = 1;
1003	keg->uk_rsize = keg->uk_size;
1004
1005	/* We can't do OFFPAGE if we're internal, bail out here. */
1006	if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1007	return;
1008
1009	keg->uk_flags \|= UMA_ZONE_OFFPAGE;
1010	if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1011	keg->uk_flags \|= UMA_ZONE_HASH;
1012	}
1013
1014	static void
1015	keg_cachespread_init(uma_keg_t keg)
1016	{
1017	int alignsize;
1018	int trailer;
1019	int pages;
1020	int rsize;
1021
1022	alignsize = keg->uk_align + 1;
1023	rsize = keg->uk_size;
1024	/*
1025	* We want one item to start on every align boundary in a page. To
1026	* do this we will span pages. We will also extend the item by the
1027	* size of align if it is an even multiple of align. Otherwise, it
1028	* would fall on the same boundary every time.
1029	*/
1030	if (rsize & keg->uk_align)
1031	rsize = (rsize & ~keg->uk_align) + alignsize;
1032	if ((rsize & alignsize) == 0)
1033	rsize += alignsize;
1034	trailer = rsize - keg->uk_size;
1035	pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1036	pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1037	keg->uk_rsize = rsize;
1038	keg->uk_ppera = pages;
1039	keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1040	//keg->uk_flags \|= UMA_ZONE_OFFPAGE \| UMA_ZONE_VTOSLAB;
1041	KASSERT(keg->uk_ipers <= uma_max_ipers,
1042	("keg_small_init: keg->uk_ipers too high(%d) increase max_ipers",
1043	keg->uk_ipers));
1044	}
1045
1046	/*
1047	* Keg header ctor. This initializes all fields, locks, etc. And inserts
1048	* the keg onto the global keg list.
1049	*
1050	* Arguments/Returns follow uma_ctor specifications
1051	* udata Actually uma_kctor_args
1052	*/
1053	static int
1054	keg_ctor(void mem, int size, void udata, int flags)
1055	{
1056	struct uma_kctor_args *arg = udata;
1057	uma_keg_t keg = mem;
1058	uma_zone_t zone;
1059
1060	bzero(keg, size);
1061	keg->uk_size = arg->size;
1062	keg->uk_init = arg->uminit;
1063	keg->uk_fini = arg->fini;
1064	keg->uk_align = arg->align;
1065	keg->uk_free = 0;
1066	keg->uk_pages = 0;
1067	keg->uk_flags = arg->flags;
1068	keg->uk_allocf = page_alloc;
1069	keg->uk_freef = page_free;
1070	keg->uk_recurse = 0;
1071	keg->uk_slabzone = NULL;
1072
1073	/*
1074	* The master zone is passed to us at keg-creation time.
1075	*/
1076	zone = arg->zone;
1077	keg->uk_name = zone->uz_name;
1078
1079	if (arg->flags & UMA_ZONE_VM)
1080	keg->uk_flags \|= UMA_ZFLAG_CACHEONLY;
1081
1082	if (arg->flags & UMA_ZONE_ZINIT)
1083	keg->uk_init = zero_init;
1084
1085	/*if (arg->flags & UMA_ZONE_REFCNT \|\| arg->flags & UMA_ZONE_MALLOC)
1086	keg->uk_flags \|= UMA_ZONE_VTOSLAB;*/
1087
1088	/*
1089	* The +UMA_FRITM_SZ added to uk_size is to account for the
1090	* linkage that is added to the size in keg_small_init(). If
1091	* we don't account for this here then we may end up in
1092	* keg_small_init() with a calculated 'ipers' of 0.
1093	*/
1094	if (keg->uk_flags & UMA_ZONE_REFCNT) {
1095	if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1096	keg_cachespread_init(keg);
1097	else if ((keg->uk_size+UMA_FRITMREF_SZ) >
1098	(UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1099	keg_large_init(keg);
1100	else
1101	keg_small_init(keg);
1102	} else {
1103	if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1104	keg_cachespread_init(keg);
1105	else if ((keg->uk_size+UMA_FRITM_SZ) >
1106	(UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1107	keg_large_init(keg);
1108	else
1109	keg_small_init(keg);
1110	}
1111
1112	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1113	if (keg->uk_flags & UMA_ZONE_REFCNT)
1114	keg->uk_slabzone = slabrefzone;
1115	else
1116	keg->uk_slabzone = slabzone;
1117	}
1118
1119	/*
1120	* If we haven't booted yet we need allocations to go through the
1121	* startup cache until the vm is ready.
1122	*/
1123	if (keg->uk_ppera == 1) {
1124	#ifdef UMA_MD_SMALL_ALLOC
1125	keg->uk_allocf = uma_small_alloc;
1126	keg->uk_freef = uma_small_free;
1127	#endif
1128	if (booted == 0)
1129	keg->uk_allocf = startup_alloc;
1130	} else if (booted == 0 && (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1131	keg->uk_allocf = startup_alloc;
1132
1133	/*
1134	* Initialize keg's lock (shared among zones).
1135	*/
1136	if (arg->flags & UMA_ZONE_MTXCLASS)
1137	KEG_LOCK_INIT(keg, 1);
1138	else
1139	KEG_LOCK_INIT(keg, 0);
1140
1141	/*
1142	* If we're putting the slab header in the actual page we need to
1143	* figure out where in each page it goes. This calculates a right
1144	* justified offset into the memory on an ALIGN_PTR boundary.
1145	*/
1146	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1147	u_int totsize;
1148
1149	/* Size of the slab struct and free list */
1150	if (keg->uk_flags & UMA_ZONE_REFCNT)
1151	totsize = sizeof(struct uma_slab_refcnt) +
1152	keg->uk_ipers * UMA_FRITMREF_SZ;
1153	else
1154	totsize = sizeof(struct uma_slab) +
1155	keg->uk_ipers * UMA_FRITM_SZ;
1156
1157	if (totsize & UMA_ALIGN_PTR)
1158	totsize = (totsize & ~UMA_ALIGN_PTR) +
1159	(UMA_ALIGN_PTR + 1);
1160	keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
1161
1162	if (keg->uk_flags & UMA_ZONE_REFCNT)
1163	totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1164	+ keg->uk_ipers * UMA_FRITMREF_SZ;
1165	else
1166	totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1167	+ keg->uk_ipers * UMA_FRITM_SZ;
1168
1169	/*
1170	* The only way the following is possible is if with our
1171	* UMA_ALIGN_PTR adjustments we are now bigger than
1172	* UMA_SLAB_SIZE. I haven't checked whether this is
1173	* mathematically possible for all cases, so we make
1174	* sure here anyway.
1175	*/
1176	if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
1177	printf("zone %s ipers %d rsize %d size %d\n",
1178	zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1179	keg->uk_size);
1180	panic("UMA slab won't fit.");
1181	}
1182	}
1183
1184	if (keg->uk_flags & UMA_ZONE_HASH)
1185	hash_alloc(&keg->uk_hash);
1186
1187	#ifdef UMA_DEBUG
1188	printf("UMA: %s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
1189	zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1190	keg->uk_ipers, keg->uk_ppera,
1191	(keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
1192	#endif
1193
1194	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1195
1196	mtx_lock(&uma_mtx);
1197	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1198	mtx_unlock(&uma_mtx);
1199	return (0);
1200	}
1201
1202	/*
1203	* Zone header ctor. This initializes all fields, locks, etc.
1204	*
1205	* Arguments/Returns follow uma_ctor specifications
1206	* udata Actually uma_zctor_args
1207	*/
1208	static int
1209	zone_ctor(void mem, int size, void udata, int flags)
1210	{
1211	struct uma_zctor_args *arg = udata;
1212	uma_zone_t zone = mem;
1213	uma_zone_t z;
1214	uma_keg_t keg;
1215
1216	bzero(zone, size);
1217	zone->uz_name = arg->name;
1218	zone->uz_ctor = arg->ctor;
1219	zone->uz_dtor = arg->dtor;
1220	zone->uz_slab = zone_fetch_slab;
1221	zone->uz_init = NULL;
1222	zone->uz_fini = NULL;
1223	zone->uz_allocs = 0;
1224	zone->uz_frees = 0;
1225	zone->uz_fails = 0;
1226	zone->uz_fills = zone->uz_count = 0;
1227	zone->uz_flags = 0;
1228	keg = arg->keg;
1229
1230	if (arg->flags & UMA_ZONE_SECONDARY) {
1231	KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1232	zone->uz_init = arg->uminit;
1233	zone->uz_fini = arg->fini;
1234	zone->uz_lock = &keg->uk_lock;
1235	zone->uz_flags \|= UMA_ZONE_SECONDARY;
1236	mtx_lock(&uma_mtx);
1237	ZONE_LOCK(zone);
1238	LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1239	if (LIST_NEXT(z, uz_link) == NULL) {
1240	LIST_INSERT_AFTER(z, zone, uz_link);
1241	break;
1242	}
1243	}
1244	ZONE_UNLOCK(zone);
1245	mtx_unlock(&uma_mtx);
1246	} else if (keg == NULL) {
1247	if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1248	arg->align, arg->flags)) == NULL)
1249	return (ENOMEM);
1250	} else {
1251	struct uma_kctor_args karg;
1252	int error;
1253
1254	/* We should only be here from uma_startup() */
1255	karg.size = arg->size;
1256	karg.uminit = arg->uminit;
1257	karg.fini = arg->fini;
1258	karg.align = arg->align;
1259	karg.flags = arg->flags;
1260	karg.zone = zone;
1261	error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1262	flags);
1263	if (error)
1264	return (error);
1265	}
1266	/*
1267	* Link in the first keg.
1268	*/
1269	zone->uz_klink.kl_keg = keg;
1270	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1271	zone->uz_lock = &keg->uk_lock;
1272	zone->uz_size = keg->uk_size;
1273	zone->uz_flags \|= (keg->uk_flags &
1274	(UMA_ZONE_INHERIT \| UMA_ZFLAG_INHERIT));
1275
1276	/*
1277	* Some internal zones don't have room allocated for the per cpu
1278	* caches. If we're internal, bail out here.
1279	*/
1280	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1281	KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1282	("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1283	return (0);
1284	}
1285
1286	if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1287	zone->uz_count = BUCKET_MAX;
1288	else if (keg->uk_ipers <= BUCKET_MAX)
1289	zone->uz_count = keg->uk_ipers;
1290	else
1291	zone->uz_count = BUCKET_MAX;
1292	return (0);
1293	}
1294
1295	/*
1296	* Keg header dtor. This frees all data, destroys locks, frees the hash
1297	* table and removes the keg from the global list.
1298	*
1299	* Arguments/Returns follow uma_dtor specifications
1300	* udata unused
1301	*/
1302	static void
1303	keg_dtor(void arg, int size, void udata)
1304	{
1305	uma_keg_t keg;
1306
1307	keg = (uma_keg_t)arg;
1308	KEG_LOCK(keg);
1309	if (keg->uk_free != 0) {
1310	printf("Freed UMA keg was not empty (%d items). "
1311	" Lost %d pages of memory.\n",
1312	keg->uk_free, keg->uk_pages);
1313	}
1314	KEG_UNLOCK(keg);
1315
1316	hash_free(&keg->uk_hash);
1317
1318	KEG_LOCK_FINI(keg);
1319	}
1320
1321	/*
1322	* Zone header dtor.
1323	*
1324	* Arguments/Returns follow uma_dtor specifications
1325	* udata unused
1326	*/
1327	static void
1328	zone_dtor(void arg, int size, void udata)
1329	{
1330	uma_klink_t klink;
1331	uma_zone_t zone;
1332	uma_keg_t keg;
1333
1334	zone = (uma_zone_t)arg;
1335	keg = zone_first_keg(zone);
1336
1337	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1338	cache_drain(zone);
1339
1340	mtx_lock(&uma_mtx);
1341	LIST_REMOVE(zone, uz_link);
1342	mtx_unlock(&uma_mtx);
1343	/*
1344	* XXX there are some races here where
1345	* the zone can be drained but zone lock
1346	* released and then refilled before we
1347	* remove it... we dont care for now
1348	*/
1349	zone_drain_wait(zone, M_WAITOK);
1350	/*
1351	* Unlink all of our kegs.
1352	*/
1353	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1354	klink->kl_keg = NULL;
1355	LIST_REMOVE(klink, kl_link);
1356	if (klink == &zone->uz_klink)
1357	continue;
1358	free(klink, M_TEMP);
1359	}
1360	/*
1361	* We only destroy kegs from non secondary zones.
1362	*/
1363	if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0) {
1364	mtx_lock(&uma_mtx);
1365	LIST_REMOVE(keg, uk_link);
1366	mtx_unlock(&uma_mtx);
1367	zone_free_item(kegs, keg, NULL, SKIP_NONE,
1368	ZFREE_STATFREE);
1369	}
1370	}
1371
1372	/*
1373	* Traverses every zone in the system and calls a callback
1374	*
1375	* Arguments:
1376	* zfunc A pointer to a function which accepts a zone
1377	* as an argument.
1378	*
1379	* Returns:
1380	* Nothing
1381	*/
1382	static void
1383	zone_foreach(void (*zfunc)(uma_zone_t))
1384	{
1385	uma_keg_t keg;
1386	uma_zone_t zone;
1387
1388	mtx_lock(&uma_mtx);
1389	LIST_FOREACH(keg, &uma_kegs, uk_link) {
1390	LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1391	zfunc(zone);
1392	}
1393	mtx_unlock(&uma_mtx);
1394	}
1395
1396	/* Public functions */
1397	/* See uma.h */
1398	void
1399	uma_startup(void *bootmem, int boot_pages)
1400	{
1401	struct uma_zctor_args args;
1402	uma_slab_t slab;
1403	u_int slabsize;
1404	u_int objsize, totsize, wsize;
1405	int i;
1406
1407	#ifdef UMA_DEBUG
1408	printf("Creating uma keg headers zone and keg.\n");
1409	#endif
1410	mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1411
1412	/*
1413	* Figure out the maximum number of items-per-slab we'll have if
1414	* we're using the OFFPAGE slab header to track free items, given
1415	* all possible object sizes and the maximum desired wastage
1416	* (UMA_MAX_WASTE).
1417	*
1418	* We iterate until we find an object size for
1419	* which the calculated wastage in keg_small_init() will be
1420	* enough to warrant OFFPAGE. Since wastedspace versus objsize
1421	* is an overall increasing see-saw function, we find the smallest
1422	* objsize such that the wastage is always acceptable for objects
1423	* with that objsize or smaller. Since a smaller objsize always
1424	* generates a larger possible uma_max_ipers, we use this computed
1425	* objsize to calculate the largest ipers possible. Since the
1426	* ipers calculated for OFFPAGE slab headers is always larger than
1427	* the ipers initially calculated in keg_small_init(), we use
1428	* the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1429	* obtain the maximum ipers possible for offpage slab headers.
1430	*
1431	* It should be noted that ipers versus objsize is an inversly
1432	* proportional function which drops off rather quickly so as
1433	* long as our UMA_MAX_WASTE is such that the objsize we calculate
1434	* falls into the portion of the inverse relation AFTER the steep
1435	* falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1436	*
1437	* Note that we have 8-bits (1 byte) to use as a freelist index
1438	* inside the actual slab header itself and this is enough to
1439	* accomodate us. In the worst case, a UMA_SMALLEST_UNIT sized
1440	* object with offpage slab header would have ipers =
1441	* UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1442	* 1 greater than what our byte-integer freelist index can
1443	* accomodate, but we know that this situation never occurs as
1444	* for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1445	* that we need to go to offpage slab headers. Or, if we do,
1446	* then we trap that condition below and panic in the INVARIANTS case.
1447	*/
1448	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1449	totsize = wsize;
1450	objsize = UMA_SMALLEST_UNIT;
1451	while (totsize >= wsize) {
1452	totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1453	(objsize + UMA_FRITM_SZ);
1454	totsize *= (UMA_FRITM_SZ + objsize);
1455	objsize++;
1456	}
1457	if (objsize > UMA_SMALLEST_UNIT)
1458	objsize--;
1459	uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
1460
1461	wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1462	totsize = wsize;
1463	objsize = UMA_SMALLEST_UNIT;
1464	while (totsize >= wsize) {
1465	totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1466	(objsize + UMA_FRITMREF_SZ);
1467	totsize *= (UMA_FRITMREF_SZ + objsize);
1468	objsize++;
1469	}
1470	if (objsize > UMA_SMALLEST_UNIT)
1471	objsize--;
1472	uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);
1473
1474	KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1475	("uma_startup: calculated uma_max_ipers values too large!"));
1476
1477	#ifdef UMA_DEBUG
1478	printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1479	printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
1480	uma_max_ipers_ref);
1481	#endif
1482
1483	/* "manually" create the initial zone */
1484	args.name = "UMA Kegs";
1485	args.size = sizeof(struct uma_keg);
1486	args.ctor = keg_ctor;
1487	args.dtor = keg_dtor;
1488	args.uminit = zero_init;
1489	args.fini = NULL;
1490	args.keg = &masterkeg;
1491	args.align = 32 - 1;
1492	args.flags = UMA_ZFLAG_INTERNAL;
1493	/* The initial zone has no Per cpu queues so it's smaller */
1494	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1495
1496	#ifdef UMA_DEBUG
1497	printf("Filling boot free list.\n");
1498	#endif
1499	for (i = 0; i < boot_pages; i++) {
1500	slab = (uma_slab_t)((u_int8_t )bootmem + (i UMA_SLAB_SIZE));
1501	slab->us_data = (u_int8_t *)slab;
1502	slab->us_flags = UMA_SLAB_BOOT;
1503	LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1504	}
1505	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1506
1507	#ifdef UMA_DEBUG
1508	printf("Creating uma zone headers zone and keg.\n");
1509	#endif
1510	args.name = "UMA Zones";
1511	args.size = sizeof(struct uma_zone) +
1512	(sizeof(struct uma_cache) * (mp_maxid + 1));
1513	args.ctor = zone_ctor;
1514	args.dtor = zone_dtor;
1515	args.uminit = zero_init;
1516	args.fini = NULL;
1517	args.keg = NULL;
1518	args.align = 32 - 1;
1519	args.flags = UMA_ZFLAG_INTERNAL;
1520	/* The initial zone has no Per cpu queues so it's smaller */
1521	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1522
1523	#ifdef UMA_DEBUG
1524	printf("Initializing pcpu cache locks.\n");
1525	#endif
1526	#ifdef UMA_DEBUG
1527	printf("Creating slab and hash zones.\n");
1528	#endif
1529
1530	/*
1531	* This is the max number of free list items we'll have with
1532	* offpage slabs.
1533	*/
1534	slabsize = uma_max_ipers * UMA_FRITM_SZ;
1535	slabsize += sizeof(struct uma_slab);
1536
1537	/* Now make a zone for slab headers */
1538	slabzone = uma_zcreate("UMA Slabs",
1539	slabsize,
1540	NULL, NULL, NULL, NULL,
1541	UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1542
1543	/*
1544	* We also create a zone for the bigger slabs with reference
1545	* counts in them, to accomodate UMA_ZONE_REFCNT zones.
1546	*/
1547	slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1548	slabsize += sizeof(struct uma_slab_refcnt);
1549	slabrefzone = uma_zcreate("UMA RCntSlabs",
1550	slabsize,
1551	NULL, NULL, NULL, NULL,
1552	UMA_ALIGN_PTR,
1553	UMA_ZFLAG_INTERNAL);
1554
1555	hashzone = uma_zcreate("UMA Hash",
1556	sizeof(struct slabhead ) UMA_HASH_SIZE_INIT,
1557	NULL, NULL, NULL, NULL,
1558	UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1559
1560	bucket_init();
1561
1562	#if defined(UMA_MD_SMALL_ALLOC) && !defined(UMA_MD_SMALL_ALLOC_NEEDS_VM)
1563	booted = 1;
1564	#endif
1565
1566	#ifdef UMA_DEBUG
1567	printf("UMA startup complete.\n");
1568	#endif
1569	}
1570
1571	static uma_keg_t
1572	uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1573	int align, u_int32_t flags)
1574	{
1575	struct uma_kctor_args args;
1576
1577	args.size = size;
1578	args.uminit = uminit;
1579	args.fini = fini;
1580	args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1581	args.flags = flags;
1582	args.zone = zone;
1583	return (zone_alloc_item(kegs, &args, M_WAITOK));
1584	}
1585
1586	/* See uma.h */
1587	void
1588	uma_set_align(int align)
1589	{
1590
1591	if (align != UMA_ALIGN_CACHE)
1592	uma_align_cache = align;
1593	}
1594
1595	/* See uma.h */
1596	uma_zone_t
1597	uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1598	uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1599
1600	{
1601	struct uma_zctor_args args;
1602
1603	/* This stuff is essential for the zone ctor */
1604	args.name = name;
1605	args.size = size;
1606	args.ctor = ctor;
1607	args.dtor = dtor;
1608	args.uminit = uminit;
1609	args.fini = fini;
1610	args.align = align;
1611	args.flags = flags;
1612	args.keg = NULL;
1613
1614	return (zone_alloc_item(zones, &args, M_WAITOK));
1615	}
1616
1617	/* See uma.h */
1618	uma_zone_t
1619	uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1620	uma_init zinit, uma_fini zfini, uma_zone_t master)
1621	{
1622	struct uma_zctor_args args;
1623	uma_keg_t keg;
1624
1625	keg = zone_first_keg(master);
1626	args.name = name;
1627	args.size = keg->uk_size;
1628	args.ctor = ctor;
1629	args.dtor = dtor;
1630	args.uminit = zinit;
1631	args.fini = zfini;
1632	args.align = keg->uk_align;
1633	args.flags = keg->uk_flags \| UMA_ZONE_SECONDARY;
1634	args.keg = keg;
1635
1636	/* XXX Attaches only one keg of potentially many. */
1637	return (zone_alloc_item(zones, &args, M_WAITOK));
1638	}
1639
1640	static void
1641	zone_lock_pair(uma_zone_t a, uma_zone_t b)
1642	{
1643	if (a < b) {
1644	ZONE_LOCK(a);
1645	mtx_lock_flags(b->uz_lock, MTX_DUPOK);
1646	} else {
1647	ZONE_LOCK(b);
1648	mtx_lock_flags(a->uz_lock, MTX_DUPOK);
1649	}
1650	}
1651
1652	static void
1653	zone_unlock_pair(uma_zone_t a, uma_zone_t b)
1654	{
1655
1656	ZONE_UNLOCK(a);
1657	ZONE_UNLOCK(b);
1658	}
1659
1660
1661	/* See uma.h */
1662	void
1663	uma_zdestroy(uma_zone_t zone)
1664	{
1665
1666	zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
1667	}
1668
1669	/* See uma.h */
1670	void *
1671	uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1672	{
1673	void *item;
1674	uma_cache_t cache;
1675	uma_bucket_t bucket;
1676	int cpu;
1677
1678	/* This is the fast path allocation */
1679	#ifdef UMA_DEBUG_ALLOC_1
1680	printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1681	#endif
1682	CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1683	zone->uz_name, flags);
1684
1685	if (flags & M_WAITOK) {
1686	WITNESS_WARN(WARN_GIANTOK \| WARN_SLEEPOK, NULL,
1687	"uma_zalloc_arg: zone \"%s\"", zone->uz_name);
1688	}
1689
1690	/*
1691	* If possible, allocate from the per-CPU cache. There are two
1692	* requirements for safe access to the per-CPU cache: (1) the thread
1693	* accessing the cache must not be preempted or yield during access,
1694	* and (2) the thread must not migrate CPUs without switching which
1695	* cache it accesses. We rely on a critical section to prevent
1696	* preemption and migration. We release the critical section in
1697	* order to acquire the zone mutex if we are unable to allocate from
1698	* the current cache; when we re-acquire the critical section, we
1699	* must detect and handle migration if it has occurred.
1700	*/
1701	zalloc_restart:
1702	critical_enter();
1703	cpu = curcpu;
1704	cache = &zone->uz_cpu[cpu];
1705
1706	zalloc_start:
1707	bucket = cache->uc_allocbucket;
1708
1709	if (bucket) {
1710	if (bucket->ub_cnt > 0) {
1711	bucket->ub_cnt--;
1712	item = bucket->ub_bucket[bucket->ub_cnt];
1713	#ifdef INVARIANTS
1714	bucket->ub_bucket[bucket->ub_cnt] = NULL;
1715	#endif
1716	KASSERT(item != NULL,
1717	("uma_zalloc: Bucket pointer mangled."));
1718	cache->uc_allocs++;
1719	critical_exit();
1720	#ifdef INVARIANTS
1721	ZONE_LOCK(zone);
1722	uma_dbg_alloc(zone, NULL, item);
1723	ZONE_UNLOCK(zone);
1724	#endif
1725	if (zone->uz_ctor != NULL) {
1726	if (zone->uz_ctor(item, zone->uz_size,
1727	udata, flags) != 0) {
1728	zone_free_item(zone, item, udata,
1729	SKIP_DTOR, ZFREE_STATFAIL \|
1730	ZFREE_STATFREE);
1731	return (NULL);
1732	}
1733	}
1734	if (flags & M_ZERO)
1735	bzero(item, zone->uz_size);
1736	return (item);
1737	} else if (cache->uc_freebucket) {
1738	/*
1739	* We have run out of items in our allocbucket.
1740	* See if we can switch with our free bucket.
1741	*/
1742	if (cache->uc_freebucket->ub_cnt > 0) {
1743	#ifdef UMA_DEBUG_ALLOC
1744	printf("uma_zalloc: Swapping empty with"
1745	" alloc.\n");
1746	#endif
1747	bucket = cache->uc_freebucket;
1748	cache->uc_freebucket = cache->uc_allocbucket;
1749	cache->uc_allocbucket = bucket;
1750
1751	goto zalloc_start;
1752	}
1753	}
1754	}
1755	/*
1756	* Attempt to retrieve the item from the per-CPU cache has failed, so
1757	* we must go back to the zone. This requires the zone lock, so we
1758	* must drop the critical section, then re-acquire it when we go back
1759	* to the cache. Since the critical section is released, we may be
1760	* preempted or migrate. As such, make sure not to maintain any
1761	* thread-local state specific to the cache from prior to releasing
1762	* the critical section.
1763	*/
1764	critical_exit();
1765	ZONE_LOCK(zone);
1766	critical_enter();
1767	cpu = curcpu;
1768	cache = &zone->uz_cpu[cpu];
1769	bucket = cache->uc_allocbucket;
1770	if (bucket != NULL) {
1771	if (bucket->ub_cnt > 0) {
1772	ZONE_UNLOCK(zone);
1773	goto zalloc_start;
1774	}
1775	bucket = cache->uc_freebucket;
1776	if (bucket != NULL && bucket->ub_cnt > 0) {
1777	ZONE_UNLOCK(zone);
1778	goto zalloc_start;
1779	}
1780	}
1781
1782	/* Since we have locked the zone we may as well send back our stats */
1783	zone->uz_allocs += cache->uc_allocs;
1784	cache->uc_allocs = 0;
1785	zone->uz_frees += cache->uc_frees;
1786	cache->uc_frees = 0;
1787
1788	/* Our old one is now a free bucket */
1789	if (cache->uc_allocbucket) {
1790	KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1791	("uma_zalloc_arg: Freeing a non free bucket."));
1792	LIST_INSERT_HEAD(&zone->uz_free_bucket,
1793	cache->uc_allocbucket, ub_link);
1794	cache->uc_allocbucket = NULL;
1795	}
1796
1797	/* Check the free list for a new alloc bucket */
1798	if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1799	KASSERT(bucket->ub_cnt != 0,
1800	("uma_zalloc_arg: Returning an empty bucket."));
1801
1802	LIST_REMOVE(bucket, ub_link);
1803	cache->uc_allocbucket = bucket;
1804	ZONE_UNLOCK(zone);
1805	goto zalloc_start;
1806	}
1807	/* We are no longer associated with this CPU. */
1808	critical_exit();
1809
1810	/* Bump up our uz_count so we get here less */
1811	if (zone->uz_count < BUCKET_MAX)
1812	zone->uz_count++;
1813
1814	/*
1815	* Now lets just fill a bucket and put it on the free list. If that
1816	* works we'll restart the allocation from the begining.
1817	*/
1818	if (zone_alloc_bucket(zone, flags)) {
1819	ZONE_UNLOCK(zone);
1820	goto zalloc_restart;
1821	}
1822	ZONE_UNLOCK(zone);
1823	/*
1824	* We may not be able to get a bucket so return an actual item.
1825	*/
1826	#ifdef UMA_DEBUG
1827	printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1828	#endif
1829
1830	item = zone_alloc_item(zone, udata, flags);
1831	return (item);
1832	}
1833
1834	static uma_slab_t
1835	keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
1836	{
1837	uma_slab_t slab;
1838
1839	mtx_assert(&keg->uk_lock, MA_OWNED);
1840	slab = NULL;
1841
1842	for (;;) {
1843	/*
1844	* Find a slab with some space. Prefer slabs that are partially
1845	* used over those that are totally full. This helps to reduce
1846	* fragmentation.
1847	*/
1848	if (keg->uk_free != 0) {
1849	if (!LIST_EMPTY(&keg->uk_part_slab)) {
1850	slab = LIST_FIRST(&keg->uk_part_slab);
1851	} else {
1852	slab = LIST_FIRST(&keg->uk_free_slab);
1853	LIST_REMOVE(slab, us_link);
1854	LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
1855	us_link);
1856	}
1857	MPASS(slab->us_keg == keg);
1858	return (slab);
1859	}
1860
1861	/*
1862	* M_NOVM means don't ask at all!
1863	*/
1864	if (flags & M_NOVM)
1865	break;
1866
1867	if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
1868	keg->uk_flags \|= UMA_ZFLAG_FULL;
1869	/*
1870	* If this is not a multi-zone, set the FULL bit.
1871	* Otherwise slab_multi() takes care of it.
1872	*/
1873	if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0)
1874	zone->uz_flags \|= UMA_ZFLAG_FULL;
1875	if (flags & M_NOWAIT)
1876	break;
1877	msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
1878	continue;
1879	}
1880	keg->uk_recurse++;
1881	slab = keg_alloc_slab(keg, zone, flags);
1882	keg->uk_recurse--;
1883	/*
1884	* If we got a slab here it's safe to mark it partially used
1885	* and return. We assume that the caller is going to remove
1886	* at least one item.
1887	*/
1888	if (slab) {
1889	MPASS(slab->us_keg == keg);
1890	LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
1891	return (slab);
1892	}
1893	/*
1894	* We might not have been able to get a slab but another cpu
1895	* could have while we were unlocked. Check again before we
1896	* fail.
1897	*/
1898	flags \|= M_NOVM;
1899	}
1900	return (slab);
1901	}
1902
1903	static inline void
1904	zone_relock(uma_zone_t zone, uma_keg_t keg)
1905	{
1906	if (zone->uz_lock != &keg->uk_lock) {
1907	KEG_UNLOCK(keg);
1908	ZONE_LOCK(zone);
1909	}
1910	}
1911
1912	static inline void
1913	keg_relock(uma_keg_t keg, uma_zone_t zone)
1914	{
1915	if (zone->uz_lock != &keg->uk_lock) {
1916	ZONE_UNLOCK(zone);
1917	KEG_LOCK(keg);
1918	}
1919	}
1920
1921	static uma_slab_t
1922	zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
1923	{
1924	uma_slab_t slab;
1925
1926	if (keg == NULL)
1927	keg = zone_first_keg(zone);
1928	/*
1929	* This is to prevent us from recursively trying to allocate
1930	* buckets. The problem is that if an allocation forces us to
1931	* grab a new bucket we will call page_alloc, which will go off
1932	* and cause the vm to allocate vm_map_entries. If we need new
1933	* buckets there too we will recurse in kmem_alloc and bad
1934	* things happen. So instead we return a NULL bucket, and make
1935	* the code that allocates buckets smart enough to deal with it
1936	*/
1937	if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0)
1938	return (NULL);
1939
1940	for (;;) {
1941	slab = keg_fetch_slab(keg, zone, flags);
1942	if (slab)
1943	return (slab);
1944	if (flags & (M_NOWAIT \| M_NOVM))
1945	break;
1946	}
1947	return (NULL);
1948	}
1949
1950	/*
1951	* uma_zone_fetch_slab_multi: Fetches a slab from one available keg. Returns
1952	* with the keg locked. Caller must call zone_relock() afterwards if the
1953	* zone lock is required. On NULL the zone lock is held.
1954	*
1955	* The last pointer is used to seed the search. It is not required.
1956	*/
1957	static uma_slab_t
1958	zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
1959	{
1960	uma_klink_t klink;
1961	uma_slab_t slab;
1962	uma_keg_t keg;
1963	int flags;
1964	int empty;
1965	int full;
1966
1967	/*
1968	* Don't wait on the first pass. This will skip limit tests
1969	* as well. We don't want to block if we can find a provider
1970	* without blocking.
1971	*/
1972	flags = (rflags & ~M_WAITOK) \| M_NOWAIT;
1973	/*
1974	* Use the last slab allocated as a hint for where to start
1975	* the search.
1976	*/
1977	if (last) {
1978	slab = keg_fetch_slab(last, zone, flags);
1979	if (slab)
1980	return (slab);
1981	zone_relock(zone, last);
1982	last = NULL;
1983	}
1984	/*
1985	* Loop until we have a slab incase of transient failures
1986	* while M_WAITOK is specified. I'm not sure this is 100%
1987	* required but we've done it for so long now.
1988	*/
1989	for (;;) {
1990	empty = 0;
1991	full = 0;
1992	/*
1993	* Search the available kegs for slabs. Be careful to hold the
1994	* correct lock while calling into the keg layer.
1995	*/
1996	LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
1997	keg = klink->kl_keg;
1998	keg_relock(keg, zone);
1999	if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2000	slab = keg_fetch_slab(keg, zone, flags);
2001	if (slab)
2002	return (slab);
2003	}
2004	if (keg->uk_flags & UMA_ZFLAG_FULL)
2005	full++;
2006	else
2007	empty++;
2008	zone_relock(zone, keg);
2009	}
2010	if (rflags & (M_NOWAIT \| M_NOVM))
2011	break;
2012	flags = rflags;
2013	/*
2014	* All kegs are full. XXX We can't atomically check all kegs
2015	* and sleep so just sleep for a short period and retry.
2016	*/
2017	if (full && !empty) {
2018	zone->uz_flags \|= UMA_ZFLAG_FULL;
2019	msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
2020	zone->uz_flags &= ~UMA_ZFLAG_FULL;
2021	continue;
2022	}
2023	}
2024	return (NULL);
2025	}
2026
2027	static void *
2028	slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
2029	{
2030	uma_keg_t keg;
2031	uma_slabrefcnt_t slabref;
2032	void *item;
2033	u_int8_t freei;
2034
2035	keg = slab->us_keg;
2036	mtx_assert(&keg->uk_lock, MA_OWNED);
2037
2038	freei = slab->us_firstfree;
2039	if (keg->uk_flags & UMA_ZONE_REFCNT) {
2040	slabref = (uma_slabrefcnt_t)slab;
2041	slab->us_firstfree = slabref->us_freelist[freei].us_item;
2042	} else {
2043	slab->us_firstfree = slab->us_freelist[freei].us_item;
2044	}
2045	item = slab->us_data + (keg->uk_rsize * freei);
2046
2047	slab->us_freecount--;
2048	keg->uk_free--;
2049	#ifdef INVARIANTS
2050	uma_dbg_alloc(zone, slab, item);
2051	#endif
2052	/* Move this slab to the full list */
2053	if (slab->us_freecount == 0) {
2054	LIST_REMOVE(slab, us_link);
2055	LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2056	}
2057
2058	return (item);
2059	}
2060
2061	static int
2062	zone_alloc_bucket(uma_zone_t zone, int flags)
2063	{
2064	uma_bucket_t bucket;
2065	uma_slab_t slab;
2066	uma_keg_t keg;
2067	int16_t saved;
2068	int max, origflags = flags;
2069
2070	/*
2071	* Try this zone's free list first so we don't allocate extra buckets.
2072	*/
2073	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2074	KASSERT(bucket->ub_cnt == 0,
2075	("zone_alloc_bucket: Bucket on free list is not empty."));
2076	LIST_REMOVE(bucket, ub_link);
2077	} else {
2078	int bflags;
2079
2080	bflags = (flags & ~M_ZERO);
2081	if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2082	bflags \|= M_NOVM;
2083
2084	ZONE_UNLOCK(zone);
2085	bucket = bucket_alloc(zone->uz_count, bflags);
2086	ZONE_LOCK(zone);
2087	}
2088
2089	if (bucket == NULL) {
2090	return (0);
2091	}
2092
2093	#ifdef SMP
2094	/*
2095	* This code is here to limit the number of simultaneous bucket fills
2096	* for any given zone to the number of per cpu caches in this zone. This
2097	* is done so that we don't allocate more memory than we really need.
2098	*/
2099	if (zone->uz_fills >= mp_ncpus)
2100	goto done;
2101
2102	#endif
2103	zone->uz_fills++;
2104
2105	max = MIN(bucket->ub_entries, zone->uz_count);
2106	/* Try to keep the buckets totally full */
2107	saved = bucket->ub_cnt;
2108	slab = NULL;
2109	keg = NULL;
2110	while (bucket->ub_cnt < max &&
2111	(slab = zone->uz_slab(zone, keg, flags)) != NULL) {
2112	keg = slab->us_keg;
2113	while (slab->us_freecount && bucket->ub_cnt < max) {
2114	bucket->ub_bucket[bucket->ub_cnt++] =
2115	slab_alloc_item(zone, slab);
2116	}
2117
2118	/* Don't block on the next fill */
2119	flags \|= M_NOWAIT;
2120	}
2121	if (slab)
2122	zone_relock(zone, keg);
2123
2124	/*
2125	* We unlock here because we need to call the zone's init.
2126	* It should be safe to unlock because the slab dealt with
2127	* above is already on the appropriate list within the keg
2128	* and the bucket we filled is not yet on any list, so we
2129	* own it.
2130	*/
2131	if (zone->uz_init != NULL) {
2132	int i;
2133
2134	ZONE_UNLOCK(zone);
2135	for (i = saved; i < bucket->ub_cnt; i++)
2136	if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2137	origflags) != 0)
2138	break;
2139	/*
2140	* If we couldn't initialize the whole bucket, put the
2141	* rest back onto the freelist.
2142	*/
2143	if (i != bucket->ub_cnt) {
2144	int j;
2145
2146	for (j = i; j < bucket->ub_cnt; j++) {
2147	zone_free_item(zone, bucket->ub_bucket[j],
2148	NULL, SKIP_FINI, 0);
2149	#ifdef INVARIANTS
2150	bucket->ub_bucket[j] = NULL;
2151	#endif
2152	}
2153	bucket->ub_cnt = i;
2154	}
2155	ZONE_LOCK(zone);
2156	}
2157
2158	zone->uz_fills--;
2159	if (bucket->ub_cnt != 0) {
2160	LIST_INSERT_HEAD(&zone->uz_full_bucket,
2161	bucket, ub_link);
2162	return (1);
2163	}
2164	#ifdef SMP
2165	done:
2166	#endif
2167	bucket_free(bucket);
2168
2169	return (0);
2170	}
2171	/*
2172	* Allocates an item for an internal zone
2173	*
2174	* Arguments
2175	* zone The zone to alloc for.
2176	* udata The data to be passed to the constructor.
2177	* flags M_WAITOK, M_NOWAIT, M_ZERO.
2178	*
2179	* Returns
2180	* NULL if there is no memory and M_NOWAIT is set
2181	* An item if successful
2182	*/
2183
2184	static void *
2185	zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2186	{
2187	uma_slab_t slab;
2188	void *item;
2189
2190	item = NULL;
2191
2192	#ifdef UMA_DEBUG_ALLOC
2193	printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2194	#endif
2195	ZONE_LOCK(zone);
2196
2197	slab = zone->uz_slab(zone, NULL, flags);
2198	if (slab == NULL) {
2199	zone->uz_fails++;
2200	ZONE_UNLOCK(zone);
2201	return (NULL);
2202	}
2203
2204	item = slab_alloc_item(zone, slab);
2205
2206	zone_relock(zone, slab->us_keg);
2207	zone->uz_allocs++;
2208	ZONE_UNLOCK(zone);
2209
2210	/*
2211	* We have to call both the zone's init (not the keg's init)
2212	* and the zone's ctor. This is because the item is going from
2213	* a keg slab directly to the user, and the user is expecting it
2214	* to be both zone-init'd as well as zone-ctor'd.
2215	*/
2216	if (zone->uz_init != NULL) {
2217	if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2218	zone_free_item(zone, item, udata, SKIP_FINI,
2219	ZFREE_STATFAIL \| ZFREE_STATFREE);
2220	return (NULL);
2221	}
2222	}
2223	if (zone->uz_ctor != NULL) {
2224	if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2225	zone_free_item(zone, item, udata, SKIP_DTOR,
2226	ZFREE_STATFAIL \| ZFREE_STATFREE);
2227	return (NULL);
2228	}
2229	}
2230	if (flags & M_ZERO)
2231	bzero(item, zone->uz_size);
2232
2233	return (item);
2234	}
2235
2236	/* See uma.h */
2237	void
2238	uma_zfree_arg(uma_zone_t zone, void item, void udata)
2239	{
2240	uma_cache_t cache;
2241	uma_bucket_t bucket;
2242	int bflags;
2243	int cpu;
2244
2245	#ifdef UMA_DEBUG_ALLOC_1
2246	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2247	#endif
2248	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2249	zone->uz_name);
2250
2251	/* uma_zfree(..., NULL) does nothing, to match free(9). */
2252	if (item == NULL)
2253	return;
2254
2255	if (zone->uz_dtor)
2256	zone->uz_dtor(item, zone->uz_size, udata);
2257
2258	#ifdef INVARIANTS
2259	ZONE_LOCK(zone);
2260	if (zone->uz_flags & UMA_ZONE_MALLOC)
2261	uma_dbg_free(zone, udata, item);
2262	else
2263	uma_dbg_free(zone, NULL, item);
2264	ZONE_UNLOCK(zone);
2265	#endif
2266	/*
2267	* The race here is acceptable. If we miss it we'll just have to wait
2268	* a little longer for the limits to be reset.
2269	*/
2270	if (zone->uz_flags & UMA_ZFLAG_FULL)
2271	goto zfree_internal;
2272
2273	/*
2274	* If possible, free to the per-CPU cache. There are two
2275	* requirements for safe access to the per-CPU cache: (1) the thread
2276	* accessing the cache must not be preempted or yield during access,
2277	* and (2) the thread must not migrate CPUs without switching which
2278	* cache it accesses. We rely on a critical section to prevent
2279	* preemption and migration. We release the critical section in
2280	* order to acquire the zone mutex if we are unable to free to the
2281	* current cache; when we re-acquire the critical section, we must
2282	* detect and handle migration if it has occurred.
2283	*/
2284	zfree_restart:
2285	critical_enter();
2286	cpu = curcpu;
2287	cache = &zone->uz_cpu[cpu];
2288
2289	zfree_start:
2290	bucket = cache->uc_freebucket;
2291
2292	if (bucket) {
2293	/*
2294	* Do we have room in our bucket? It is OK for this uz count
2295	* check to be slightly out of sync.
2296	*/
2297
2298	if (bucket->ub_cnt < bucket->ub_entries) {
2299	KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2300	("uma_zfree: Freeing to non free bucket index."));
2301	bucket->ub_bucket[bucket->ub_cnt] = item;
2302	bucket->ub_cnt++;
2303	cache->uc_frees++;
2304	critical_exit();
2305	return;
2306	} else if (cache->uc_allocbucket) {
2307	#ifdef UMA_DEBUG_ALLOC
2308	printf("uma_zfree: Swapping buckets.\n");
2309	#endif
2310	/*
2311	* We have run out of space in our freebucket.
2312	* See if we can switch with our alloc bucket.
2313	*/
2314	if (cache->uc_allocbucket->ub_cnt <
2315	cache->uc_freebucket->ub_cnt) {
2316	bucket = cache->uc_freebucket;
2317	cache->uc_freebucket = cache->uc_allocbucket;
2318	cache->uc_allocbucket = bucket;
2319	goto zfree_start;
2320	}
2321	}
2322	}
2323	/*
2324	* We can get here for two reasons:
2325	*
2326	* 1) The buckets are NULL
2327	* 2) The alloc and free buckets are both somewhat full.
2328	*
2329	* We must go back the zone, which requires acquiring the zone lock,
2330	* which in turn means we must release and re-acquire the critical
2331	* section. Since the critical section is released, we may be
2332	* preempted or migrate. As such, make sure not to maintain any
2333	* thread-local state specific to the cache from prior to releasing
2334	* the critical section.
2335	*/
2336	critical_exit();
2337	ZONE_LOCK(zone);
2338	critical_enter();
2339	cpu = curcpu;
2340	cache = &zone->uz_cpu[cpu];
2341	if (cache->uc_freebucket != NULL) {
2342	if (cache->uc_freebucket->ub_cnt <
2343	cache->uc_freebucket->ub_entries) {
2344	ZONE_UNLOCK(zone);
2345	goto zfree_start;
2346	}
2347	if (cache->uc_allocbucket != NULL &&
2348	(cache->uc_allocbucket->ub_cnt <
2349	cache->uc_freebucket->ub_cnt)) {
2350	ZONE_UNLOCK(zone);
2351	goto zfree_start;
2352	}
2353	}
2354
2355	/* Since we have locked the zone we may as well send back our stats */
2356	zone->uz_allocs += cache->uc_allocs;
2357	cache->uc_allocs = 0;
2358	zone->uz_frees += cache->uc_frees;
2359	cache->uc_frees = 0;
2360
2361	bucket = cache->uc_freebucket;
2362	cache->uc_freebucket = NULL;
2363
2364	/* Can we throw this on the zone full list? */
2365	if (bucket != NULL) {
2366	#ifdef UMA_DEBUG_ALLOC
2367	printf("uma_zfree: Putting old bucket on the free list.\n");
2368	#endif
2369	/* ub_cnt is pointing to the last free item */
2370	KASSERT(bucket->ub_cnt != 0,
2371	("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2372	LIST_INSERT_HEAD(&zone->uz_full_bucket,
2373	bucket, ub_link);
2374	}
2375	if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2376	LIST_REMOVE(bucket, ub_link);
2377	ZONE_UNLOCK(zone);
2378	cache->uc_freebucket = bucket;
2379	goto zfree_start;
2380	}
2381	/* We are no longer associated with this CPU. */
2382	critical_exit();
2383
2384	/* And the zone.. */
2385	ZONE_UNLOCK(zone);
2386
2387	#ifdef UMA_DEBUG_ALLOC
2388	printf("uma_zfree: Allocating new free bucket.\n");
2389	#endif
2390	bflags = M_NOWAIT;
2391
2392	if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2393	bflags \|= M_NOVM;
2394	bucket = bucket_alloc(zone->uz_count, bflags);
2395	if (bucket) {
2396	ZONE_LOCK(zone);
2397	LIST_INSERT_HEAD(&zone->uz_free_bucket,
2398	bucket, ub_link);
2399	ZONE_UNLOCK(zone);
2400	goto zfree_restart;
2401	}
2402
2403	/*
2404	* If nothing else caught this, we'll just do an internal free.
2405	*/
2406	zfree_internal:
2407	zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
2408
2409	return;
2410	}
2411
2412	/*
2413	* Frees an item to an INTERNAL zone or allocates a free bucket
2414	*
2415	* Arguments:
2416	* zone The zone to free to
2417	* item The item we're freeing
2418	* udata User supplied data for the dtor
2419	* skip Skip dtors and finis
2420	*/
2421	static void
2422	zone_free_item(uma_zone_t zone, void item, void udata,
2423	enum zfreeskip skip, int flags)
2424	{
2425	uma_slab_t slab;
2426	uma_slabrefcnt_t slabref;
2427	uma_keg_t keg;
2428	u_int8_t *mem;
2429	u_int8_t freei;
2430	int clearfull;
2431
2432	if (skip < SKIP_DTOR && zone->uz_dtor)
2433	zone->uz_dtor(item, zone->uz_size, udata);
2434
2435	if (skip < SKIP_FINI && zone->uz_fini)
2436	zone->uz_fini(item, zone->uz_size);
2437
2438	ZONE_LOCK(zone);
2439
2440	if (flags & ZFREE_STATFAIL)
2441	zone->uz_fails++;
2442	if (flags & ZFREE_STATFREE)
2443	zone->uz_frees++;
2444
2445	if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2446	mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2447	keg = zone_first_keg(zone); /* Must only be one. */
2448	if (zone->uz_flags & UMA_ZONE_HASH) {
2449	slab = hash_sfind(&keg->uk_hash, mem);
2450	} else {
2451	mem += keg->uk_pgoff;
2452	slab = (uma_slab_t)mem;
2453	}
2454	} else {
2455	panic("uma virtual memory not supported!" );
2456	}
2457	MPASS(keg == slab->us_keg);
2458
2459	/* Do we need to remove from any lists? */
2460	if (slab->us_freecount+1 == keg->uk_ipers) {
2461	LIST_REMOVE(slab, us_link);
2462	LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2463	} else if (slab->us_freecount == 0) {
2464	LIST_REMOVE(slab, us_link);
2465	LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2466	}
2467
2468	/* Slab management stuff */
2469	freei = ((unsigned long)item - (unsigned long)slab->us_data)
2470	/ keg->uk_rsize;
2471
2472	#ifdef INVARIANTS
2473	if (!skip)
2474	uma_dbg_free(zone, slab, item);
2475	#endif
2476
2477	if (keg->uk_flags & UMA_ZONE_REFCNT) {
2478	slabref = (uma_slabrefcnt_t)slab;
2479	slabref->us_freelist[freei].us_item = slab->us_firstfree;
2480	} else {
2481	slab->us_freelist[freei].us_item = slab->us_firstfree;
2482	}
2483	slab->us_firstfree = freei;
2484	slab->us_freecount++;
2485
2486	/* Zone statistics */
2487	keg->uk_free++;
2488
2489	clearfull = 0;
2490	if (keg->uk_flags & UMA_ZFLAG_FULL) {
2491	if (keg->uk_pages < keg->uk_maxpages) {
2492	keg->uk_flags &= ~UMA_ZFLAG_FULL;
2493	clearfull = 1;
2494	}
2495
2496	/*
2497	* We can handle one more allocation. Since we're clearing ZFLAG_FULL,
2498	* wake up all procs blocked on pages. This should be uncommon, so
2499	* keeping this simple for now (rather than adding count of blocked
2500	* threads etc).
2501	*/
2502	wakeup(keg);
2503	}
2504	if (clearfull) {
2505	zone_relock(zone, keg);
2506	zone->uz_flags &= ~UMA_ZFLAG_FULL;
2507	wakeup(zone);
2508	ZONE_UNLOCK(zone);
2509	} else
2510	KEG_UNLOCK(keg);
2511	}
2512
2513	/* See uma.h */
2514	void
2515	uma_zone_set_max(uma_zone_t zone, int nitems)
2516	{
2517	uma_keg_t keg;
2518
2519	ZONE_LOCK(zone);
2520	keg = zone_first_keg(zone);
2521	keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2522	if (keg->uk_maxpages * keg->uk_ipers < nitems)
2523	keg->uk_maxpages += keg->uk_ppera;
2524
2525	ZONE_UNLOCK(zone);
2526	}
2527
2528	/* See uma.h */
2529	int
2530	uma_zone_get_max(uma_zone_t zone)
2531	{
2532	int nitems;
2533	uma_keg_t keg;
2534
2535	ZONE_LOCK(zone);
2536	keg = zone_first_keg(zone);
2537	nitems = keg->uk_maxpages * keg->uk_ipers;
2538	ZONE_UNLOCK(zone);
2539
2540	return (nitems);
2541	}
2542
2543	/* See uma.h */
2544	int
2545	uma_zone_get_cur(uma_zone_t zone)
2546	{
2547	int64_t nitems;
2548	u_int i;
2549
2550	ZONE_LOCK(zone);
2551	nitems = zone->uz_allocs - zone->uz_frees;
2552	CPU_FOREACH(i) {
2553	/*
2554	* See the comment in sysctl_vm_zone_stats() regarding the
2555	* safety of accessing the per-cpu caches. With the zone lock
2556	* held, it is safe, but can potentially result in stale data.
2557	*/
2558	nitems += zone->uz_cpu[i].uc_allocs -
2559	zone->uz_cpu[i].uc_frees;
2560	}
2561	ZONE_UNLOCK(zone);
2562
2563	return (nitems < 0 ? 0 : nitems);
2564	}
2565
2566	/* See uma.h */
2567	void
2568	uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2569	{
2570	uma_keg_t keg;
2571
2572	ZONE_LOCK(zone);
2573	keg = zone_first_keg(zone);
2574	KASSERT(keg->uk_pages == 0,
2575	("uma_zone_set_init on non-empty keg"));
2576	keg->uk_init = uminit;
2577	ZONE_UNLOCK(zone);
2578	}
2579
2580	/* See uma.h */
2581	void
2582	uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2583	{
2584	uma_keg_t keg;
2585
2586	ZONE_LOCK(zone);
2587	keg = zone_first_keg(zone);
2588	KASSERT(keg->uk_pages == 0,
2589	("uma_zone_set_fini on non-empty keg"));
2590	keg->uk_fini = fini;
2591	ZONE_UNLOCK(zone);
2592	}
2593
2594	/* See uma.h */
2595	void
2596	uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2597	{
2598	ZONE_LOCK(zone);
2599	KASSERT(zone_first_keg(zone)->uk_pages == 0,
2600	("uma_zone_set_zinit on non-empty keg"));
2601	zone->uz_init = zinit;
2602	ZONE_UNLOCK(zone);
2603	}
2604
2605	/* See uma.h */
2606	void
2607	uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2608	{
2609	ZONE_LOCK(zone);
2610	KASSERT(zone_first_keg(zone)->uk_pages == 0,
2611	("uma_zone_set_zfini on non-empty keg"));
2612	zone->uz_fini = zfini;
2613	ZONE_UNLOCK(zone);
2614	}
2615
2616	/* See uma.h */
2617	/* XXX uk_freef is not actually used with the zone locked */
2618	void
2619	uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2620	{
2621
2622	ZONE_LOCK(zone);
2623	zone_first_keg(zone)->uk_freef = freef;
2624	ZONE_UNLOCK(zone);
2625	}
2626
2627	/* See uma.h */
2628	/* XXX uk_allocf is not actually used with the zone locked */
2629	void
2630	uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2631	{
2632	uma_keg_t keg;
2633
2634	ZONE_LOCK(zone);
2635	keg = zone_first_keg(zone);
2636	keg->uk_flags \|= UMA_ZFLAG_PRIVALLOC;
2637	keg->uk_allocf = allocf;
2638	ZONE_UNLOCK(zone);
2639	}
2640
2641	/* See uma.h */
2642	void
2643	uma_prealloc(uma_zone_t zone, int items)
2644	{
2645	int slabs;
2646	uma_slab_t slab;
2647	uma_keg_t keg;
2648
2649	keg = zone_first_keg(zone);
2650	ZONE_LOCK(zone);
2651	slabs = items / keg->uk_ipers;
2652	if (slabs * keg->uk_ipers < items)
2653	slabs++;
2654	while (slabs > 0) {
2655	slab = keg_alloc_slab(keg, zone, M_WAITOK);
2656	if (slab == NULL)
2657	break;
2658	MPASS(slab->us_keg == keg);
2659	LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2660	slabs--;
2661	}
2662	ZONE_UNLOCK(zone);
2663	}
2664
2665	/* See uma.h */
2666	void
2667	uma_reclaim(void)
2668	{
2669	#ifdef UMA_DEBUG
2670	printf("UMA: vm asked us to release pages!\n");
2671	#endif
2672	zone_foreach(zone_drain);
2673	/*
2674	* Some slabs may have been freed but this zone will be visited early
2675	* we visit again so that we can free pages that are empty once other
2676	* zones are drained. We have to do the same for buckets.
2677	*/
2678	zone_drain(slabzone);
2679	zone_drain(slabrefzone);
2680	bucket_zone_drain();
2681	}
2682
2683	/* See uma.h */
2684	int
2685	uma_zone_exhausted(uma_zone_t zone)
2686	{
2687	int full;
2688
2689	ZONE_LOCK(zone);
2690	full = (zone->uz_flags & UMA_ZFLAG_FULL);
2691	ZONE_UNLOCK(zone);
2692	return (full);
2693	}
2694
2695	int
2696	uma_zone_exhausted_nolock(uma_zone_t zone)
2697	{
2698	return (zone->uz_flags & UMA_ZFLAG_FULL);
2699	}
2700
2701	void *
2702	uma_large_malloc(int size, int wait)
2703	{
2704	void *mem;
2705	uma_slab_t slab;
2706	u_int8_t flags;
2707
2708	slab = zone_alloc_item(slabzone, NULL, wait);
2709	if (slab == NULL)
2710	return (NULL);
2711	mem = page_alloc(NULL, size, &flags, wait);
2712	if (mem) {
2713	slab->us_data = mem;
2714	slab->us_flags = flags \| UMA_SLAB_MALLOC;
2715	slab->us_size = size;
2716	} else {
2717	zone_free_item(slabzone, slab, NULL, SKIP_NONE,
2718	ZFREE_STATFAIL \| ZFREE_STATFREE);
2719	}
2720
2721	return (mem);
2722	}
2723
2724	void
2725	uma_large_free(uma_slab_t slab)
2726	{
2727	page_free(slab->us_data, slab->us_size, slab->us_flags);
2728	zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
2729	}
2730
2731	void
2732	uma_print_stats(void)
2733	{
2734	zone_foreach(uma_print_zone);
2735	}
2736
2737	static void
2738	slab_print(uma_slab_t slab)
2739	{
2740	printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2741	slab->us_keg, slab->us_data, slab->us_freecount,
2742	slab->us_firstfree);
2743	}
2744
2745	static void
2746	cache_print(uma_cache_t cache)
2747	{
2748	printf("alloc: %p(%d), free: %p(%d)\n",
2749	cache->uc_allocbucket,
2750	cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2751	cache->uc_freebucket,
2752	cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2753	}
2754
2755	static void
2756	uma_print_keg(uma_keg_t keg)
2757	{
2758	uma_slab_t slab;
2759
2760	printf("keg: %s(%p) size %d(%d) flags %d ipers %d ppera %d "
2761	"out %d free %d limit %d\n",
2762	keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2763	keg->uk_ipers, keg->uk_ppera,
2764	(keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
2765	(keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
2766	printf("Part slabs:\n");
2767	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2768	slab_print(slab);
2769	printf("Free slabs:\n");
2770	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2771	slab_print(slab);
2772	printf("Full slabs:\n");
2773	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2774	slab_print(slab);
2775	}
2776
2777	void
2778	uma_print_zone(uma_zone_t zone)
2779	{
2780	uma_cache_t cache;
2781	uma_klink_t kl;
2782	int i;
2783
2784	printf("zone: %s(%p) size %d flags %d\n",
2785	zone->uz_name, zone, zone->uz_size, zone->uz_flags);
2786	LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
2787	uma_print_keg(kl->kl_keg);
2788	for (i = 0; i <= mp_maxid; i++) {
2789	if (CPU_ABSENT(i))
2790	continue;
2791	cache = &zone->uz_cpu[i];
2792	printf("CPU %d Cache:\n", i);
2793	cache_print(cache);
2794	}
2795	}
2796

Note: See TracBrowser for help on using the repository browser.

Download in other formats: