source: rtems-libbsd/freebsd/sys/vm/uma_core.c @ 5ede682

Last change: 5ede682, checked in by Sebastian Huber <sebastian.huber@…> on 11/14/16 at 09:17:10

ZONE(9): Use recursive lock for the UMA drain

1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * uma_core.c  Implementation of the Universal Memory allocator
33 *
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36 * efficient.  A primary design goal is to return unused memory to the rest of
37 * the system.  This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
40 *
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
42 * are well known.
43 *
44 */
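/*
 * Illustrative example (not part of the original file): a typical consumer
 * of this allocator creates a zone once and then allocates and frees
 * fixed-size items through it.  The "foo" type and the M_WAITOK | M_ZERO
 * flags below are assumptions made only for this sketch.
 *
 *	struct foo { int f_refs; };
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	struct foo *f = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, f);
 *	uma_zdestroy(foo_zone);
 */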
45
46/*
47 * TODO:
48 *      - Improve memory usage for large allocations
49 *      - Investigate cache size adjustments
50 */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD$");
54
55/* I should really use ktr.. */
56/*
57#define UMA_DEBUG 1
58#define UMA_DEBUG_ALLOC 1
59#define UMA_DEBUG_ALLOC_1 1
60*/
61
62#include <rtems/bsd/local/opt_ddb.h>
63#include <rtems/bsd/local/opt_param.h>
64#include <rtems/bsd/local/opt_vm.h>
65
66#include <sys/param.h>
67#include <sys/systm.h>
68#include <sys/bitset.h>
69#include <sys/eventhandler.h>
70#include <sys/kernel.h>
71#include <sys/types.h>
72#include <sys/queue.h>
73#include <sys/malloc.h>
74#include <sys/ktr.h>
75#include <sys/lock.h>
76#include <sys/sysctl.h>
77#include <sys/mutex.h>
78#include <sys/proc.h>
79#include <sys/random.h>
80#include <sys/rwlock.h>
81#include <sys/sbuf.h>
82#include <sys/sched.h>
83#include <sys/smp.h>
84#include <sys/taskqueue.h>
85#include <sys/vmmeter.h>
86
87#include <vm/vm.h>
88#include <vm/vm_object.h>
89#include <vm/vm_page.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_param.h>
92#include <vm/vm_map.h>
93#include <vm/vm_kern.h>
94#include <vm/vm_extern.h>
95#include <vm/uma.h>
96#include <vm/uma_int.h>
97#include <vm/uma_dbg.h>
98
99#include <ddb/ddb.h>
100#ifdef __rtems__
101  #ifdef RTEMS_SMP
102    /*
103     * It is essential that we have a per-processor cache, otherwise the
104     * critical_enter()/critical_exit() protection would be insufficient.
105     */
106    #undef curcpu
107    #define curcpu rtems_get_current_processor()
108    #undef mp_maxid
109    #define mp_maxid (rtems_get_processor_count() - 1)
110    #define SMP
111  #endif
112#endif /* __rtems__ */
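/*
 * Illustrative sketch (assumption, not code taken from this file): the
 * per-CPU fast path that the remapping above is meant to keep safe looks
 * roughly like
 *
 *	critical_enter();
 *	cache = &zone->uz_cpu[curcpu];
 *	... operate on cache->uc_allocbucket / cache->uc_freebucket ...
 *	critical_exit();
 *
 * Disabling preemption pins the executing thread to one processor, so the
 * cache selected via curcpu cannot be accessed concurrently -- but only if
 * each processor really has its own cache slot, which is what the
 * definitions above guarantee on RTEMS SMP.
 */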
113
114#ifdef DEBUG_MEMGUARD
115#include <vm/memguard.h>
116#endif
117
118/*
119 * This is the zone and keg from which all zones are spawned.  The idea is that
120 * even the zone & keg heads are allocated from the allocator, so we use the
121 * bss section to bootstrap us.
122 */
123static struct uma_keg masterkeg;
124static struct uma_zone masterzone_k;
125static struct uma_zone masterzone_z;
126static uma_zone_t kegs = &masterzone_k;
127static uma_zone_t zones = &masterzone_z;
128
129/* This is the zone from which all of uma_slab_t's are allocated. */
130static uma_zone_t slabzone;
131
132/*
133 * The initial hash tables come out of this zone so they can be allocated
134 * prior to malloc coming up.
135 */
136static uma_zone_t hashzone;
137
138/* The boot-time adjusted value for cache line alignment. */
139int uma_align_cache = 64 - 1;
140
141static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
142
143#ifndef __rtems__
144/*
145 * Are we allowed to allocate buckets?
146 */
147static int bucketdisable = 1;
148#else /* __rtems__ */
149#define bucketdisable 0
150#endif /* __rtems__ */
151
152/* Linked list of all kegs in the system */
153static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
154
155/* Linked list of all cache-only zones in the system */
156static LIST_HEAD(,uma_zone) uma_cachezones =
157    LIST_HEAD_INITIALIZER(uma_cachezones);
158
159/* This RW lock protects the keg list */
160static struct rwlock_padalign uma_rwlock;
161
162#ifndef __rtems__
163/* Linked list of boot time pages */
164static LIST_HEAD(,uma_slab) uma_boot_pages =
165    LIST_HEAD_INITIALIZER(uma_boot_pages);
166
167/* This mutex protects the boot time pages list */
168static struct mtx_padalign uma_boot_pages_mtx;
169#endif /* __rtems__ */
170
171static struct sx uma_drain_lock;
172
173#ifndef __rtems__
174/* Is the VM done starting up? */
175static int booted = 0;
176#define UMA_STARTUP     1
177#define UMA_STARTUP2    2
178#endif /* __rtems__ */
179
180/*
181 * This is the handle used to schedule events that need to happen
182 * outside of the allocation fast path.
183 */
184static struct callout uma_callout;
185#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
186
187/*
188 * This structure is passed as the zone ctor arg so that I don't have to create
189 * a special allocation function just for zones.
190 */
191struct uma_zctor_args {
192        const char *name;
193        size_t size;
194        uma_ctor ctor;
195        uma_dtor dtor;
196        uma_init uminit;
197        uma_fini fini;
198        uma_import import;
199        uma_release release;
200        void *arg;
201        uma_keg_t keg;
202        int align;
203        uint32_t flags;
204};
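/*
 * Illustrative sketch (assumption, not code taken from this file): the
 * import/release members above are what back a keg-less cache zone.  A
 * consumer that manages its own backing store wires them up roughly like
 * this; my_import, my_release and my_arg are hypothetical names.
 *
 *	static int  my_import(void *arg, void **store, int count, int flags);
 *	static void my_release(void *arg, void **store, int count);
 *
 *	zone = uma_zcache_create("my cache", item_size, NULL, NULL, NULL,
 *	    NULL, my_import, my_release, my_arg, 0);
 */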
205
206struct uma_kctor_args {
207        uma_zone_t zone;
208        size_t size;
209        uma_init uminit;
210        uma_fini fini;
211        int align;
212        uint32_t flags;
213};
214
215struct uma_bucket_zone {
216        uma_zone_t      ubz_zone;
217        char            *ubz_name;
218        int             ubz_entries;    /* Number of items it can hold. */
219        int             ubz_maxsize;    /* Maximum allocation size per-item. */
220};
221
222/*
223 * Compute the actual number of bucket entries so that buckets pack into
224 * power-of-two allocation sizes for more efficient space utilization.
225 */
226#define BUCKET_SIZE(n)                                          \
227    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
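/*
 * Worked example (illustrative): BUCKET_SIZE(n) evaluates to
 * n - howmany(sizeof(struct uma_bucket), sizeof(void *)), so the bucket
 * header plus the item-pointer array fill exactly n pointer-sized slots.
 * bucket_init() below then requests precisely that amount of memory:
 *
 *	roundup(sizeof(struct uma_bucket), sizeof(void *)) +
 *	    sizeof(void *) * BUCKET_SIZE(n) == sizeof(void *) * n
 *
 * which is a power-of-two size whenever n is a power of two.
 */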
228
229#ifndef __rtems__
230#define BUCKET_MAX      BUCKET_SIZE(256)
231#else /* __rtems__ */
232#define BUCKET_MAX      BUCKET_SIZE(128)
233#endif /* __rtems__ */
234
235struct uma_bucket_zone bucket_zones[] = {
236        { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
237        { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
238        { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
239        { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
240        { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
241        { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
242        { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
243        { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
244#ifndef __rtems__
245        { NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
246#endif /* __rtems__ */
247        { NULL, NULL, 0}
248};
249
250/*
251 * Flags and enumerations to be passed to internal functions.
252 */
253enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
254
255/* Prototypes.. */
256
257#ifndef __rtems__
258static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
259#endif /* __rtems__ */
260static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
261#ifndef __rtems__
262static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
263#endif /* __rtems__ */
264static void page_free(void *, vm_size_t, uint8_t);
265static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
266static void cache_drain(uma_zone_t);
267static void bucket_drain(uma_zone_t, uma_bucket_t);
268static void bucket_cache_drain(uma_zone_t zone);
269static int keg_ctor(void *, int, void *, int);
270static void keg_dtor(void *, int, void *);
271static int zone_ctor(void *, int, void *, int);
272static void zone_dtor(void *, int, void *);
273static int zero_init(void *, int, int);
274static void keg_small_init(uma_keg_t keg);
275static void keg_large_init(uma_keg_t keg);
276static void zone_foreach(void (*zfunc)(uma_zone_t));
277static void zone_timeout(uma_zone_t zone);
278static int hash_alloc(struct uma_hash *);
279static int hash_expand(struct uma_hash *, struct uma_hash *);
280static void hash_free(struct uma_hash *hash);
281static void uma_timeout(void *);
282static void uma_startup3(void);
283static void *zone_alloc_item(uma_zone_t, void *, int);
284static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
285static void bucket_enable(void);
286static void bucket_init(void);
287static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
288static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
289static void bucket_zone_drain(void);
290static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
291static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
292#ifndef __rtems__
293static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
294#endif /* __rtems__ */
295static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
296static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
297static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
298    uma_fini fini, int align, uint32_t flags);
299static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
300static void zone_release(uma_zone_t zone, void **bucket, int cnt);
301static void uma_zero_item(void *item, uma_zone_t zone);
302
303void uma_print_zone(uma_zone_t);
304void uma_print_stats(void);
305static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
306static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
307
308#ifdef INVARIANTS
309static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
310static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
311#endif
312
313SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
314
315SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
316    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
317
318SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
319    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
320
321static int zone_warnings = 1;
322SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
323    "Warn when UMA zones become full");
324
325/*
326 * This routine checks to see whether or not it's safe to enable buckets.
327 */
328static void
329bucket_enable(void)
330{
331#ifndef __rtems__
332        bucketdisable = vm_page_count_min();
333#endif /* __rtems__ */
334}
335
336/*
337 * Initialize bucket_zones, the array of zones of buckets of various sizes.
338 *
339 * For each zone, calculate the memory required for each bucket, consisting
340 * of the header and an array of pointers.
341 */
342static void
343bucket_init(void)
344{
345        struct uma_bucket_zone *ubz;
346        int size;
347
348        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
349                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
350                size += sizeof(void *) * ubz->ubz_entries;
351                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
352                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
353                    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
354        }
355}
356
357/*
358 * Given a desired number of entries for a bucket, return the zone from which
359 * to allocate the bucket.
360 */
361static struct uma_bucket_zone *
362bucket_zone_lookup(int entries)
363{
364        struct uma_bucket_zone *ubz;
365
366        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
367                if (ubz->ubz_entries >= entries)
368                        return (ubz);
369        ubz--;
370        return (ubz);
371}
372
373static int
374bucket_select(int size)
375{
376        struct uma_bucket_zone *ubz;
377
378        ubz = &bucket_zones[0];
379        if (size > ubz->ubz_maxsize)
380                return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
381
382        for (; ubz->ubz_entries != 0; ubz++)
383                if (ubz->ubz_maxsize < size)
384                        break;
385        ubz--;
386        return (ubz->ubz_entries);
387}
388
389static uma_bucket_t
390bucket_alloc(uma_zone_t zone, void *udata, int flags)
391{
392        struct uma_bucket_zone *ubz;
393        uma_bucket_t bucket;
394
395#ifndef __rtems__
396        /*
397         * This is to stop us from allocating per cpu buckets while we're
398         * running out of vm.boot_pages.  Otherwise, we would exhaust the
399         * boot pages.  This also prevents us from allocating buckets in
400         * low memory situations.
401         */
402        if (bucketdisable)
403                return (NULL);
404#endif /* __rtems__ */
405        /*
406         * To limit bucket recursion we store the original zone flags
407         * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
408         * NOVM flag to persist even through deep recursions.  We also
409         * store ZFLAG_BUCKET once we have recursed attempting to allocate
410         * a bucket for a bucket zone so we do not allow infinite bucket
411         * recursion.  This cookie will even persist to frees of unused
412         * buckets via the allocation path or bucket allocations in the
413         * free path.
414         */
415        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
416                udata = (void *)(uintptr_t)zone->uz_flags;
417        else {
418                if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
419                        return (NULL);
420                udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
421        }
422        if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
423                flags |= M_NOVM;
424        ubz = bucket_zone_lookup(zone->uz_count);
425        if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
426                ubz++;
427        bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
428        if (bucket) {
429#ifdef INVARIANTS
430                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
431#endif
432                bucket->ub_cnt = 0;
433                bucket->ub_entries = ubz->ubz_entries;
434        }
435
436        return (bucket);
437}
438
439static void
440bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
441{
442        struct uma_bucket_zone *ubz;
443
444        KASSERT(bucket->ub_cnt == 0,
445            ("bucket_free: Freeing a non free bucket."));
446        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
447                udata = (void *)(uintptr_t)zone->uz_flags;
448        ubz = bucket_zone_lookup(bucket->ub_entries);
449        uma_zfree_arg(ubz->ubz_zone, bucket, udata);
450}
451
452static void
453bucket_zone_drain(void)
454{
455        struct uma_bucket_zone *ubz;
456
457        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
458                zone_drain(ubz->ubz_zone);
459}
460
461static void
462zone_log_warning(uma_zone_t zone)
463{
464        static const struct timeval warninterval = { 300, 0 };
465
466        if (!zone_warnings || zone->uz_warning == NULL)
467                return;
468
469        if (ratecheck(&zone->uz_ratecheck, &warninterval))
470                printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
471}
472
473static inline void
474zone_maxaction(uma_zone_t zone)
475{
476
477        if (zone->uz_maxaction.ta_func != NULL)
478                taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
479}
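/*
 * Illustrative sketch (assumption, not code taken from this file): the
 * warning and maxaction hooks used above are armed by consumers through the
 * public setters, e.g.
 *
 *	uma_zone_set_warning(zone, "out of foo items");
 *	uma_zone_set_maxaction(zone, my_maxaction);
 *
 * where my_maxaction is a hypothetical callback.  zone_log_warning() then
 * rate-limits the message and zone_maxaction() enqueues the task whenever
 * the zone hits its item limit.
 */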
480
481static void
482zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
483{
484        uma_klink_t klink;
485
486        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
487                kegfn(klink->kl_keg);
488}
489
490/*
491 * Routine called by timeout which is used to fire off some time interval
492 * based calculations.  (stats, hash size, etc.)
493 *
494 * Arguments:
495 *      arg   Unused
496 *
497 * Returns:
498 *      Nothing
499 */
500static void
501uma_timeout(void *unused)
502{
503        bucket_enable();
504        zone_foreach(zone_timeout);
505
506        /* Reschedule this event */
507        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
508}
509
510/*
511 * Routine to perform timeout driven calculations.  This expands the
512 * hashes and does per cpu statistics aggregation.
513 *
514 *  Returns nothing.
515 */
516static void
517keg_timeout(uma_keg_t keg)
518{
519
520        KEG_LOCK(keg);
521        /*
522         * Expand the keg hash table.
523         *
524         * This is done if the number of slabs is larger than the hash size.
525 * What I'm trying to do here is eliminate collisions entirely.  This
526         * may be a little aggressive.  Should I allow for two collisions max?
527         */
528        if (keg->uk_flags & UMA_ZONE_HASH &&
529            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
530                struct uma_hash newhash;
531                struct uma_hash oldhash;
532                int ret;
533
534                /*
535                 * This is so involved because allocating and freeing
536                 * while the keg lock is held will lead to deadlock.
537                 * I have to do everything in stages and check for
538                 * races.
539                 */
540                newhash = keg->uk_hash;
541                KEG_UNLOCK(keg);
542                ret = hash_alloc(&newhash);
543                KEG_LOCK(keg);
544                if (ret) {
545                        if (hash_expand(&keg->uk_hash, &newhash)) {
546                                oldhash = keg->uk_hash;
547                                keg->uk_hash = newhash;
548                        } else
549                                oldhash = newhash;
550
551                        KEG_UNLOCK(keg);
552                        hash_free(&oldhash);
553                        return;
554                }
555        }
556        KEG_UNLOCK(keg);
557}
558
559static void
560zone_timeout(uma_zone_t zone)
561{
562
563        zone_foreach_keg(zone, &keg_timeout);
564}
565
566/*
567 * Allocate and zero fill the next sized hash table from the appropriate
568 * backing store.
569 *
570 * Arguments:
571 *      hash  A new hash structure with the old hash size in uh_hashsize
572 *
573 * Returns:
574 *      1 on success and 0 on failure.
575 */
576static int
577hash_alloc(struct uma_hash *hash)
578{
579        int oldsize;
580        int alloc;
581
582        oldsize = hash->uh_hashsize;
583
584        /* We're just going to go to a power of two greater */
585        if (oldsize)  {
586                hash->uh_hashsize = oldsize * 2;
587                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
588                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
589                    M_UMAHASH, M_NOWAIT);
590        } else {
591                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
592                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
593                    M_WAITOK);
594                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
595        }
596        if (hash->uh_slab_hash) {
597                bzero(hash->uh_slab_hash, alloc);
598                hash->uh_hashmask = hash->uh_hashsize - 1;
599                return (1);
600        }
601
602        return (0);
603}
604
605/*
606 * Expands the hash table for HASH zones.  This is done from zone_timeout
607 * to reduce collisions.  This must not be done in the regular allocation
608 * path, otherwise, we can recurse on the vm while allocating pages.
609 *
610 * Arguments:
611 *      oldhash  The hash you want to expand
612 *      newhash  The hash structure for the new table
613 *
614 * Returns:
615 *      Nothing
616 *
617 * Discussion:
618 */
619static int
620hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
621{
622        uma_slab_t slab;
623        int hval;
624        int i;
625
626        if (!newhash->uh_slab_hash)
627                return (0);
628
629        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
630                return (0);
631
632        /*
633         * I need to investigate hash algorithms for resizing without a
634         * full rehash.
635         */
636
637        for (i = 0; i < oldhash->uh_hashsize; i++)
638                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
639                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
640                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
641                        hval = UMA_HASH(newhash, slab->us_data);
642                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
643                            slab, us_hlink);
644                }
645
646        return (1);
647}
648
649/*
650 * Free the hash bucket to the appropriate backing store.
651 *
652 * Arguments:
653 *      slab_hash  The hash bucket we're freeing
654 *      hashsize   The number of entries in that hash bucket
655 *
656 * Returns:
657 *      Nothing
658 */
659static void
660hash_free(struct uma_hash *hash)
661{
662        if (hash->uh_slab_hash == NULL)
663                return;
664        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
665                zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
666        else
667                free(hash->uh_slab_hash, M_UMAHASH);
668}
669
670/*
671 * Frees all outstanding items in a bucket
672 *
673 * Arguments:
674 *      zone   The zone to free to, must be unlocked.
675 *      bucket The free/alloc bucket with items, cpu queue must be locked.
676 *
677 * Returns:
678 *      Nothing
679 */
680
681static void
682bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
683{
684        int i;
685
686        if (bucket == NULL)
687                return;
688
689        if (zone->uz_fini)
690                for (i = 0; i < bucket->ub_cnt; i++)
691                        zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
692        zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
693        bucket->ub_cnt = 0;
694}
695
696/*
697 * Drains the per cpu caches for a zone.
698 *
699 * NOTE: This may only be called while the zone is being torn down, and not
700 * during normal operation.  This is necessary in order that we do not have
701 * to migrate CPUs to drain the per-CPU caches.
702 *
703 * Arguments:
704 *      zone     The zone to drain, must be unlocked.
705 *
706 * Returns:
707 *      Nothing
708 */
709static void
710cache_drain(uma_zone_t zone)
711{
712        uma_cache_t cache;
713        int cpu;
714
715        /*
716         * XXX: It is safe to not lock the per-CPU caches, because we're
717         * tearing down the zone anyway.  I.e., there will be no further use
718         * of the caches at this point.
719         *
720 * XXX: It would be good to be able to assert that the zone is being
721         * torn down to prevent improper use of cache_drain().
722         *
723         * XXX: We lock the zone before passing into bucket_cache_drain() as
724         * it is used elsewhere.  Should the tear-down path be made special
725         * there in some form?
726         */
727        CPU_FOREACH(cpu) {
728                cache = &zone->uz_cpu[cpu];
729                bucket_drain(zone, cache->uc_allocbucket);
730                bucket_drain(zone, cache->uc_freebucket);
731                if (cache->uc_allocbucket != NULL)
732                        bucket_free(zone, cache->uc_allocbucket, NULL);
733                if (cache->uc_freebucket != NULL)
734                        bucket_free(zone, cache->uc_freebucket, NULL);
735                cache->uc_allocbucket = cache->uc_freebucket = NULL;
736        }
737        ZONE_LOCK(zone);
738        bucket_cache_drain(zone);
739        ZONE_UNLOCK(zone);
740}
741
742#ifndef __rtems__
743static void
744cache_shrink(uma_zone_t zone)
745{
746
747        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
748                return;
749
750        ZONE_LOCK(zone);
751        zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
752        ZONE_UNLOCK(zone);
753}
754
755static void
756cache_drain_safe_cpu(uma_zone_t zone)
757{
758        uma_cache_t cache;
759        uma_bucket_t b1, b2;
760
761        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
762                return;
763
764        b1 = b2 = NULL;
765        ZONE_LOCK(zone);
766        critical_enter();
767        cache = &zone->uz_cpu[curcpu];
768        if (cache->uc_allocbucket) {
769                if (cache->uc_allocbucket->ub_cnt != 0)
770                        LIST_INSERT_HEAD(&zone->uz_buckets,
771                            cache->uc_allocbucket, ub_link);
772                else
773                        b1 = cache->uc_allocbucket;
774                cache->uc_allocbucket = NULL;
775        }
776        if (cache->uc_freebucket) {
777                if (cache->uc_freebucket->ub_cnt != 0)
778                        LIST_INSERT_HEAD(&zone->uz_buckets,
779                            cache->uc_freebucket, ub_link);
780                else
781                        b2 = cache->uc_freebucket;
782                cache->uc_freebucket = NULL;
783        }
784        critical_exit();
785        ZONE_UNLOCK(zone);
786        if (b1)
787                bucket_free(zone, b1, NULL);
788        if (b2)
789                bucket_free(zone, b2, NULL);
790}
791
792/*
793 * Safely drain the per-CPU caches of a zone (or of all zones) into the zone bucket cache.
794 * This is an expensive call because it needs to bind to all CPUs
795 * one by one and enter a critical section on each of them in order
796 * to safely access their cache buckets.
797 * The zone lock must not be held when calling this function.
798 */
799static void
800cache_drain_safe(uma_zone_t zone)
801{
802        int cpu;
803
804        /*
805         * Polite bucket size shrinking was not enough, shrink aggressively.
806         */
807        if (zone)
808                cache_shrink(zone);
809        else
810                zone_foreach(cache_shrink);
811
812        CPU_FOREACH(cpu) {
813                thread_lock(curthread);
814                sched_bind(curthread, cpu);
815                thread_unlock(curthread);
816
817                if (zone)
818                        cache_drain_safe_cpu(zone);
819                else
820                        zone_foreach(cache_drain_safe_cpu);
821        }
822        thread_lock(curthread);
823        sched_unbind(curthread);
824        thread_unlock(curthread);
825}
826#endif /* __rtems__ */
827
828/*
829 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
830 */
831static void
832bucket_cache_drain(uma_zone_t zone)
833{
834        uma_bucket_t bucket;
835
836        /*
837         * Drain the bucket queues and free the buckets, we just keep two per
838         * cpu (alloc/free).
839         */
840        while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
841                LIST_REMOVE(bucket, ub_link);
842                ZONE_UNLOCK(zone);
843                bucket_drain(zone, bucket);
844                bucket_free(zone, bucket, NULL);
845                ZONE_LOCK(zone);
846        }
847
848        /*
849         * Shrink further bucket sizes.  The price of a single zone lock collision
850         * is probably lower than the price of a global cache drain.
851         */
852        if (zone->uz_count > zone->uz_count_min)
853                zone->uz_count--;
854}
855
856static void
857keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
858{
859        uint8_t *mem;
860        int i;
861        uint8_t flags;
862
863        mem = slab->us_data;
864        flags = slab->us_flags;
865        i = start;
866        if (keg->uk_fini != NULL) {
867                for (i--; i > -1; i--)
868                        keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
869                            keg->uk_size);
870        }
871        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
872                zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
873#ifdef UMA_DEBUG
874        printf("%s: Returning %d bytes.\n", keg->uk_name,
875            PAGE_SIZE * keg->uk_ppera);
876#endif
877        keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
878}
879
880/*
881 * Frees pages from a keg back to the system.  This is done on demand from
882 * the pageout daemon.
883 *
884 * Returns nothing.
885 */
886static void
887keg_drain(uma_keg_t keg)
888{
889        struct slabhead freeslabs = { 0 };
890        uma_slab_t slab, tmp;
891
892        /*
893         * We don't want to take pages from statically allocated kegs at this
894         * time.
895         */
896        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
897                return;
898
899#ifdef UMA_DEBUG
900        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
901#endif
902        KEG_LOCK(keg);
903        if (keg->uk_free == 0)
904                goto finished;
905
906        LIST_FOREACH_SAFE(slab, &keg->uk_free_slab, us_link, tmp) {
907#ifndef __rtems__
908                /* We have nowhere to free these to. */
909                if (slab->us_flags & UMA_SLAB_BOOT)
910                        continue;
911#endif /* __rtems__ */
912
913                LIST_REMOVE(slab, us_link);
914                keg->uk_pages -= keg->uk_ppera;
915                keg->uk_free -= keg->uk_ipers;
916
917                if (keg->uk_flags & UMA_ZONE_HASH)
918                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
919
920                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
921        }
922finished:
923        KEG_UNLOCK(keg);
924
925        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
926                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
927                keg_free_slab(keg, slab, keg->uk_ipers);
928        }
929}
930
931static void
932zone_drain_wait(uma_zone_t zone, int waitok)
933{
934
935        /*
936         * Set draining to interlock with zone_dtor() so we can release our
937         * locks as we go.  Only dtor() should do a WAITOK call since it
938         * is the only call that knows the structure will still be available
939         * when it wakes up.
940         */
941        ZONE_LOCK(zone);
942        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
943                if (waitok == M_NOWAIT)
944                        goto out;
945                msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
946        }
947        zone->uz_flags |= UMA_ZFLAG_DRAINING;
948        bucket_cache_drain(zone);
949        ZONE_UNLOCK(zone);
950        /*
951         * The DRAINING flag protects us from being freed while
952         * we're running.  Normally the uma_rwlock would protect us but we
953         * must be able to release and acquire the right lock for each keg.
954         */
955        zone_foreach_keg(zone, &keg_drain);
956        ZONE_LOCK(zone);
957        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
958        wakeup(zone);
959out:
960        ZONE_UNLOCK(zone);
961}
962
963void
964zone_drain(uma_zone_t zone)
965{
966
967        zone_drain_wait(zone, M_NOWAIT);
968}
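/*
 * Illustrative note (assumption, not code taken from this file): system-wide
 * reclamation under memory pressure typically runs every zone through this
 * path, roughly
 *
 *	zone_foreach(zone_drain);
 *
 * which is what uma_reclaim() does when the page daemon reports a low-memory
 * condition.
 */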
969
970/*
971 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
972 *
973 * Arguments:
974 *      wait  Shall we wait?
975 *
976 * Returns:
977 *      The slab that was allocated or NULL if there is no memory and the
978 *      caller specified M_NOWAIT.
979 */
980static uma_slab_t
981keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
982{
983        uma_alloc allocf;
984        uma_slab_t slab;
985        uint8_t *mem;
986        uint8_t flags;
987        int i;
988
989        mtx_assert(&keg->uk_lock, MA_OWNED);
990        slab = NULL;
991        mem = NULL;
992
993#ifdef UMA_DEBUG
994        printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
995#endif
996        allocf = keg->uk_allocf;
997        KEG_UNLOCK(keg);
998
999        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1000                slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
1001                if (slab == NULL)
1002                        goto out;
1003        }
1004
1005        /*
1006         * This reproduces the old vm_zone behavior of zero filling pages the
1007         * first time they are added to a zone.
1008         *
1009         * Malloced items are zeroed in uma_zalloc.
1010         */
1011
1012        if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1013                wait |= M_ZERO;
1014        else
1015                wait &= ~M_ZERO;
1016
1017        if (keg->uk_flags & UMA_ZONE_NODUMP)
1018                wait |= M_NODUMP;
1019
1020        /* zone is passed for legacy reasons. */
1021        mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
1022        if (mem == NULL) {
1023                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1024                        zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1025                slab = NULL;
1026                goto out;
1027        }
1028
1029        /* Point the slab into the allocated memory */
1030        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
1031                slab = (uma_slab_t )(mem + keg->uk_pgoff);
1032
1033        if (keg->uk_flags & UMA_ZONE_VTOSLAB)
1034                for (i = 0; i < keg->uk_ppera; i++)
1035                        vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
1036
1037        slab->us_keg = keg;
1038        slab->us_data = mem;
1039        slab->us_freecount = keg->uk_ipers;
1040        slab->us_flags = flags;
1041        BIT_FILL(SLAB_SETSIZE, &slab->us_free);
1042#ifdef INVARIANTS
1043        BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
1044#endif
1045
1046        if (keg->uk_init != NULL) {
1047                for (i = 0; i < keg->uk_ipers; i++)
1048                        if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
1049                            keg->uk_size, wait) != 0)
1050                                break;
1051                if (i != keg->uk_ipers) {
1052                        keg_free_slab(keg, slab, i);
1053                        slab = NULL;
1054                        goto out;
1055                }
1056        }
1057out:
1058        KEG_LOCK(keg);
1059
1060        if (slab != NULL) {
1061                if (keg->uk_flags & UMA_ZONE_HASH)
1062                        UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
1063
1064                keg->uk_pages += keg->uk_ppera;
1065                keg->uk_free += keg->uk_ipers;
1066        }
1067
1068        return (slab);
1069}
1070
1071#ifndef __rtems__
1072/*
1073 * This function is intended to be used early on in place of page_alloc() so
1074 * that we may use the boot time page cache to satisfy allocations before
1075 * the VM is ready.
1076 */
1077static void *
1078startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
1079{
1080        uma_keg_t keg;
1081        uma_slab_t tmps;
1082        int pages, check_pages;
1083
1084        keg = zone_first_keg(zone);
1085        pages = howmany(bytes, PAGE_SIZE);
1086        check_pages = pages - 1;
1087        KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
1088
1089        /*
1090         * Check our small startup cache to see if it has pages remaining.
1091         */
1092        mtx_lock(&uma_boot_pages_mtx);
1093
1094        /* First check if we have enough room. */
1095        tmps = LIST_FIRST(&uma_boot_pages);
1096        while (tmps != NULL && check_pages-- > 0)
1097                tmps = LIST_NEXT(tmps, us_link);
1098        if (tmps != NULL) {
1099                /*
1100                 * It's ok to lose tmps references.  The last one will
1101                 * have tmps->us_data pointing to the start address of
1102                 * "pages" contiguous pages of memory.
1103                 */
1104                while (pages-- > 0) {
1105                        tmps = LIST_FIRST(&uma_boot_pages);
1106                        LIST_REMOVE(tmps, us_link);
1107                }
1108                mtx_unlock(&uma_boot_pages_mtx);
1109                *pflag = tmps->us_flags;
1110                return (tmps->us_data);
1111        }
1112        mtx_unlock(&uma_boot_pages_mtx);
1113        if (booted < UMA_STARTUP2)
1114                panic("UMA: Increase vm.boot_pages");
1115        /*
1116         * Now that we've booted reset these users to their real allocator.
1117         */
1118#ifdef UMA_MD_SMALL_ALLOC
1119        keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1120#else
1121        keg->uk_allocf = page_alloc;
1122#endif
1123        return keg->uk_allocf(zone, bytes, pflag, wait);
1124}
1125#endif /* __rtems__ */
1126
1127/*
1128 * Allocates a number of pages from the system
1129 *
1130 * Arguments:
1131 *      bytes  The number of bytes requested
1132 *      wait  Shall we wait?
1133 *
1134 * Returns:
1135 *      A pointer to the alloced memory or possibly
1136 *      NULL if M_NOWAIT is set.
1137 */
1138static void *
1139page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
1140{
1141        void *p;        /* Returned page */
1142
1143#ifndef __rtems__
1144        *pflag = UMA_SLAB_KMEM;
1145        p = (void *) kmem_malloc(kmem_arena, bytes, wait);
1146#else /* __rtems__ */
1147        *pflag = 0;
1148        p = rtems_bsd_page_alloc(bytes, wait);
1149#endif /* __rtems__ */
1150
1151        return (p);
1152}
1153
1154#ifndef __rtems__
1155/*
1156 * Allocates a number of pages from within an object
1157 *
1158 * Arguments:
1159 *      bytes  The number of bytes requested
1160 *      wait   Shall we wait?
1161 *
1162 * Returns:
1163 *      A pointer to the alloced memory or possibly
1164 *      NULL if M_NOWAIT is set.
1165 */
1166static void *
1167noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
1168{
1169        TAILQ_HEAD(, vm_page) alloctail;
1170        u_long npages;
1171        vm_offset_t retkva, zkva;
1172        vm_page_t p, p_next;
1173        uma_keg_t keg;
1174
1175        TAILQ_INIT(&alloctail);
1176        keg = zone_first_keg(zone);
1177
1178        npages = howmany(bytes, PAGE_SIZE);
1179        while (npages > 0) {
1180                p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
1181                    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
1182                if (p != NULL) {
1183                        /*
1184                         * Since the page does not belong to an object, its
1185                         * listq is unused.
1186                         */
1187                        TAILQ_INSERT_TAIL(&alloctail, p, listq);
1188                        npages--;
1189                        continue;
1190                }
1191                if (wait & M_WAITOK) {
1192                        VM_WAIT;
1193                        continue;
1194                }
1195
1196                /*
1197                 * Page allocation failed, free intermediate pages and
1198                 * exit.
1199                 */
1200                TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1201                        vm_page_unwire(p, PQ_NONE);
1202                        vm_page_free(p);
1203                }
1204                return (NULL);
1205        }
1206        *flags = UMA_SLAB_PRIV;
1207        zkva = keg->uk_kva +
1208            atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1209        retkva = zkva;
1210        TAILQ_FOREACH(p, &alloctail, listq) {
1211                pmap_qenter(zkva, &p, 1);
1212                zkva += PAGE_SIZE;
1213        }
1214
1215        return ((void *)retkva);
1216}
1217#endif /* __rtems__ */
1218
1219/*
1220 * Frees a number of pages to the system
1221 *
1222 * Arguments:
1223 *      mem   A pointer to the memory to be freed
1224 *      size  The size of the memory being freed
1225 *      flags The original p->us_flags field
1226 *
1227 * Returns:
1228 *      Nothing
1229 */
1230static void
1231page_free(void *mem, vm_size_t size, uint8_t flags)
1232{
1233#ifndef __rtems__
1234        struct vmem *vmem;
1235
1236        if (flags & UMA_SLAB_KMEM)
1237                vmem = kmem_arena;
1238        else if (flags & UMA_SLAB_KERNEL)
1239                vmem = kernel_arena;
1240        else
1241                panic("UMA: page_free used with invalid flags %x", flags);
1242
1243        kmem_free(vmem, (vm_offset_t)mem, size);
1244#else /* __rtems__ */
1245        if (flags & UMA_SLAB_KERNEL)
1246                free(mem, M_TEMP);
1247        else
1248                rtems_bsd_page_free(mem);
1249#endif /* __rtems__ */
1250}
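/*
 * Illustrative sketch (assumption, not code taken from this file): a zone
 * may substitute its own back-end for page_alloc()/page_free() via the
 * public setters.  my_alloc and my_free are hypothetical, but they must
 * match the signatures used by page_alloc() and page_free() above.
 *
 *	static void *my_alloc(uma_zone_t zone, vm_size_t bytes,
 *	    uint8_t *pflag, int wait);
 *	static void my_free(void *mem, vm_size_t bytes, uint8_t flags);
 *
 *	uma_zone_set_allocf(zone, my_alloc);
 *	uma_zone_set_freef(zone, my_free);
 */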
1251
1252/*
1253 * Zero fill initializer
1254 *
1255 * Arguments/Returns follow uma_init specifications
1256 */
1257static int
1258zero_init(void *mem, int size, int flags)
1259{
1260        bzero(mem, size);
1261        return (0);
1262}
1263
1264/*
1265 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
1266 *
1267 * Arguments
1268 *      keg  The zone we should initialize
1269 *
1270 * Returns
1271 *      Nothing
1272 */
1273static void
1274keg_small_init(uma_keg_t keg)
1275{
1276        u_int rsize;
1277        u_int memused;
1278        u_int wastedspace;
1279        u_int shsize;
1280        u_int slabsize;
1281
1282        if (keg->uk_flags & UMA_ZONE_PCPU) {
1283                u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
1284
1285                slabsize = sizeof(struct pcpu);
1286                keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
1287                    PAGE_SIZE);
1288        } else {
1289                slabsize = UMA_SLAB_SIZE;
1290                keg->uk_ppera = 1;
1291        }
1292
1293        /*
1294         * Calculate the size of each allocation (rsize) according to
1295         * alignment.  If the requested size is smaller than we have
1296         * allocation bits for we round it up.
1297         */
1298        rsize = keg->uk_size;
1299        if (rsize < slabsize / SLAB_SETSIZE)
1300                rsize = slabsize / SLAB_SETSIZE;
1301        if (rsize & keg->uk_align)
1302                rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1303        keg->uk_rsize = rsize;
1304
1305        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1306            keg->uk_rsize < sizeof(struct pcpu),
1307            ("%s: size %u too large", __func__, keg->uk_rsize));
1308
1309        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1310                shsize = 0;
1311        else
1312                shsize = sizeof(struct uma_slab);
1313
1314        keg->uk_ipers = (slabsize - shsize) / rsize;
1315        KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1316            ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1317
1318        memused = keg->uk_ipers * rsize + shsize;
1319        wastedspace = slabsize - memused;
1320
1321        /*
1322         * We can't do OFFPAGE if we're internal or if we've been
1323         * asked not to go to the VM for buckets.  If we do this we
1324         * may end up going to the VM for slabs, which we do not
1325         * want to do if we're UMA_ZFLAG_CACHEONLY as a result
1326         * of UMA_ZONE_VM, which clearly forbids it.
1327         */
1328        if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1329            (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1330                return;
1331
1332        /*
1333         * See if using an OFFPAGE slab will limit our waste.  Only do
1334         * this if it permits more items per-slab.
1335         *
1336         * XXX We could try growing slabsize to limit max waste as well.
1337         * Historically this was not done because the VM could not
1338         * efficiently handle contiguous allocations.
1339         */
1340        if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
1341            (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
1342                keg->uk_ipers = slabsize / keg->uk_rsize;
1343                KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1344                    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1345#ifdef UMA_DEBUG
1346                printf("UMA decided we need offpage slab headers for "
1347                    "keg: %s, calculated wastedspace = %d, "
1348                    "maximum wasted space allowed = %d, "
1349                    "calculated ipers = %d, "
1350                    "new wasted space = %d\n", keg->uk_name, wastedspace,
1351                    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1352                    slabsize - keg->uk_ipers * keg->uk_rsize);
1353#endif
1354                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1355        }
1356
1357        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1358            (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1359                keg->uk_flags |= UMA_ZONE_HASH;
1360}
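/*
 * Worked example (illustrative figures only; assume UMA_SLAB_SIZE == 4096,
 * an in-band slab header of 96 bytes and a 256-byte, pointer-aligned item):
 *
 *	rsize       = 256
 *	uk_ipers    = (4096 - 96) / 256 = 15
 *	memused     = 15 * 256 + 96     = 3936
 *	wastedspace = 4096 - 3936       = 160
 *
 * Assuming UMA_MAX_WASTE is 10, the OFFPAGE threshold is 4096 / 10 = 409
 * bytes, so this keg keeps its slab header in-band.
 */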
1361
1362/*
1363 * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
1364 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1365 * more complicated.
1366 *
1367 * Arguments
1368 *      keg  The keg we should initialize
1369 *
1370 * Returns
1371 *      Nothing
1372 */
1373static void
1374keg_large_init(uma_keg_t keg)
1375{
1376        u_int shsize;
1377
1378        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1379        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1380            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1381        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1382            ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1383
1384        keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1385        keg->uk_ipers = 1;
1386        keg->uk_rsize = keg->uk_size;
1387
1388        /* We can't do OFFPAGE if we're internal, bail out here. */
1389        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1390                return;
1391
1392        /* Check whether we have enough space to not do OFFPAGE. */
1393        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
1394                shsize = sizeof(struct uma_slab);
1395                if (shsize & UMA_ALIGN_PTR)
1396                        shsize = (shsize & ~UMA_ALIGN_PTR) +
1397                            (UMA_ALIGN_PTR + 1);
1398
1399                if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
1400                        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1401        }
1402
1403        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1404            (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1405                keg->uk_flags |= UMA_ZONE_HASH;
1406}
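/*
 * Worked example (illustrative; assume PAGE_SIZE == 4096): a keg whose
 * uk_size is 5000 bytes ends up with
 *
 *	uk_ppera = howmany(5000, 4096) = 2 pages per slab
 *	uk_ipers = 1, uk_rsize = 5000
 *
 * leaving 8192 - 5000 = 3192 bytes of slack, which comfortably holds the
 * in-band slab header, so the check above does not force OFFPAGE.
 */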
1407
1408static void
1409keg_cachespread_init(uma_keg_t keg)
1410{
1411        int alignsize;
1412        int trailer;
1413        int pages;
1414        int rsize;
1415
1416        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1417            ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1418
1419        alignsize = keg->uk_align + 1;
1420        rsize = keg->uk_size;
1421        /*
1422         * We want one item to start on every align boundary in a page.  To
1423         * do this we will span pages.  We will also extend the item by the
1424         * size of align if it is an even multiple of align.  Otherwise, it
1425         * would fall on the same boundary every time.
1426         */
1427        if (rsize & keg->uk_align)
1428                rsize = (rsize & ~keg->uk_align) + alignsize;
1429        if ((rsize & alignsize) == 0)
1430                rsize += alignsize;
1431        trailer = rsize - keg->uk_size;
1432        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1433        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1434        keg->uk_rsize = rsize;
1435        keg->uk_ppera = pages;
1436        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1437        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1438        KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
1439            ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1440            keg->uk_ipers));
1441}
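/*
 * Worked example (illustrative; assume PAGE_SIZE == 4096, a 128-byte item
 * and 64-byte cache lines, i.e. uk_align == 63):
 *
 *	alignsize = 64, rsize = 128 + 64 = 192
 *	    (128 is an even multiple of 64, so it is extended)
 *	pages     = (192 * (4096 / 64)) / 4096 = 3
 *	uk_ipers  = ((3 * 4096) + 64) / 192    = 64
 *
 * Because gcd(3, 64) == 1, the 64 item start offsets cycle through every
 * cache-line index within a page, which is the spreading effect this
 * initializer is after.
 */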
1442
1443/*
1444 * Keg header ctor.  This initializes all fields, locks, etc., and inserts
1445 * the keg onto the global keg list.
1446 *
1447 * Arguments/Returns follow uma_ctor specifications
1448 *      udata  Actually uma_kctor_args
1449 */
1450static int
1451keg_ctor(void *mem, int size, void *udata, int flags)
1452{
1453        struct uma_kctor_args *arg = udata;
1454        uma_keg_t keg = mem;
1455        uma_zone_t zone;
1456
1457        bzero(keg, size);
1458        keg->uk_size = arg->size;
1459        keg->uk_init = arg->uminit;
1460        keg->uk_fini = arg->fini;
1461        keg->uk_align = arg->align;
1462        keg->uk_free = 0;
1463        keg->uk_reserve = 0;
1464        keg->uk_pages = 0;
1465        keg->uk_flags = arg->flags;
1466        keg->uk_allocf = page_alloc;
1467        keg->uk_freef = page_free;
1468        keg->uk_slabzone = NULL;
1469
1470        /*
1471         * The master zone is passed to us at keg-creation time.
1472         */
1473        zone = arg->zone;
1474        keg->uk_name = zone->uz_name;
1475
1476        if (arg->flags & UMA_ZONE_VM)
1477                keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1478
1479        if (arg->flags & UMA_ZONE_ZINIT)
1480                keg->uk_init = zero_init;
1481
1482        if (arg->flags & UMA_ZONE_MALLOC)
1483                keg->uk_flags |= UMA_ZONE_VTOSLAB;
1484
1485        if (arg->flags & UMA_ZONE_PCPU)
1486#ifdef SMP
1487                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1488#else
1489                keg->uk_flags &= ~UMA_ZONE_PCPU;
1490#endif
1491
1492        if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1493                keg_cachespread_init(keg);
1494        } else {
1495                if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1496                        keg_large_init(keg);
1497                else
1498                        keg_small_init(keg);
1499        }
1500
1501        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1502                keg->uk_slabzone = slabzone;
1503
1504        /*
1505         * If we haven't booted yet we need allocations to go through the
1506         * startup cache until the vm is ready.
1507         */
1508        if (keg->uk_ppera == 1) {
1509#ifdef UMA_MD_SMALL_ALLOC
1510                keg->uk_allocf = uma_small_alloc;
1511                keg->uk_freef = uma_small_free;
1512
1513#ifndef __rtems__
1514                if (booted < UMA_STARTUP)
1515                        keg->uk_allocf = startup_alloc;
1516#endif /* __rtems__ */
1517#else
1518#ifndef __rtems__
1519                if (booted < UMA_STARTUP2)
1520                        keg->uk_allocf = startup_alloc;
1521#endif /* __rtems__ */
1522#endif
1523#ifndef __rtems__
1524        } else if (booted < UMA_STARTUP2 &&
1525            (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1526                keg->uk_allocf = startup_alloc;
1527#else /* __rtems__ */
1528        }
1529#endif /* __rtems__ */
1530
1531        /*
1532         * Initialize keg's lock
1533         */
1534        KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1535
1536        /*
1537         * If we're putting the slab header in the actual page we need to
1538         * figure out where in each page it goes.  This calculates a right
1539         * justified offset into the memory on an ALIGN_PTR boundary.
1540         */
1541        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1542                u_int totsize;
1543
1544                /* Size of the slab struct and free list */
1545                totsize = sizeof(struct uma_slab);
1546
1547                if (totsize & UMA_ALIGN_PTR)
1548                        totsize = (totsize & ~UMA_ALIGN_PTR) +
1549                            (UMA_ALIGN_PTR + 1);
1550                keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
1551
1552                /*
1553                 * The only way the following is possible is if our
1554                 * UMA_ALIGN_PTR adjustments have made us bigger than
1555                 * UMA_SLAB_SIZE.  I haven't checked whether this is
1556                 * mathematically possible for all cases, so we make
1557                 * sure here anyway.
1558                 */
1559                totsize = keg->uk_pgoff + sizeof(struct uma_slab);
1560                if (totsize > PAGE_SIZE * keg->uk_ppera) {
1561                        printf("zone %s ipers %d rsize %d size %d\n",
1562                            zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1563                            keg->uk_size);
1564                        panic("UMA slab won't fit.");
1565                }
1566        }
1567
1568        if (keg->uk_flags & UMA_ZONE_HASH)
1569                hash_alloc(&keg->uk_hash);
1570
1571#ifdef UMA_DEBUG
1572        printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1573            zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1574            keg->uk_ipers, keg->uk_ppera,
1575            (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
1576            keg->uk_free);
1577#endif
1578
1579        LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1580
1581        rw_wlock(&uma_rwlock);
1582        LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1583        rw_wunlock(&uma_rwlock);
1584        return (0);
1585}
1586
1587/*
1588 * Zone header ctor.  This initializes all fields, locks, etc.
1589 *
1590 * Arguments/Returns follow uma_ctor specifications
1591 *      udata  Actually uma_zctor_args
1592 */
1593static int
1594zone_ctor(void *mem, int size, void *udata, int flags)
1595{
1596        struct uma_zctor_args *arg = udata;
1597        uma_zone_t zone = mem;
1598        uma_zone_t z;
1599        uma_keg_t keg;
1600
1601        bzero(zone, size);
1602        zone->uz_name = arg->name;
1603        zone->uz_ctor = arg->ctor;
1604        zone->uz_dtor = arg->dtor;
1605        zone->uz_slab = zone_fetch_slab;
1606        zone->uz_init = NULL;
1607        zone->uz_fini = NULL;
1608        zone->uz_allocs = 0;
1609        zone->uz_frees = 0;
1610        zone->uz_fails = 0;
1611        zone->uz_sleeps = 0;
1612        zone->uz_count = 0;
1613        zone->uz_count_min = 0;
1614        zone->uz_flags = 0;
1615        zone->uz_warning = NULL;
1616        timevalclear(&zone->uz_ratecheck);
1617        keg = arg->keg;
1618
1619        ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
1620
1621        /*
1622         * This is a pure cache zone, no kegs.
1623         */
1624        if (arg->import) {
1625                if (arg->flags & UMA_ZONE_VM)
1626                        arg->flags |= UMA_ZFLAG_CACHEONLY;
1627                zone->uz_flags = arg->flags;
1628                zone->uz_size = arg->size;
1629                zone->uz_import = arg->import;
1630                zone->uz_release = arg->release;
1631                zone->uz_arg = arg->arg;
1632                zone->uz_lockptr = &zone->uz_lock;
1633                rw_wlock(&uma_rwlock);
1634                LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
1635                rw_wunlock(&uma_rwlock);
1636                goto out;
1637        }
1638
1639        /*
1640         * Use the regular zone/keg/slab allocator.
1641         */
1642        zone->uz_import = (uma_import)zone_import;
1643        zone->uz_release = (uma_release)zone_release;
1644        zone->uz_arg = zone;
1645
1646        if (arg->flags & UMA_ZONE_SECONDARY) {
1647                KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1648                zone->uz_init = arg->uminit;
1649                zone->uz_fini = arg->fini;
1650                zone->uz_lockptr = &keg->uk_lock;
1651                zone->uz_flags |= UMA_ZONE_SECONDARY;
1652                rw_wlock(&uma_rwlock);
1653                ZONE_LOCK(zone);
1654                LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1655                        if (LIST_NEXT(z, uz_link) == NULL) {
1656                                LIST_INSERT_AFTER(z, zone, uz_link);
1657                                break;
1658                        }
1659                }
1660                ZONE_UNLOCK(zone);
1661                rw_wunlock(&uma_rwlock);
1662        } else if (keg == NULL) {
1663                if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1664                    arg->align, arg->flags)) == NULL)
1665                        return (ENOMEM);
1666        } else {
1667                struct uma_kctor_args karg;
1668                int error;
1669
1670                /* We should only be here from uma_startup() */
1671                karg.size = arg->size;
1672                karg.uminit = arg->uminit;
1673                karg.fini = arg->fini;
1674                karg.align = arg->align;
1675                karg.flags = arg->flags;
1676                karg.zone = zone;
1677                error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1678                    flags);
1679                if (error)
1680                        return (error);
1681        }
1682
1683        /*
1684         * Link in the first keg.
1685         */
1686        zone->uz_klink.kl_keg = keg;
1687        LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1688        zone->uz_lockptr = &keg->uk_lock;
1689        zone->uz_size = keg->uk_size;
1690        zone->uz_flags |= (keg->uk_flags &
1691            (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1692
1693        /*
1694         * Some internal zones don't have room allocated for the per cpu
1695         * caches.  If we're internal, bail out here.
1696         */
1697        if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1698                KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1699                    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1700                return (0);
1701        }
1702
1703out:
1704        if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
1705                zone->uz_count = bucket_select(zone->uz_size);
1706        else
1707                zone->uz_count = BUCKET_MAX;
1708        zone->uz_count_min = zone->uz_count;
1709
1710        return (0);
1711}
1712
1713/*
1714 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1715 * table and removes the keg from the global list.
1716 *
1717 * Arguments/Returns follow uma_dtor specifications
1718 *      udata  unused
1719 */
1720static void
1721keg_dtor(void *arg, int size, void *udata)
1722{
1723        uma_keg_t keg;
1724
1725        keg = (uma_keg_t)arg;
1726        KEG_LOCK(keg);
1727        if (keg->uk_free != 0) {
1728                printf("Freed UMA keg (%s) was not empty (%d items).  "
1729                    "Lost %d pages of memory.\n",
1730                    keg->uk_name ? keg->uk_name : "",
1731                    keg->uk_free, keg->uk_pages);
1732        }
1733        KEG_UNLOCK(keg);
1734
1735        hash_free(&keg->uk_hash);
1736
1737        KEG_LOCK_FINI(keg);
1738}
1739
1740/*
1741 * Zone header dtor.
1742 *
1743 * Arguments/Returns follow uma_dtor specifications
1744 *      udata  unused
1745 */
1746static void
1747zone_dtor(void *arg, int size, void *udata)
1748{
1749        uma_klink_t klink;
1750        uma_zone_t zone;
1751        uma_keg_t keg;
1752
1753        zone = (uma_zone_t)arg;
1754        keg = zone_first_keg(zone);
1755
1756        if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1757                cache_drain(zone);
1758
1759        rw_wlock(&uma_rwlock);
1760        LIST_REMOVE(zone, uz_link);
1761        rw_wunlock(&uma_rwlock);
1762        /*
1763         * XXX there are some races here where
1764         * the zone can be drained but the zone lock
1765         * released and then refilled before we
1766         * remove it... we don't care for now.
1767         */
1768        zone_drain_wait(zone, M_WAITOK);
1769        /*
1770         * Unlink all of our kegs.
1771         */
1772        while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1773                klink->kl_keg = NULL;
1774                LIST_REMOVE(klink, kl_link);
1775                if (klink == &zone->uz_klink)
1776                        continue;
1777                free(klink, M_TEMP);
1778        }
1779        /*
1780         * We only destroy kegs from non secondary zones.
1781         */
1782        if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1783                rw_wlock(&uma_rwlock);
1784                LIST_REMOVE(keg, uk_link);
1785                rw_wunlock(&uma_rwlock);
1786                zone_free_item(kegs, keg, NULL, SKIP_NONE);
1787        }
1788        ZONE_LOCK_FINI(zone);
1789}
1790
1791/*
1792 * Traverses every zone in the system and calls a callback
1793 *
1794 * Arguments:
1795 *      zfunc  A pointer to a function which accepts a zone
1796 *              as an argument.
1797 *
1798 * Returns:
1799 *      Nothing
1800 */
1801static void
1802zone_foreach(void (*zfunc)(uma_zone_t))
1803{
1804        uma_keg_t keg;
1805        uma_zone_t zone;
1806
1807        rw_rlock(&uma_rwlock);
1808        LIST_FOREACH(keg, &uma_kegs, uk_link) {
1809                LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1810                        zfunc(zone);
1811        }
1812        rw_runlock(&uma_rwlock);
1813}
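
/*
 * Illustrative note: this is the walker used by uma_reclaim_locked() and
 * uma_print_stats() below, e.g.
 *
 *      zone_foreach(zone_drain);
 *
 * runs zone_drain() on every zone attached to every keg.
 */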
1814
1815/* Public functions */
1816/* See uma.h */
1817void
1818uma_startup(void *bootmem, int boot_pages)
1819{
1820        struct uma_zctor_args args;
1821#ifndef __rtems__
1822        uma_slab_t slab;
1823        int i;
1824#endif /* __rtems__ */
1825
1826#ifdef UMA_DEBUG
1827        printf("Creating uma keg headers zone and keg.\n");
1828#endif
1829        rw_init(&uma_rwlock, "UMA lock");
1830
1831        /* "manually" create the initial zone */
1832        memset(&args, 0, sizeof(args));
1833        args.name = "UMA Kegs";
1834        args.size = sizeof(struct uma_keg);
1835        args.ctor = keg_ctor;
1836        args.dtor = keg_dtor;
1837        args.uminit = zero_init;
1838        args.fini = NULL;
1839        args.keg = &masterkeg;
1840        args.align = 32 - 1;
1841        args.flags = UMA_ZFLAG_INTERNAL;
1842        /* The initial zone has no per-CPU queues so it's smaller */
1843        zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1844
1845#ifndef __rtems__
1846#ifdef UMA_DEBUG
1847        printf("Filling boot free list.\n");
1848#endif
1849        for (i = 0; i < boot_pages; i++) {
1850                slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
1851                slab->us_data = (uint8_t *)slab;
1852                slab->us_flags = UMA_SLAB_BOOT;
1853                LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1854        }
1855        mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1856#endif /* __rtems__ */
1857
1858#ifdef UMA_DEBUG
1859        printf("Creating uma zone headers zone and keg.\n");
1860#endif
1861        args.name = "UMA Zones";
1862        args.size = sizeof(struct uma_zone) +
1863            (sizeof(struct uma_cache) * (mp_maxid + 1));
1864        args.ctor = zone_ctor;
1865        args.dtor = zone_dtor;
1866        args.uminit = zero_init;
1867        args.fini = NULL;
1868        args.keg = NULL;
1869        args.align = 32 - 1;
1870        args.flags = UMA_ZFLAG_INTERNAL;
1871        /* The initial zone has no per-CPU queues so it's smaller */
1872        zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1873
1874#ifdef UMA_DEBUG
1875        printf("Creating slab and hash zones.\n");
1876#endif
1877
1878        /* Now make a zone for slab headers */
1879        slabzone = uma_zcreate("UMA Slabs",
1880                                sizeof(struct uma_slab),
1881                                NULL, NULL, NULL, NULL,
1882                                UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1883
1884        hashzone = uma_zcreate("UMA Hash",
1885            sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1886            NULL, NULL, NULL, NULL,
1887            UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1888
1889        bucket_init();
1890
1891#ifndef __rtems__
1892        booted = UMA_STARTUP;
1893#endif /* __rtems__ */
1894
1895#ifdef UMA_DEBUG
1896        printf("UMA startup complete.\n");
1897#endif
1898}
1899#ifdef __rtems__
1900static void
1901rtems_bsd_uma_startup(void *unused)
1902{
1903        (void) unused;
1904
1905        sx_init_flags(&uma_drain_lock, "umadrain", SX_RECURSE);
1906        uma_startup(NULL, 0);
1907}
1908
1909SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_SECOND,
1910    rtems_bsd_uma_startup, NULL);
1911#endif /* __rtems__ */
1912
1913#ifndef __rtems__
1914/* see uma.h */
1915void
1916uma_startup2(void)
1917{
1918        booted = UMA_STARTUP2;
1919        bucket_enable();
1920        sx_init(&uma_drain_lock, "umadrain");
1921#ifdef UMA_DEBUG
1922        printf("UMA startup2 complete.\n");
1923#endif
1924}
1925#endif /* __rtems__ */
1926
1927/*
1928 * Initialize our callout handle and start the periodic
1929 * zone timeout.
1930 */
1931
1932static void
1933uma_startup3(void)
1934{
1935#ifdef UMA_DEBUG
1936        printf("Starting callout.\n");
1937#endif
1938        callout_init(&uma_callout, 1);
1939        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1940#ifdef UMA_DEBUG
1941        printf("UMA startup3 complete.\n");
1942#endif
1943}
1944
1945static uma_keg_t
1946uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1947                int align, uint32_t flags)
1948{
1949        struct uma_kctor_args args;
1950
1951        args.size = size;
1952        args.uminit = uminit;
1953        args.fini = fini;
1954        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1955        args.flags = flags;
1956        args.zone = zone;
1957        return (zone_alloc_item(kegs, &args, M_WAITOK));
1958}
1959
1960/* See uma.h */
1961void
1962uma_set_align(int align)
1963{
1964
1965        if (align != UMA_ALIGN_CACHE)
1966                uma_align_cache = align;
1967}
1968
1969/* See uma.h */
1970uma_zone_t
1971uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1972                uma_init uminit, uma_fini fini, int align, uint32_t flags)
1973
1974{
1975        struct uma_zctor_args args;
1976        uma_zone_t res;
1977#ifndef __rtems__
1978        bool locked;
1979#endif /* __rtems__ */
1980
1981        /* This stuff is essential for the zone ctor */
1982        memset(&args, 0, sizeof(args));
1983        args.name = name;
1984        args.size = size;
1985        args.ctor = ctor;
1986        args.dtor = dtor;
1987        args.uminit = uminit;
1988        args.fini = fini;
1989#ifdef  INVARIANTS
1990        /*
1991         * If a zone is being created with an empty constructor and
1992         * destructor, pass UMA constructor/destructor which checks for
1993         * memory use after free.
1994         */
1995        if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
1996            ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
1997                args.ctor = trash_ctor;
1998                args.dtor = trash_dtor;
1999                args.uminit = trash_init;
2000                args.fini = trash_fini;
2001        }
2002#endif
2003        args.align = align;
2004        args.flags = flags;
2005        args.keg = NULL;
2006
2007#ifndef __rtems__
2008        if (booted < UMA_STARTUP2) {
2009                locked = false;
2010        } else {
2011#endif /* __rtems__ */
2012                sx_slock(&uma_drain_lock);
2013#ifndef __rtems__
2014                locked = true;
2015        }
2016#endif /* __rtems__ */
2017        res = zone_alloc_item(zones, &args, M_WAITOK);
2018#ifndef __rtems__
2019        if (locked)
2020#endif /* __rtems__ */
2021                sx_sunlock(&uma_drain_lock);
2022        return (res);
2023}
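
/*
 * Sketch of typical use (the "foo" names are hypothetical): a fixed-size
 * object cache is created once and then serviced through the
 * uma_zalloc()/uma_zfree() wrappers around uma_zalloc_arg() and
 * uma_zfree_arg() below:
 *
 *      static uma_zone_t foo_zone;
 *
 *      foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *          NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *
 *      struct foo *fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *      ...
 *      uma_zfree(foo_zone, fp);
 */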
2024
2025/* See uma.h */
2026uma_zone_t
2027uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
2028                    uma_init zinit, uma_fini zfini, uma_zone_t master)
2029{
2030        struct uma_zctor_args args;
2031        uma_keg_t keg;
2032        uma_zone_t res;
2033#ifndef __rtems__
2034        bool locked;
2035#endif /* __rtems__ */
2036
2037        keg = zone_first_keg(master);
2038        memset(&args, 0, sizeof(args));
2039        args.name = name;
2040        args.size = keg->uk_size;
2041        args.ctor = ctor;
2042        args.dtor = dtor;
2043        args.uminit = zinit;
2044        args.fini = zfini;
2045        args.align = keg->uk_align;
2046        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
2047        args.keg = keg;
2048
2049#ifndef __rtems__
2050        if (booted < UMA_STARTUP2) {
2051                locked = false;
2052        } else {
2053#endif /* __rtems__ */
2054                sx_slock(&uma_drain_lock);
2055#ifndef __rtems__
2056                locked = true;
2057        }
2058#endif /* __rtems__ */
2059        /* XXX Attaches only one keg of potentially many. */
2060        res = zone_alloc_item(zones, &args, M_WAITOK);
2061#ifndef __rtems__
2062        if (locked)
2063#endif /* __rtems__ */
2064                sx_sunlock(&uma_drain_lock);
2065        return (res);
2066}
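
/*
 * Sketch of typical use (names are hypothetical): a secondary zone shares
 * the master zone's keg, i.e. its slabs and item size, but layers its own
 * ctor/dtor and zinit/zfini on top:
 *
 *      uma_zone_t bar_zone;
 *
 *      bar_zone = uma_zsecond_create("bar", bar_ctor, bar_dtor,
 *          NULL, NULL, foo_zone);
 */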
2067
2068/* See uma.h */
2069uma_zone_t
2070uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2071                    uma_init zinit, uma_fini zfini, uma_import zimport,
2072                    uma_release zrelease, void *arg, int flags)
2073{
2074        struct uma_zctor_args args;
2075
2076        memset(&args, 0, sizeof(args));
2077        args.name = name;
2078        args.size = size;
2079        args.ctor = ctor;
2080        args.dtor = dtor;
2081        args.uminit = zinit;
2082        args.fini = zfini;
2083        args.import = zimport;
2084        args.release = zrelease;
2085        args.arg = arg;
2086        args.align = 0;
2087        args.flags = flags;
2088
2089        return (zone_alloc_item(zones, &args, M_WAITOK));
2090}
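
/*
 * Sketch of typical use (the callback and variable names are hypothetical):
 * a cache zone has no keg; items are imported from and released to a
 * caller-supplied backend instead of the slab layer:
 *
 *      static int  my_import(void *arg, void **store, int count, int flags);
 *      static void my_release(void *arg, void **store, int count);
 *
 *      cache_zone = uma_zcache_create("my cache", item_size, NULL, NULL,
 *          NULL, NULL, my_import, my_release, my_arg, 0);
 */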
2091
2092#ifndef __rtems__
2093static void
2094zone_lock_pair(uma_zone_t a, uma_zone_t b)
2095{
2096        if (a < b) {
2097                ZONE_LOCK(a);
2098                mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
2099        } else {
2100                ZONE_LOCK(b);
2101                mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
2102        }
2103}
2104
2105static void
2106zone_unlock_pair(uma_zone_t a, uma_zone_t b)
2107{
2108
2109        ZONE_UNLOCK(a);
2110        ZONE_UNLOCK(b);
2111}
2112
2113int
2114uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
2115{
2116        uma_klink_t klink;
2117        uma_klink_t kl;
2118        int error;
2119
2120        error = 0;
2121        klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
2122
2123        zone_lock_pair(zone, master);
2124        /*
2125         * zone must use vtoslab() to resolve objects and must already be
2126         * a secondary.
2127         */
2128        if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
2129            != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
2130                error = EINVAL;
2131                goto out;
2132        }
2133        /*
2134         * The new master must also use vtoslab().
2135         */
2136        if ((master->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2137                error = EINVAL;
2138                goto out;
2139        }
2140
2141        /*
2142         * The underlying object must be the same size.  rsize
2143         * may be different.
2144         */
2145        if (master->uz_size != zone->uz_size) {
2146                error = E2BIG;
2147                goto out;
2148        }
2149        /*
2150         * Put it at the end of the list.
2151         */
2152        klink->kl_keg = zone_first_keg(master);
2153        LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2154                if (LIST_NEXT(kl, kl_link) == NULL) {
2155                        LIST_INSERT_AFTER(kl, klink, kl_link);
2156                        break;
2157                }
2158        }
2159        klink = NULL;
2160        zone->uz_flags |= UMA_ZFLAG_MULTI;
2161        zone->uz_slab = zone_fetch_slab_multi;
2162
2163out:
2164        zone_unlock_pair(zone, master);
2165        if (klink != NULL)
2166                free(klink, M_TEMP);
2167
2168        return (error);
2169}
2170#endif /* __rtems__ */
2171
2172
2173/* See uma.h */
2174void
2175uma_zdestroy(uma_zone_t zone)
2176{
2177
2178        sx_slock(&uma_drain_lock);
2179        zone_free_item(zones, zone, NULL, SKIP_NONE);
2180        sx_sunlock(&uma_drain_lock);
2181}
2182
2183/* See uma.h */
2184void *
2185uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2186{
2187        void *item;
2188        uma_cache_t cache;
2189        uma_bucket_t bucket;
2190        int lockfail;
2191        int cpu;
2192
2193        /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2194        random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
2195
2196        /* This is the fast path allocation */
2197#ifdef UMA_DEBUG_ALLOC_1
2198        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
2199#endif
2200        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2201            zone->uz_name, flags);
2202
2203        if (flags & M_WAITOK) {
2204                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2205                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2206        }
2207        KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2208            ("uma_zalloc_arg: called with spinlock or critical section held"));
2209
2210#ifdef DEBUG_MEMGUARD
2211        if (memguard_cmp_zone(zone)) {
2212                item = memguard_alloc(zone->uz_size, flags);
2213                if (item != NULL) {
2214                        if (zone->uz_init != NULL &&
2215                            zone->uz_init(item, zone->uz_size, flags) != 0)
2216                                return (NULL);
2217                        if (zone->uz_ctor != NULL &&
2218                            zone->uz_ctor(item, zone->uz_size, udata,
2219                            flags) != 0) {
2220                                zone->uz_fini(item, zone->uz_size);
2221                                return (NULL);
2222                        }
2223                        return (item);
2224                }
2225                /* This is unfortunate but should not be fatal. */
2226        }
2227#endif
2228        /*
2229         * If possible, allocate from the per-CPU cache.  There are two
2230         * requirements for safe access to the per-CPU cache: (1) the thread
2231         * accessing the cache must not be preempted or yield during access,
2232         * and (2) the thread must not migrate CPUs without switching which
2233         * cache it accesses.  We rely on a critical section to prevent
2234         * preemption and migration.  We release the critical section in
2235         * order to acquire the zone mutex if we are unable to allocate from
2236         * the current cache; when we re-acquire the critical section, we
2237         * must detect and handle migration if it has occurred.
2238         */
2239        critical_enter();
2240        cpu = curcpu;
2241        cache = &zone->uz_cpu[cpu];
2242
2243zalloc_start:
2244        bucket = cache->uc_allocbucket;
2245        if (bucket != NULL && bucket->ub_cnt > 0) {
2246                bucket->ub_cnt--;
2247                item = bucket->ub_bucket[bucket->ub_cnt];
2248#ifdef INVARIANTS
2249                bucket->ub_bucket[bucket->ub_cnt] = NULL;
2250#endif
2251                KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2252                cache->uc_allocs++;
2253                critical_exit();
2254                if (zone->uz_ctor != NULL &&
2255                    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2256                        atomic_add_long(&zone->uz_fails, 1);
2257                        zone_free_item(zone, item, udata, SKIP_DTOR);
2258                        return (NULL);
2259                }
2260#ifdef INVARIANTS
2261                uma_dbg_alloc(zone, NULL, item);
2262#endif
2263                if (flags & M_ZERO)
2264                        uma_zero_item(item, zone);
2265                return (item);
2266        }
2267
2268        /*
2269         * We have run out of items in our alloc bucket.
2270         * See if we can switch with our free bucket.
2271         */
2272        bucket = cache->uc_freebucket;
2273        if (bucket != NULL && bucket->ub_cnt > 0) {
2274#ifdef UMA_DEBUG_ALLOC
2275                printf("uma_zalloc: Swapping empty with alloc.\n");
2276#endif
2277                cache->uc_freebucket = cache->uc_allocbucket;
2278                cache->uc_allocbucket = bucket;
2279                goto zalloc_start;
2280        }
2281
2282        /*
2283         * Discard any empty allocation bucket while we hold no locks.
2284         */
2285        bucket = cache->uc_allocbucket;
2286        cache->uc_allocbucket = NULL;
2287        critical_exit();
2288        if (bucket != NULL)
2289                bucket_free(zone, bucket, udata);
2290
2291        /* Short-circuit for zones without buckets and low memory. */
2292        if (zone->uz_count == 0 || bucketdisable)
2293                goto zalloc_item;
2294
2295        /*
2296         * Our attempt to retrieve the item from the per-CPU cache failed, so
2297         * we must go back to the zone.  This requires the zone lock, so we
2298         * must drop the critical section, then re-acquire it when we go back
2299         * to the cache.  Since the critical section is released, we may be
2300         * preempted or migrate.  As such, make sure not to maintain any
2301         * thread-local state specific to the cache from prior to releasing
2302         * the critical section.
2303         */
2304        lockfail = 0;
2305        if (ZONE_TRYLOCK(zone) == 0) {
2306                /* Record contention to size the buckets. */
2307                ZONE_LOCK(zone);
2308                lockfail = 1;
2309        }
2310        critical_enter();
2311        cpu = curcpu;
2312        cache = &zone->uz_cpu[cpu];
2313
2314        /*
2315         * Since we have locked the zone we may as well send back our stats.
2316         */
2317        atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2318        atomic_add_long(&zone->uz_frees, cache->uc_frees);
2319        cache->uc_allocs = 0;
2320        cache->uc_frees = 0;
2321
2322        /* See if we lost the race to fill the cache. */
2323        if (cache->uc_allocbucket != NULL) {
2324                ZONE_UNLOCK(zone);
2325                goto zalloc_start;
2326        }
2327
2328        /*
2329         * Check the zone's cache of buckets.
2330         */
2331        if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
2332                KASSERT(bucket->ub_cnt != 0,
2333                    ("uma_zalloc_arg: Returning an empty bucket."));
2334
2335                LIST_REMOVE(bucket, ub_link);
2336                cache->uc_allocbucket = bucket;
2337                ZONE_UNLOCK(zone);
2338                goto zalloc_start;
2339        }
2340        /* We are no longer associated with this CPU. */
2341        critical_exit();
2342
2343        /*
2344         * We bump the uz count when the cache size is insufficient to
2345         * handle the working set.
2346         */
2347        if (lockfail && zone->uz_count < BUCKET_MAX)
2348                zone->uz_count++;
2349        ZONE_UNLOCK(zone);
2350
2351        /*
2352         * Now let's just fill a bucket and put it on the free list.  If that
2353         * works we'll restart the allocation from the beginning and it
2354         * will use the just-filled bucket.
2355         */
2356        bucket = zone_alloc_bucket(zone, udata, flags);
2357        if (bucket != NULL) {
2358                ZONE_LOCK(zone);
2359                critical_enter();
2360                cpu = curcpu;
2361                cache = &zone->uz_cpu[cpu];
2362                /*
2363                 * See if we lost the race or were migrated.  Cache the
2364                 * initialized bucket to make this less likely or claim
2365                 * the memory directly.
2366                 */
2367                if (cache->uc_allocbucket == NULL)
2368                        cache->uc_allocbucket = bucket;
2369                else
2370                        LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2371                ZONE_UNLOCK(zone);
2372                goto zalloc_start;
2373        }
2374
2375        /*
2376         * We may not be able to get a bucket so return an actual item.
2377         */
2378#ifdef UMA_DEBUG
2379        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2380#endif
2381
2382zalloc_item:
2383        item = zone_alloc_item(zone, udata, flags);
2384
2385        return (item);
2386}
2387
2388static uma_slab_t
2389keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2390{
2391        uma_slab_t slab;
2392        int reserve;
2393
2394        mtx_assert(&keg->uk_lock, MA_OWNED);
2395        slab = NULL;
2396        reserve = 0;
2397        if ((flags & M_USE_RESERVE) == 0)
2398                reserve = keg->uk_reserve;
2399
2400        for (;;) {
2401                /*
2402                 * Find a slab with some space.  Prefer slabs that are partially
2403                 * used over those that are totally full.  This helps to reduce
2404                 * fragmentation.
2405                 */
2406                if (keg->uk_free > reserve) {
2407                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
2408                                slab = LIST_FIRST(&keg->uk_part_slab);
2409                        } else {
2410                                slab = LIST_FIRST(&keg->uk_free_slab);
2411                                LIST_REMOVE(slab, us_link);
2412                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2413                                    us_link);
2414                        }
2415                        MPASS(slab->us_keg == keg);
2416                        return (slab);
2417                }
2418
2419                /*
2420                 * M_NOVM means don't ask at all!
2421                 */
2422                if (flags & M_NOVM)
2423                        break;
2424
2425                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2426                        keg->uk_flags |= UMA_ZFLAG_FULL;
2427                        /*
2428                         * If this is not a multi-zone, set the FULL bit.
2429                         * Otherwise slab_multi() takes care of it.
2430                         */
2431                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
2432                                zone->uz_flags |= UMA_ZFLAG_FULL;
2433                                zone_log_warning(zone);
2434                                zone_maxaction(zone);
2435                        }
2436                        if (flags & M_NOWAIT)
2437                                break;
2438                        zone->uz_sleeps++;
2439                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2440                        continue;
2441                }
2442                slab = keg_alloc_slab(keg, zone, flags);
2443                /*
2444                 * If we got a slab here it's safe to mark it partially used
2445                 * and return.  We assume that the caller is going to remove
2446                 * at least one item.
2447                 */
2448                if (slab) {
2449                        MPASS(slab->us_keg == keg);
2450                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2451                        return (slab);
2452                }
2453                /*
2454                 * We might not have been able to get a slab but another cpu
2455                 * could have while we were unlocked.  Check again before we
2456                 * fail.
2457                 */
2458                flags |= M_NOVM;
2459        }
2460        return (slab);
2461}
2462
2463static uma_slab_t
2464zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2465{
2466        uma_slab_t slab;
2467
2468        if (keg == NULL) {
2469                keg = zone_first_keg(zone);
2470                KEG_LOCK(keg);
2471        }
2472
2473        for (;;) {
2474                slab = keg_fetch_slab(keg, zone, flags);
2475                if (slab)
2476                        return (slab);
2477                if (flags & (M_NOWAIT | M_NOVM))
2478                        break;
2479        }
2480        KEG_UNLOCK(keg);
2481        return (NULL);
2482}
2483
2484#ifndef __rtems__
2485/*
2486 * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2487 * with the keg locked.  On NULL no lock is held.
2488 *
2489 * The last pointer is used to seed the search.  It is not required.
2490 */
2491static uma_slab_t
2492zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2493{
2494        uma_klink_t klink;
2495        uma_slab_t slab;
2496        uma_keg_t keg;
2497        int flags;
2498        int empty;
2499        int full;
2500
2501        /*
2502         * Don't wait on the first pass.  This will skip limit tests
2503         * as well.  We don't want to block if we can find a provider
2504         * without blocking.
2505         */
2506        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2507        /*
2508         * Use the last slab allocated as a hint for where to start
2509         * the search.
2510         */
2511        if (last != NULL) {
2512                slab = keg_fetch_slab(last, zone, flags);
2513                if (slab)
2514                        return (slab);
2515                KEG_UNLOCK(last);
2516        }
2517        /*
2518         * Loop until we have a slab in case of transient failures
2519         * while M_WAITOK is specified.  It is not clear this is 100%
2520         * required, but it has been done this way for a long time.
2521         */
2522        for (;;) {
2523                empty = 0;
2524                full = 0;
2525                /*
2526                 * Search the available kegs for slabs.  Be careful to hold the
2527                 * correct lock while calling into the keg layer.
2528                 */
2529                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2530                        keg = klink->kl_keg;
2531                        KEG_LOCK(keg);
2532                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2533                                slab = keg_fetch_slab(keg, zone, flags);
2534                                if (slab)
2535                                        return (slab);
2536                        }
2537                        if (keg->uk_flags & UMA_ZFLAG_FULL)
2538                                full++;
2539                        else
2540                                empty++;
2541                        KEG_UNLOCK(keg);
2542                }
2543                if (rflags & (M_NOWAIT | M_NOVM))
2544                        break;
2545                flags = rflags;
2546                /*
2547                 * All kegs are full.  XXX We can't atomically check all kegs
2548                 * and sleep so just sleep for a short period and retry.
2549                 */
2550                if (full && !empty) {
2551                        ZONE_LOCK(zone);
2552                        zone->uz_flags |= UMA_ZFLAG_FULL;
2553                        zone->uz_sleeps++;
2554                        zone_log_warning(zone);
2555                        zone_maxaction(zone);
2556                        msleep(zone, zone->uz_lockptr, PVM,
2557                            "zonelimit", hz/100);
2558                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
2559                        ZONE_UNLOCK(zone);
2560                        continue;
2561                }
2562        }
2563        return (NULL);
2564}
2565#endif /* __rtems__ */
2566
2567static void *
2568slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2569{
2570        void *item;
2571        uint8_t freei;
2572
2573        MPASS(keg == slab->us_keg);
2574        mtx_assert(&keg->uk_lock, MA_OWNED);
2575
2576        freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2577        BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2578        item = slab->us_data + (keg->uk_rsize * freei);
2579        slab->us_freecount--;
2580        keg->uk_free--;
2581
2582        /* Move this slab to the full list */
2583        if (slab->us_freecount == 0) {
2584                LIST_REMOVE(slab, us_link);
2585                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2586        }
2587
2588        return (item);
2589}
2590
2591static int
2592zone_import(uma_zone_t zone, void **bucket, int max, int flags)
2593{
2594        uma_slab_t slab;
2595        uma_keg_t keg;
2596        int i;
2597
2598        slab = NULL;
2599        keg = NULL;
2600        /* Try to keep the buckets totally full */
2601        for (i = 0; i < max; ) {
2602                if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2603                        break;
2604                keg = slab->us_keg;
2605                while (slab->us_freecount && i < max) {
2606                        bucket[i++] = slab_alloc_item(keg, slab);
2607                        if (keg->uk_free <= keg->uk_reserve)
2608                                break;
2609                }
2610                /* Don't grab more than one slab at a time. */
2611                flags &= ~M_WAITOK;
2612                flags |= M_NOWAIT;
2613        }
2614        if (slab != NULL)
2615                KEG_UNLOCK(keg);
2616
2617        return (i);
2618}
2619
2620static uma_bucket_t
2621zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
2622{
2623        uma_bucket_t bucket;
2624        int max;
2625
2626        /* Don't wait for buckets, preserve caller's NOVM setting. */
2627        bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
2628        if (bucket == NULL)
2629                return (NULL);
2630
2631        max = MIN(bucket->ub_entries, zone->uz_count);
2632        bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
2633            max, flags);
2634
2635        /*
2636         * Initialize the memory if necessary.
2637         */
2638        if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
2639                int i;
2640
2641                for (i = 0; i < bucket->ub_cnt; i++)
2642                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2643                            flags) != 0)
2644                                break;
2645                /*
2646                 * If we couldn't initialize the whole bucket, put the
2647                 * rest back onto the freelist.
2648                 */
2649                if (i != bucket->ub_cnt) {
2650                        zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
2651                            bucket->ub_cnt - i);
2652#ifdef INVARIANTS
2653                        bzero(&bucket->ub_bucket[i],
2654                            sizeof(void *) * (bucket->ub_cnt - i));
2655#endif
2656                        bucket->ub_cnt = i;
2657                }
2658        }
2659
2660        if (bucket->ub_cnt == 0) {
2661                bucket_free(zone, bucket, udata);
2662                atomic_add_long(&zone->uz_fails, 1);
2663                return (NULL);
2664        }
2665
2666        return (bucket);
2667}
2668
2669/*
2670 * Allocates a single item from a zone.
2671 *
2672 * Arguments
2673 *      zone   The zone to alloc for.
2674 *      udata  The data to be passed to the constructor.
2675 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2676 *
2677 * Returns
2678 *      NULL if there is no memory and M_NOWAIT is set
2679 *      An item if successful
2680 */
2681
2682static void *
2683zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2684{
2685        void *item;
2686
2687        item = NULL;
2688
2689#ifdef UMA_DEBUG_ALLOC
2690        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2691#endif
2692        if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
2693                goto fail;
2694        atomic_add_long(&zone->uz_allocs, 1);
2695
2696        /*
2697         * We have to call both the zone's init (not the keg's init)
2698         * and the zone's ctor.  This is because the item is going from
2699         * a keg slab directly to the user, and the user is expecting it
2700         * to be both zone-init'd as well as zone-ctor'd.
2701         */
2702        if (zone->uz_init != NULL) {
2703                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2704                        zone_free_item(zone, item, udata, SKIP_FINI);
2705                        goto fail;
2706                }
2707        }
2708        if (zone->uz_ctor != NULL) {
2709                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2710                        zone_free_item(zone, item, udata, SKIP_DTOR);
2711                        goto fail;
2712                }
2713        }
2714#ifdef INVARIANTS
2715        uma_dbg_alloc(zone, NULL, item);
2716#endif
2717        if (flags & M_ZERO)
2718                uma_zero_item(item, zone);
2719
2720        return (item);
2721
2722fail:
2723        atomic_add_long(&zone->uz_fails, 1);
2724        return (NULL);
2725}
2726
2727/* See uma.h */
2728void
2729uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2730{
2731        uma_cache_t cache;
2732        uma_bucket_t bucket;
2733        int lockfail;
2734        int cpu;
2735
2736        /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2737        random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
2738
2739#ifdef UMA_DEBUG_ALLOC_1
2740        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2741#endif
2742        CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2743            zone->uz_name);
2744
2745        KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2746            ("uma_zfree_arg: called with spinlock or critical section held"));
2747
2748        /* uma_zfree(..., NULL) does nothing, to match free(9). */
2749        if (item == NULL)
2750                return;
2751#ifdef DEBUG_MEMGUARD
2752        if (is_memguard_addr(item)) {
2753                if (zone->uz_dtor != NULL)
2754                        zone->uz_dtor(item, zone->uz_size, udata);
2755                if (zone->uz_fini != NULL)
2756                        zone->uz_fini(item, zone->uz_size);
2757                memguard_free(item);
2758                return;
2759        }
2760#endif
2761#ifdef INVARIANTS
2762        if (zone->uz_flags & UMA_ZONE_MALLOC)
2763                uma_dbg_free(zone, udata, item);
2764        else
2765                uma_dbg_free(zone, NULL, item);
2766#endif
2767        if (zone->uz_dtor != NULL)
2768                zone->uz_dtor(item, zone->uz_size, udata);
2769
2770        /*
2771         * The race here is acceptable.  If we miss it we'll just have to wait
2772         * a little longer for the limits to be reset.
2773         */
2774        if (zone->uz_flags & UMA_ZFLAG_FULL)
2775                goto zfree_item;
2776
2777        /*
2778         * If possible, free to the per-CPU cache.  There are two
2779         * requirements for safe access to the per-CPU cache: (1) the thread
2780         * accessing the cache must not be preempted or yield during access,
2781         * and (2) the thread must not migrate CPUs without switching which
2782         * cache it accesses.  We rely on a critical section to prevent
2783         * preemption and migration.  We release the critical section in
2784         * order to acquire the zone mutex if we are unable to free to the
2785         * current cache; when we re-acquire the critical section, we must
2786         * detect and handle migration if it has occurred.
2787         */
2788zfree_restart:
2789        critical_enter();
2790        cpu = curcpu;
2791        cache = &zone->uz_cpu[cpu];
2792
2793zfree_start:
2794        /*
2795         * Try to free into the allocbucket first to give LIFO ordering
2796         * for cache-hot data structures.  Spill over into the freebucket
2797         * if necessary.  Alloc will swap them if one runs dry.
2798         */
2799        bucket = cache->uc_allocbucket;
2800        if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
2801                bucket = cache->uc_freebucket;
2802        if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2803                KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2804                    ("uma_zfree: Freeing to non free bucket index."));
2805                bucket->ub_bucket[bucket->ub_cnt] = item;
2806                bucket->ub_cnt++;
2807                cache->uc_frees++;
2808                critical_exit();
2809                return;
2810        }
2811
2812        /*
2813         * We must go back to the zone, which requires acquiring the zone lock,
2814         * which in turn means we must release and re-acquire the critical
2815         * section.  Since the critical section is released, we may be
2816         * preempted or migrate.  As such, make sure not to maintain any
2817         * thread-local state specific to the cache from prior to releasing
2818         * the critical section.
2819         */
2820        critical_exit();
2821        if (zone->uz_count == 0 || bucketdisable)
2822                goto zfree_item;
2823
2824        lockfail = 0;
2825        if (ZONE_TRYLOCK(zone) == 0) {
2826                /* Record contention to size the buckets. */
2827                ZONE_LOCK(zone);
2828                lockfail = 1;
2829        }
2830        critical_enter();
2831        cpu = curcpu;
2832        cache = &zone->uz_cpu[cpu];
2833
2834        /*
2835         * Since we have locked the zone we may as well send back our stats.
2836         */
2837        atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2838        atomic_add_long(&zone->uz_frees, cache->uc_frees);
2839        cache->uc_allocs = 0;
2840        cache->uc_frees = 0;
2841
2842        bucket = cache->uc_freebucket;
2843        if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2844                ZONE_UNLOCK(zone);
2845                goto zfree_start;
2846        }
2847        cache->uc_freebucket = NULL;
2848        /* We are no longer associated with this CPU. */
2849        critical_exit();
2850
2851        /* Can we throw this on the zone full list? */
2852        if (bucket != NULL) {
2853#ifdef UMA_DEBUG_ALLOC
2854                printf("uma_zfree: Putting old bucket on the free list.\n");
2855#endif
2856                /* ub_cnt is pointing to the last free item */
2857                KASSERT(bucket->ub_cnt != 0,
2858                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2859                LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2860        }
2861
2862        /*
2863         * We bump the uz count when the cache size is insufficient to
2864         * handle the working set.
2865         */
2866        if (lockfail && zone->uz_count < BUCKET_MAX)
2867                zone->uz_count++;
2868        ZONE_UNLOCK(zone);
2869
2870#ifdef UMA_DEBUG_ALLOC
2871        printf("uma_zfree: Allocating new free bucket.\n");
2872#endif
2873        bucket = bucket_alloc(zone, udata, M_NOWAIT);
2874        if (bucket) {
2875                critical_enter();
2876                cpu = curcpu;
2877                cache = &zone->uz_cpu[cpu];
2878                if (cache->uc_freebucket == NULL) {
2879                        cache->uc_freebucket = bucket;
2880                        goto zfree_start;
2881                }
2882                /*
2883                 * We lost the race, start over.  We have to drop our
2884                 * critical section to free the bucket.
2885                 */
2886                critical_exit();
2887                bucket_free(zone, bucket, udata);
2888                goto zfree_restart;
2889        }
2890
2891        /*
2892         * If nothing else caught this, we'll just do an internal free.
2893         */
2894zfree_item:
2895        zone_free_item(zone, item, udata, SKIP_DTOR);
2896
2897        return;
2898}
2899
2900static void
2901slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
2902{
2903        uint8_t freei;
2904
2905        mtx_assert(&keg->uk_lock, MA_OWNED);
2906        MPASS(keg == slab->us_keg);
2907
2908        /* Do we need to remove from any lists? */
2909        if (slab->us_freecount+1 == keg->uk_ipers) {
2910                LIST_REMOVE(slab, us_link);
2911                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2912        } else if (slab->us_freecount == 0) {
2913                LIST_REMOVE(slab, us_link);
2914                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2915        }
2916
2917        /* Slab management. */
2918        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2919        BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
2920        slab->us_freecount++;
2921
2922        /* Keg statistics. */
2923        keg->uk_free++;
2924}
2925
2926static void
2927zone_release(uma_zone_t zone, void **bucket, int cnt)
2928{
2929        void *item;
2930        uma_slab_t slab;
2931        uma_keg_t keg;
2932        uint8_t *mem;
2933        int clearfull;
2934        int i;
2935
2936        clearfull = 0;
2937        keg = zone_first_keg(zone);
2938        KEG_LOCK(keg);
2939        for (i = 0; i < cnt; i++) {
2940                item = bucket[i];
2941                if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2942                        mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
2943                        if (zone->uz_flags & UMA_ZONE_HASH) {
2944                                slab = hash_sfind(&keg->uk_hash, mem);
2945                        } else {
2946                                mem += keg->uk_pgoff;
2947                                slab = (uma_slab_t)mem;
2948                        }
2949                } else {
2950                        slab = vtoslab((vm_offset_t)item);
2951                        if (slab->us_keg != keg) {
2952                                KEG_UNLOCK(keg);
2953                                keg = slab->us_keg;
2954                                KEG_LOCK(keg);
2955                        }
2956                }
2957                slab_free_item(keg, slab, item);
2958                if (keg->uk_flags & UMA_ZFLAG_FULL) {
2959                        if (keg->uk_pages < keg->uk_maxpages) {
2960                                keg->uk_flags &= ~UMA_ZFLAG_FULL;
2961                                clearfull = 1;
2962                        }
2963
2964                        /*
2965                         * We can handle one more allocation. Since we're
2966                         * clearing ZFLAG_FULL, wake up all procs blocked
2967                         * on pages. This should be uncommon, so keeping this
2968                         * simple for now (rather than adding count of blocked
2969                         * threads etc).
2970                         */
2971                        wakeup(keg);
2972                }
2973        }
2974        KEG_UNLOCK(keg);
2975        if (clearfull) {
2976                ZONE_LOCK(zone);
2977                zone->uz_flags &= ~UMA_ZFLAG_FULL;
2978                wakeup(zone);
2979                ZONE_UNLOCK(zone);
2980        }
2981
2982}
2983
2984/*
2985 * Frees a single item to any zone.
2986 *
2987 * Arguments:
2988 *      zone   The zone to free to
2989 *      item   The item we're freeing
2990 *      udata  User supplied data for the dtor
2991 *      skip   Skip dtors and finis
2992 */
2993static void
2994zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2995{
2996
2997#ifdef INVARIANTS
2998        if (skip == SKIP_NONE) {
2999                if (zone->uz_flags & UMA_ZONE_MALLOC)
3000                        uma_dbg_free(zone, udata, item);
3001                else
3002                        uma_dbg_free(zone, NULL, item);
3003        }
3004#endif
3005        if (skip < SKIP_DTOR && zone->uz_dtor)
3006                zone->uz_dtor(item, zone->uz_size, udata);
3007
3008        if (skip < SKIP_FINI && zone->uz_fini)
3009                zone->uz_fini(item, zone->uz_size);
3010
3011        atomic_add_long(&zone->uz_frees, 1);
3012        zone->uz_release(zone->uz_arg, &item, 1);
3013}
3014
3015/* See uma.h */
3016int
3017uma_zone_set_max(uma_zone_t zone, int nitems)
3018{
3019        uma_keg_t keg;
3020
3021        keg = zone_first_keg(zone);
3022        if (keg == NULL)
3023                return (0);
3024        KEG_LOCK(keg);
3025#ifdef __rtems__
3026#ifdef SMP
3027        /*
3028         * Ensure we have enough items to fill the per-processor caches.  This
3029         * is a heuristic approach and does not work under all conditions.
3030         */
3031        nitems += 2 * BUCKET_MAX * (mp_maxid + 1);
3032#endif
3033#endif /* __rtems__ */
3034        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
3035        if (keg->uk_maxpages * keg->uk_ipers < nitems)
3036                keg->uk_maxpages += keg->uk_ppera;
3037        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3038        KEG_UNLOCK(keg);
3039
3040        return (nitems);
3041}
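
/*
 * Sketch of intended use (foo_zone is hypothetical): the requested limit
 * is rounded to whole slabs, so the return value, not the argument, is
 * the limit actually in effect:
 *
 *      int eff_limit;
 *
 *      eff_limit = uma_zone_set_max(foo_zone, 1024);
 */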
3042
3043/* See uma.h */
3044int
3045uma_zone_get_max(uma_zone_t zone)
3046{
3047        int nitems;
3048        uma_keg_t keg;
3049
3050        keg = zone_first_keg(zone);
3051        if (keg == NULL)
3052                return (0);
3053        KEG_LOCK(keg);
3054        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3055        KEG_UNLOCK(keg);
3056
3057        return (nitems);
3058}
3059
3060/* See uma.h */
3061void
3062uma_zone_set_warning(uma_zone_t zone, const char *warning)
3063{
3064
3065        ZONE_LOCK(zone);
3066        zone->uz_warning = warning;
3067        ZONE_UNLOCK(zone);
3068}
3069
3070/* See uma.h */
3071void
3072uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
3073{
3074
3075        ZONE_LOCK(zone);
3076        TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
3077        ZONE_UNLOCK(zone);
3078}
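
/*
 * Sketch of intended use (the warning text and handler are hypothetical):
 * both hooks fire from the slab layer when the zone runs into its limit
 * (see the zone_log_warning()/zone_maxaction() calls in keg_fetch_slab()
 * above); the warning itself is rate-limited via uz_ratecheck:
 *
 *      uma_zone_set_warning(foo_zone, "foo zone limit reached");
 *      uma_zone_set_maxaction(foo_zone, foo_maxaction);
 */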
3079
3080/* See uma.h */
3081int
3082uma_zone_get_cur(uma_zone_t zone)
3083{
3084        int64_t nitems;
3085        u_int i;
3086
3087        ZONE_LOCK(zone);
3088        nitems = zone->uz_allocs - zone->uz_frees;
3089        CPU_FOREACH(i) {
3090                /*
3091                 * See the comment in sysctl_vm_zone_stats() regarding the
3092                 * safety of accessing the per-cpu caches. With the zone lock
3093                 * held, it is safe, but can potentially result in stale data.
3094                 */
3095                nitems += zone->uz_cpu[i].uc_allocs -
3096                    zone->uz_cpu[i].uc_frees;
3097        }
3098        ZONE_UNLOCK(zone);
3099
3100        return (nitems < 0 ? 0 : nitems);
3101}
3102
3103/* See uma.h */
3104void
3105uma_zone_set_init(uma_zone_t zone, uma_init uminit)
3106{
3107        uma_keg_t keg;
3108
3109        keg = zone_first_keg(zone);
3110        KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
3111        KEG_LOCK(keg);
3112        KASSERT(keg->uk_pages == 0,
3113            ("uma_zone_set_init on non-empty keg"));
3114        keg->uk_init = uminit;
3115        KEG_UNLOCK(keg);
3116}
3117
3118/* See uma.h */
3119void
3120uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3121{
3122        uma_keg_t keg;
3123
3124        keg = zone_first_keg(zone);
3125        KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
3126        KEG_LOCK(keg);
3127        KASSERT(keg->uk_pages == 0,
3128            ("uma_zone_set_fini on non-empty keg"));
3129        keg->uk_fini = fini;
3130        KEG_UNLOCK(keg);
3131}
3132
3133/* See uma.h */
3134void
3135uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3136{
3137
3138        ZONE_LOCK(zone);
3139        KASSERT(zone_first_keg(zone)->uk_pages == 0,
3140            ("uma_zone_set_zinit on non-empty keg"));
3141        zone->uz_init = zinit;
3142        ZONE_UNLOCK(zone);
3143}
3144
3145/* See uma.h */
3146void
3147uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3148{
3149
3150        ZONE_LOCK(zone);
3151        KASSERT(zone_first_keg(zone)->uk_pages == 0,
3152            ("uma_zone_set_zfini on non-empty keg"));
3153        zone->uz_fini = zfini;
3154        ZONE_UNLOCK(zone);
3155}
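
/*
 * Sketch of intended use (foo_zone, foo_init and foo_fini are
 * hypothetical): these hooks may only be attached while the backing keg
 * is still empty, as the uk_pages == 0 assertions above enforce:
 *
 *      foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *          NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *      uma_zone_set_init(foo_zone, foo_init);
 *      uma_zone_set_fini(foo_zone, foo_fini);
 */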
3156
3157/* See uma.h */
3158/* XXX uk_freef is not actually used with the zone locked */
3159void
3160uma_zone_set_freef(uma_zone_t zone, uma_free freef)
3161{
3162        uma_keg_t keg;
3163
3164        keg = zone_first_keg(zone);
3165        KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3166        KEG_LOCK(keg);
3167        keg->uk_freef = freef;
3168        KEG_UNLOCK(keg);
3169}
3170
3171/* See uma.h */
3172/* XXX uk_allocf is not actually used with the zone locked */
3173void
3174uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3175{
3176        uma_keg_t keg;
3177
3178        keg = zone_first_keg(zone);
3179        KEG_LOCK(keg);
3180        keg->uk_allocf = allocf;
3181        KEG_UNLOCK(keg);
3182}
3183
3184/* See uma.h */
3185void
3186uma_zone_reserve(uma_zone_t zone, int items)
3187{
3188        uma_keg_t keg;
3189
3190        keg = zone_first_keg(zone);
3191        if (keg == NULL)
3192                return;
3193        KEG_LOCK(keg);
3194        keg->uk_reserve = items;
3195        KEG_UNLOCK(keg);
3196
3197        return;
3198}
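
/*
 * Sketch of intended use (foo_zone is hypothetical): keg_fetch_slab()
 * keeps the reserved items back from ordinary requests; only allocations
 * that pass M_USE_RESERVE may consume them:
 *
 *      uma_zone_reserve(foo_zone, 8);
 *      ...
 *      fp = uma_zalloc(foo_zone, M_NOWAIT | M_USE_RESERVE);
 */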
3199
3200#ifndef __rtems__
3201/* See uma.h */
3202int
3203uma_zone_reserve_kva(uma_zone_t zone, int count)
3204{
3205        uma_keg_t keg;
3206        vm_offset_t kva;
3207        u_int pages;
3208
3209        keg = zone_first_keg(zone);
3210        if (keg == NULL)
3211                return (0);
3212        pages = count / keg->uk_ipers;
3213
3214        if (pages * keg->uk_ipers < count)
3215                pages++;
3216        pages *= keg->uk_ppera;
3217
3218#ifdef UMA_MD_SMALL_ALLOC
3219        if (keg->uk_ppera > 1) {
3220#else
3221        if (1) {
3222#endif
3223                kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
3224                if (kva == 0)
3225                        return (0);
3226        } else
3227                kva = 0;
3228        KEG_LOCK(keg);
3229        keg->uk_kva = kva;
3230        keg->uk_offset = 0;
3231        keg->uk_maxpages = pages;
3232#ifdef UMA_MD_SMALL_ALLOC
3233        keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3234#else
3235        keg->uk_allocf = noobj_alloc;
3236#endif
3237        keg->uk_flags |= UMA_ZONE_NOFREE;
3238        KEG_UNLOCK(keg);
3239
3240        return (1);
3241}
3242
3243/* See uma.h */
3244void
3245uma_prealloc(uma_zone_t zone, int items)
3246{
3247        int slabs;
3248        uma_slab_t slab;
3249        uma_keg_t keg;
3250
3251        keg = zone_first_keg(zone);
3252        if (keg == NULL)
3253                return;
3254        KEG_LOCK(keg);
3255        slabs = items / keg->uk_ipers;
3256        if (slabs * keg->uk_ipers < items)
3257                slabs++;
3258        while (slabs > 0) {
3259                slab = keg_alloc_slab(keg, zone, M_WAITOK);
3260                if (slab == NULL)
3261                        break;
3262                MPASS(slab->us_keg == keg);
3263                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3264                slabs--;
3265        }
3266        KEG_UNLOCK(keg);
3267}
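
/*
 * Sketch of intended use (foo_zone is hypothetical): populate the keg
 * with enough free slabs to satisfy at least the given number of items
 * up front, so that later allocations need not go to the VM:
 *
 *      uma_prealloc(foo_zone, 128);
 */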
3268#endif /* __rtems__ */
3269
3270/* See uma.h */
3271static void
3272uma_reclaim_locked(bool kmem_danger)
3273{
3274
3275#ifdef UMA_DEBUG
3276        printf("UMA: vm asked us to release pages!\n");
3277#endif
3278        sx_assert(&uma_drain_lock, SA_XLOCKED);
3279        bucket_enable();
3280        zone_foreach(zone_drain);
3281#ifndef __rtems__
3282        if (vm_page_count_min() || kmem_danger) {
3283                cache_drain_safe(NULL);
3284                zone_foreach(zone_drain);
3285        }
3286#endif /* __rtems__ */
3287        /*
3288         * Some slabs may have been freed, but this zone was visited early;
3289         * visit it again so that we can free pages that became empty once
3290         * the other zones were drained.  We have to do the same for buckets.
3291         */
3292        zone_drain(slabzone);
3293        bucket_zone_drain();
3294}
3295
3296void
3297uma_reclaim(void)
3298{
3299
3300        sx_xlock(&uma_drain_lock);
3301        uma_reclaim_locked(false);
3302        sx_xunlock(&uma_drain_lock);
3303}
3304
3305static int uma_reclaim_needed;
3306
3307void
3308uma_reclaim_wakeup(void)
3309{
3310
3311        uma_reclaim_needed = 1;
3312        wakeup(&uma_reclaim_needed);
3313}
3314
3315void
3316uma_reclaim_worker(void *arg __unused)
3317{
3318
3319        sx_xlock(&uma_drain_lock);
3320        for (;;) {
3321                sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
3322                    "umarcl", 0);
3323                if (uma_reclaim_needed) {
3324                        uma_reclaim_needed = 0;
3325#ifndef __rtems__
3326                        sx_xunlock(&uma_drain_lock);
3327                        EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
3328                        sx_xlock(&uma_drain_lock);
3329#endif /* __rtems__ */
3330                        uma_reclaim_locked(true);
3331                }
3332        }
3333}
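
/*
 * Usage sketch: a thread that may sleep can drain every zone synchronously
 * with uma_reclaim(); contexts that must not block should instead call
 * uma_reclaim_wakeup() and let the worker above do the work.  On FreeBSD the
 * worker also fires the vm_lowmem event first, so a subsystem that keeps a
 * private cache of zone items can hook that event and uma_zfree() the cached
 * items before the drain.  "mysubsys_flush_cache" is hypothetical.
 *
 *	static void
 *	mysubsys_lowmem(void *arg __unused, int flags __unused)
 *	{
 *		mysubsys_flush_cache();
 *	}
 *
 *	EVENTHANDLER_REGISTER(vm_lowmem, mysubsys_lowmem, NULL,
 *	    EVENTHANDLER_PRI_ANY);
 */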
3334
3335/* See uma.h */
3336int
3337uma_zone_exhausted(uma_zone_t zone)
3338{
3339        int full;
3340
3341        ZONE_LOCK(zone);
3342        full = (zone->uz_flags & UMA_ZFLAG_FULL);
3343        ZONE_UNLOCK(zone);
3344        return (full);
3345}
3346
3347int
3348uma_zone_exhausted_nolock(uma_zone_t zone)
3349{
3350        return (zone->uz_flags & UMA_ZFLAG_FULL);
3351}
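
/*
 * Usage sketch: a caller that would rather back off than sleep can check a
 * limited zone's UMA_ZFLAG_FULL state before queueing more work.  The zone
 * name and error handling are illustrative.
 *
 *	if (uma_zone_exhausted(limited_zone))
 *		return (ENOBUFS);
 *	item = uma_zalloc(limited_zone, M_NOWAIT);
 */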
3352
3353#ifndef __rtems__
3354void *
3355uma_large_malloc(vm_size_t size, int wait)
3356{
3357        void *mem;
3358        uma_slab_t slab;
3359        uint8_t flags;
3360
3361        slab = zone_alloc_item(slabzone, NULL, wait);
3362        if (slab == NULL)
3363                return (NULL);
3364        mem = page_alloc(NULL, size, &flags, wait);
3365        if (mem) {
3366                vsetslab((vm_offset_t)mem, slab);
3367                slab->us_data = mem;
3368                slab->us_flags = flags | UMA_SLAB_MALLOC;
3369                slab->us_size = size;
3370        } else {
3371                zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3372        }
3373
3374        return (mem);
3375}
3376
3377void
3378uma_large_free(uma_slab_t slab)
3379{
3380
3381        page_free(slab->us_data, slab->us_size, slab->us_flags);
3382        zone_free_item(slabzone, slab, NULL, SKIP_NONE);
3383}
3384#endif /* __rtems__ */
3385
3386static void
3387uma_zero_item(void *item, uma_zone_t zone)
3388{
3389        int i;
3390
3391        if (zone->uz_flags & UMA_ZONE_PCPU) {
3392                CPU_FOREACH(i)
3393                        bzero(zpcpu_get_cpu(item, i), zone->uz_size);
3394        } else
3395                bzero(item, zone->uz_size);
3396}
3397
3398void
3399uma_print_stats(void)
3400{
3401        zone_foreach(uma_print_zone);
3402}
3403
3404static void
3405slab_print(uma_slab_t slab)
3406{
3407        printf("slab: keg %p, data %p, freecount %d\n",
3408                slab->us_keg, slab->us_data, slab->us_freecount);
3409}
3410
3411static void
3412cache_print(uma_cache_t cache)
3413{
3414        printf("alloc: %p(%d), free: %p(%d)\n",
3415                cache->uc_allocbucket,
3416                cache->uc_allocbucket ? cache->uc_allocbucket->ub_cnt : 0,
3417                cache->uc_freebucket,
3418                cache->uc_freebucket ? cache->uc_freebucket->ub_cnt : 0);
3419}
3420
3421static void
3422uma_print_keg(uma_keg_t keg)
3423{
3424        uma_slab_t slab;
3425
3426        printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
3427            "out %d free %d limit %d\n",
3428            keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
3429            keg->uk_ipers, keg->uk_ppera,
3430            (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
3431            keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
3432        printf("Part slabs:\n");
3433        LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
3434                slab_print(slab);
3435        printf("Free slabs:\n");
3436        LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
3437                slab_print(slab);
3438        printf("Full slabs:\n");
3439        LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
3440                slab_print(slab);
3441}
3442
3443void
3444uma_print_zone(uma_zone_t zone)
3445{
3446        uma_cache_t cache;
3447        uma_klink_t kl;
3448        int i;
3449
3450        printf("zone: %s(%p) size %d flags %#x\n",
3451            zone->uz_name, zone, zone->uz_size, zone->uz_flags);
3452        LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
3453                uma_print_keg(kl->kl_keg);
3454        CPU_FOREACH(i) {
3455                cache = &zone->uz_cpu[i];
3456                printf("CPU %d Cache:\n", i);
3457                cache_print(cache);
3458        }
3459}
3460
3461#ifndef __rtems__
3462#ifdef DDB
3463/*
3464 * Generate statistics across both the zone and its per-CPU caches.  Return
3465 * each requested statistic through the corresponding pointer if it is non-NULL.
3466 *
3467 * Note: does not update the zone statistics, as it can't safely clear the
3468 * per-CPU cache statistic.
3469 *
3470 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3471 * safe from off-CPU; we should modify the caches to track this information
3472 * directly so that we don't have to.
3473 */
3474static void
3475uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
3476    uint64_t *freesp, uint64_t *sleepsp)
3477{
3478        uma_cache_t cache;
3479        uint64_t allocs, frees, sleeps;
3480        int cachefree, cpu;
3481
3482        allocs = frees = sleeps = 0;
3483        cachefree = 0;
3484        CPU_FOREACH(cpu) {
3485                cache = &z->uz_cpu[cpu];
3486                if (cache->uc_allocbucket != NULL)
3487                        cachefree += cache->uc_allocbucket->ub_cnt;
3488                if (cache->uc_freebucket != NULL)
3489                        cachefree += cache->uc_freebucket->ub_cnt;
3490                allocs += cache->uc_allocs;
3491                frees += cache->uc_frees;
3492        }
3493        allocs += z->uz_allocs;
3494        frees += z->uz_frees;
3495        sleeps += z->uz_sleeps;
3496        if (cachefreep != NULL)
3497                *cachefreep = cachefree;
3498        if (allocsp != NULL)
3499                *allocsp = allocs;
3500        if (freesp != NULL)
3501                *freesp = frees;
3502        if (sleepsp != NULL)
3503                *sleepsp = sleeps;
3504}
3505#endif /* DDB */
3506#endif /* __rtems__ */
3507
3508static int
3509sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3510{
3511        uma_keg_t kz;
3512        uma_zone_t z;
3513        int count;
3514
3515        count = 0;
3516        rw_rlock(&uma_rwlock);
3517        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3518                LIST_FOREACH(z, &kz->uk_zones, uz_link)
3519                        count++;
3520        }
3521        rw_runlock(&uma_rwlock);
3522        return (sysctl_handle_int(oidp, &count, 0, req));
3523}
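
/*
 * Usage sketch (userland, illustrative): this handler reports how many zones
 * exist.  Assuming it is registered as "vm.zone_count", as in stock FreeBSD,
 * it can be read with sysctlbyname(3):
 *
 *	int count;
 *	size_t len = sizeof(count);
 *
 *	if (sysctlbyname("vm.zone_count", &count, &len, NULL, 0) == 0)
 *		printf("%d UMA zones\n", count);
 */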
3524
3525static int
3526sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
3527{
3528        struct uma_stream_header ush;
3529        struct uma_type_header uth;
3530        struct uma_percpu_stat ups;
3531        uma_bucket_t bucket;
3532        struct sbuf sbuf;
3533        uma_cache_t cache;
3534        uma_klink_t kl;
3535        uma_keg_t kz;
3536        uma_zone_t z;
3537        uma_keg_t k;
3538        int count, error, i;
3539
3540        error = sysctl_wire_old_buffer(req, 0);
3541        if (error != 0)
3542                return (error);
3543        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
3544        sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);
3545
3546        count = 0;
3547        rw_rlock(&uma_rwlock);
3548        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3549                LIST_FOREACH(z, &kz->uk_zones, uz_link)
3550                        count++;
3551        }
3552
3553        /*
3554         * Insert stream header.
3555         */
3556        bzero(&ush, sizeof(ush));
3557        ush.ush_version = UMA_STREAM_VERSION;
3558        ush.ush_maxcpus = (mp_maxid + 1);
3559        ush.ush_count = count;
3560        (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
3561
3562        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3563                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3564                        bzero(&uth, sizeof(uth));
3565                        ZONE_LOCK(z);
3566                        strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
3567                        uth.uth_align = kz->uk_align;
3568                        uth.uth_size = kz->uk_size;
3569                        uth.uth_rsize = kz->uk_rsize;
3570                        LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
3571                                k = kl->kl_keg;
3572                                uth.uth_maxpages += k->uk_maxpages;
3573                                uth.uth_pages += k->uk_pages;
3574                                uth.uth_keg_free += k->uk_free;
3575                                uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
3576                                    * k->uk_ipers;
3577                        }
3578
3579                        /*
3580                         * A zone is secondary if it is not the first entry
3581                         * on the keg's zone list.
3582                         */
3583                        if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
3584                            (LIST_FIRST(&kz->uk_zones) != z))
3585                                uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3586
3587                        LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3588                                uth.uth_zone_free += bucket->ub_cnt;
3589                        uth.uth_allocs = z->uz_allocs;
3590                        uth.uth_frees = z->uz_frees;
3591                        uth.uth_fails = z->uz_fails;
3592                        uth.uth_sleeps = z->uz_sleeps;
3593                        (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
3594                        /*
3595                         * While it is not normally safe to access the cache
3596                         * bucket pointers while not on the CPU that owns the
3597                         * cache, we only allow the pointers to be exchanged
3598                         * without the zone lock held, not invalidated, so
3599                         * accept the possible race associated with bucket
3600                         * exchange during monitoring.
3601                         */
3602                        for (i = 0; i < (mp_maxid + 1); i++) {
3603                                bzero(&ups, sizeof(ups));
3604                                if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3605                                        goto skip;
3606                                if (CPU_ABSENT(i))
3607                                        goto skip;
3608                                cache = &z->uz_cpu[i];
3609                                if (cache->uc_allocbucket != NULL)
3610                                        ups.ups_cache_free +=
3611                                            cache->uc_allocbucket->ub_cnt;
3612                                if (cache->uc_freebucket != NULL)
3613                                        ups.ups_cache_free +=
3614                                            cache->uc_freebucket->ub_cnt;
3615                                ups.ups_allocs = cache->uc_allocs;
3616                                ups.ups_frees = cache->uc_frees;
3617skip:
3618                                (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
3619                        }
3620                        ZONE_UNLOCK(z);
3621                }
3622        }
3623        rw_runlock(&uma_rwlock);
3624        error = sbuf_finish(&sbuf);
3625        sbuf_delete(&sbuf);
3626        return (error);
3627}
3628
3629int
3630sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
3631{
3632        uma_zone_t zone = *(uma_zone_t *)arg1;
3633        int error, max;
3634
3635        max = uma_zone_get_max(zone);
3636        error = sysctl_handle_int(oidp, &max, 0, req);
3637        if (error || !req->newptr)
3638                return (error);
3639
3640        uma_zone_set_max(zone, max);
3641
3642        return (0);
3643}
3644
3645int
3646sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
3647{
3648        uma_zone_t zone = *(uma_zone_t *)arg1;
3649        int cur;
3650
3651        cur = uma_zone_get_cur(zone);
3652        return (sysctl_handle_int(oidp, &cur, 0, req));
3653}
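
/*
 * Usage sketch: these handlers let a subsystem export a zone's item limit and
 * current use as sysctls.  arg1 must be the address of the subsystem's
 * uma_zone_t variable, since the handlers dereference it (see above).  The
 * "foo" names and the _kern parent node are hypothetical.
 *
 *	static uma_zone_t foo_zone;
 *
 *	SYSCTL_PROC(_kern, OID_AUTO, foo_zone_max,
 *	    CTLTYPE_INT | CTLFLAG_RW, &foo_zone, 0,
 *	    sysctl_handle_uma_zone_max, "I", "Maximum number of foo items");
 *	SYSCTL_PROC(_kern, OID_AUTO, foo_zone_cur,
 *	    CTLTYPE_INT | CTLFLAG_RD, &foo_zone, 0,
 *	    sysctl_handle_uma_zone_cur, "I", "Current number of foo items");
 */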
3654
3655#ifdef INVARIANTS
3656static uma_slab_t
3657uma_dbg_getslab(uma_zone_t zone, void *item)
3658{
3659        uma_slab_t slab;
3660        uma_keg_t keg;
3661        uint8_t *mem;
3662
3663        mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
3664        if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
3665                slab = vtoslab((vm_offset_t)mem);
3666        } else {
3667                /*
3668                 * It is safe to return the slab here even though the
3669                 * zone is unlocked because the item's allocation state
3670                 * essentially holds a reference.
3671                 */
3672                ZONE_LOCK(zone);
3673                keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
3674                if (keg->uk_flags & UMA_ZONE_HASH)
3675                        slab = hash_sfind(&keg->uk_hash, mem);
3676                else
3677                        slab = (uma_slab_t)(mem + keg->uk_pgoff);
3678                ZONE_UNLOCK(zone);
3679        }
3680
3681        return (slab);
3682}
3683
3684/*
3685 * Set up the slab's freei data such that uma_dbg_free can function.
3686 *
3687 */
3688static void
3689uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
3690{
3691        uma_keg_t keg;
3692        int freei;
3693
3694        if (zone_first_keg(zone) == NULL)
3695                return;
3696        if (slab == NULL) {
3697                slab = uma_dbg_getslab(zone, item);
3698                if (slab == NULL)
3699                        panic("uma: item %p did not belong to zone %s\n",
3700                            item, zone->uz_name);
3701        }
3702        keg = slab->us_keg;
3703        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3704
3705        if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
3706                panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
3707                    item, zone, zone->uz_name, slab, freei);
3708        BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
3709
3710        return;
3711}
3712
3713/*
3714 * Verifies freed addresses.  Checks for alignment, valid slab membership
3715 * and duplicate frees.
3716 *
3717 */
3718static void
3719uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
3720{
3721        uma_keg_t keg;
3722        int freei;
3723
3724        if (zone_first_keg(zone) == NULL)
3725                return;
3726        if (slab == NULL) {
3727                slab = uma_dbg_getslab(zone, item);
3728                if (slab == NULL)
3729                        panic("uma: Freed item %p did not belong to zone %s\n",
3730                            item, zone->uz_name);
3731        }
3732        keg = slab->us_keg;
3733        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
3734
3735        if (freei >= keg->uk_ipers)
3736                panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
3737                    item, zone, zone->uz_name, slab, freei);
3738
3739        if (((freei * keg->uk_rsize) + slab->us_data) != item)
3740                panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
3741                    item, zone, zone->uz_name, slab, freei);
3742
3743        if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
3744                panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
3745                    item, zone, zone->uz_name, slab, freei);
3746
3747        BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
3748}
3749#endif /* INVARIANTS */
3750
3751#ifndef __rtems__
3752#ifdef DDB
3753DB_SHOW_COMMAND(uma, db_show_uma)
3754{
3755        uint64_t allocs, frees, sleeps;
3756        uma_bucket_t bucket;
3757        uma_keg_t kz;
3758        uma_zone_t z;
3759        int cachefree;
3760
3761        db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
3762            "Free", "Requests", "Sleeps", "Bucket");
3763        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3764                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3765                        if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3766                                allocs = z->uz_allocs;
3767                                frees = z->uz_frees;
3768                                sleeps = z->uz_sleeps;
3769                                cachefree = 0;
3770                        } else
3771                                uma_zone_sumstat(z, &cachefree, &allocs,
3772                                    &frees, &sleeps);
3773                        if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3774                            (LIST_FIRST(&kz->uk_zones) != z)))
3775                                cachefree += kz->uk_free;
3776                        LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3777                                cachefree += bucket->ub_cnt;
3778                        db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
3779                            z->uz_name, (uintmax_t)kz->uk_size,
3780                            (intmax_t)(allocs - frees), cachefree,
3781                            (uintmax_t)allocs, sleeps, z->uz_count);
3782                        if (db_pager_quit)
3783                                return;
3784                }
3785        }
3786}
3787
3788DB_SHOW_COMMAND(umacache, db_show_umacache)
3789{
3790        uint64_t allocs, frees;
3791        uma_bucket_t bucket;
3792        uma_zone_t z;
3793        int cachefree;
3794
3795        db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3796            "Requests", "Bucket");
3797        LIST_FOREACH(z, &uma_cachezones, uz_link) {
3798                uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
3799                LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3800                        cachefree += bucket->ub_cnt;
3801                db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
3802                    z->uz_name, (uintmax_t)z->uz_size,
3803                    (intmax_t)(allocs - frees), cachefree,
3804                    (uintmax_t)allocs, z->uz_count);
3805                if (db_pager_quit)
3806                        return;
3807        }
3808}
3809#endif  /* DDB */
3810#endif /* __rtems__ */