source: rtems-libbsd/freebsd/sys/vm/uma_core.c @ b988014

Last change on this file since b988014 was b988014, checked in by Sebastian Huber <sebastian.huber@…>, on 04/26/16 at 13:02:00

ZONE(9): Fix uma_zone_set_max()

Account for items provided for per-processor caches.

1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * uma_core.c  Implementation of the Universal Memory allocator
33 *
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36 * efficient.  A primary design goal is to return unused memory to the rest of
37 * the system.  This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
40 *
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
42 * are well known.
43 *
44 */
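/*
 * Editor's note: a minimal consumer-side sketch of the zone API this file
 * implements, for orientation only (not part of the original source).  The
 * structure, zone name and helper functions below are made up; see
 * vm/uma.h for the complete interface.
 *
 *	static uma_zone_t foo_zone;
 *
 *	struct foo {
 *		int	f_state;
 *	};
 *
 *	static void
 *	foo_zone_init(void)
 *	{
 *		foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *		    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *	}
 *
 *	static struct foo *
 *	foo_alloc(void)
 *	{
 *		return (uma_zalloc(foo_zone, M_WAITOK | M_ZERO));
 *	}
 *
 *	static void
 *	foo_free(struct foo *fp)
 *	{
 *		uma_zfree(foo_zone, fp);
 *	}
 */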
45
46/*
47 * TODO:
48 *      - Improve memory usage for large allocations
49 *      - Investigate cache size adjustments
50 */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD$");
54
55/* I should really use ktr.. */
56/*
57#define UMA_DEBUG 1
58#define UMA_DEBUG_ALLOC 1
59#define UMA_DEBUG_ALLOC_1 1
60*/
61
62#include <rtems/bsd/local/opt_ddb.h>
63#include <rtems/bsd/local/opt_param.h>
64#include <rtems/bsd/local/opt_vm.h>
65
66#include <sys/param.h>
67#include <sys/systm.h>
68#include <sys/bitset.h>
69#include <sys/eventhandler.h>
70#include <sys/kernel.h>
71#include <sys/types.h>
72#include <sys/queue.h>
73#include <sys/malloc.h>
74#include <sys/ktr.h>
75#include <sys/lock.h>
76#include <sys/sysctl.h>
77#include <sys/mutex.h>
78#include <sys/proc.h>
79#include <sys/random.h>
80#include <sys/rwlock.h>
81#include <sys/sbuf.h>
82#include <sys/sched.h>
83#include <sys/smp.h>
84#include <sys/taskqueue.h>
85#include <sys/vmmeter.h>
86
87#include <vm/vm.h>
88#include <vm/vm_object.h>
89#include <vm/vm_page.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_param.h>
92#include <vm/vm_map.h>
93#include <vm/vm_kern.h>
94#include <vm/vm_extern.h>
95#include <vm/uma.h>
96#include <vm/uma_int.h>
97#include <vm/uma_dbg.h>
98
99#include <ddb/ddb.h>
100#ifdef __rtems__
101  #ifdef RTEMS_SMP
102    /*
103     * It is essential that we have a per-processor cache, otherwise the
104     * critical_enter()/critical_exit() protection would be insufficient.
105     */
106    #undef curcpu
107    #define curcpu rtems_get_current_processor()
108    #undef mp_maxid
109    #define mp_maxid (rtems_get_processor_count() - 1)
110    #define SMP
111  #endif
112#endif /* __rtems__ */
113
114#ifdef DEBUG_MEMGUARD
115#include <vm/memguard.h>
116#endif
117
118/*
119 * This is the zone and keg from which all zones are spawned.  The idea is that
120 * even the zone & keg heads are allocated from the allocator, so we use the
121 * bss section to bootstrap us.
122 */
123static struct uma_keg masterkeg;
124static struct uma_zone masterzone_k;
125static struct uma_zone masterzone_z;
126static uma_zone_t kegs = &masterzone_k;
127static uma_zone_t zones = &masterzone_z;
128
129/* This is the zone from which all of uma_slab_t's are allocated. */
130static uma_zone_t slabzone;
131
132/*
133 * The initial hash tables come out of this zone so they can be allocated
134 * prior to malloc coming up.
135 */
136static uma_zone_t hashzone;
137
138/* The boot-time adjusted value for cache line alignment. */
139int uma_align_cache = 64 - 1;
140
141static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
142
143#ifndef __rtems__
144/*
145 * Are we allowed to allocate buckets?
146 */
147static int bucketdisable = 1;
148#else /* __rtems__ */
149#define bucketdisable 0
150#endif /* __rtems__ */
151
152/* Linked list of all kegs in the system */
153static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
154
155/* Linked list of all cache-only zones in the system */
156static LIST_HEAD(,uma_zone) uma_cachezones =
157    LIST_HEAD_INITIALIZER(uma_cachezones);
158
159/* This RW lock protects the keg list */
160static struct rwlock_padalign uma_rwlock;
161
162#ifndef __rtems__
163/* Linked list of boot time pages */
164static LIST_HEAD(,uma_slab) uma_boot_pages =
165    LIST_HEAD_INITIALIZER(uma_boot_pages);
166
167/* This mutex protects the boot time pages list */
168static struct mtx_padalign uma_boot_pages_mtx;
169#endif /* __rtems__ */
170
171static struct sx uma_drain_lock;
172
173#ifndef __rtems__
174/* Is the VM done starting up? */
175static int booted = 0;
176#define UMA_STARTUP     1
177#define UMA_STARTUP2    2
178#endif /* __rtems__ */
179
180/*
181 * This is the handle used to schedule events that need to happen
182 * outside of the allocation fast path.
183 */
184static struct callout uma_callout;
185#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
186
187/*
188 * This structure is passed as the zone ctor arg so that I don't have to create
189 * a special allocation function just for zones.
190 */
191struct uma_zctor_args {
192        const char *name;
193        size_t size;
194        uma_ctor ctor;
195        uma_dtor dtor;
196        uma_init uminit;
197        uma_fini fini;
198        uma_import import;
199        uma_release release;
200        void *arg;
201        uma_keg_t keg;
202        int align;
203        uint32_t flags;
204};
205
206struct uma_kctor_args {
207        uma_zone_t zone;
208        size_t size;
209        uma_init uminit;
210        uma_fini fini;
211        int align;
212        uint32_t flags;
213};
214
215struct uma_bucket_zone {
216        uma_zone_t      ubz_zone;
217        char            *ubz_name;
218        int             ubz_entries;    /* Number of items it can hold. */
219        int             ubz_maxsize;    /* Maximum allocation size per-item. */
220};
221
222/*
223 * Compute the actual number of bucket entries so that bucket allocations
224 * pack into power-of-two sizes for more efficient space utilization.
225 */
226#define BUCKET_SIZE(n)                                          \
227    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
228
229#define BUCKET_MAX      BUCKET_SIZE(256)
230
231struct uma_bucket_zone bucket_zones[] = {
232        { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
233        { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
234        { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
235        { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
236        { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
237        { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
238        { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
239        { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
240        { NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
241        { NULL, NULL, 0}
242};
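/*
 * Editor's note (illustrative, not part of the original source): on a
 * hypothetical LP64 machine with sizeof(void *) == 8 and
 * sizeof(struct uma_bucket) == 24 (two list pointers plus two int16_t
 * counters, padded to pointer alignment), BUCKET_SIZE() sizes each bucket
 * so that the whole allocation occupies n pointer words:
 *
 *	BUCKET_SIZE(4)   = ((8 * 4)   - 24) / 8 =   1 item slot   (32 bytes)
 *	BUCKET_SIZE(16)  = ((8 * 16)  - 24) / 8 =  13 item slots  (128 bytes)
 *	BUCKET_SIZE(256) = ((8 * 256) - 24) / 8 = 253 item slots  (2048 bytes)
 *
 * bucket_init() below then allocates roundup(sizeof(struct uma_bucket),
 * sizeof(void *)) + sizeof(void *) * ubz_entries bytes per bucket, which
 * works out to exactly n * sizeof(void *).
 */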
243
244/*
245 * Flags and enumerations to be passed to internal functions.
246 */
247enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
248
249/* Prototypes.. */
250
251#ifndef __rtems__
252static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
253#endif /* __rtems__ */
254static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
255#ifndef __rtems__
256static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
257#endif /* __rtems__ */
258static void page_free(void *, vm_size_t, uint8_t);
259static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
260static void cache_drain(uma_zone_t);
261static void bucket_drain(uma_zone_t, uma_bucket_t);
262static void bucket_cache_drain(uma_zone_t zone);
263static int keg_ctor(void *, int, void *, int);
264static void keg_dtor(void *, int, void *);
265static int zone_ctor(void *, int, void *, int);
266static void zone_dtor(void *, int, void *);
267static int zero_init(void *, int, int);
268static void keg_small_init(uma_keg_t keg);
269static void keg_large_init(uma_keg_t keg);
270static void zone_foreach(void (*zfunc)(uma_zone_t));
271static void zone_timeout(uma_zone_t zone);
272static int hash_alloc(struct uma_hash *);
273static int hash_expand(struct uma_hash *, struct uma_hash *);
274static void hash_free(struct uma_hash *hash);
275static void uma_timeout(void *);
276static void uma_startup3(void);
277static void *zone_alloc_item(uma_zone_t, void *, int);
278static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
279static void bucket_enable(void);
280static void bucket_init(void);
281static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
282static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
283static void bucket_zone_drain(void);
284static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
285static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
286#ifndef __rtems__
287static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
288#endif /* __rtems__ */
289static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
290static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
291static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
292    uma_fini fini, int align, uint32_t flags);
293static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
294static void zone_release(uma_zone_t zone, void **bucket, int cnt);
295static void uma_zero_item(void *item, uma_zone_t zone);
296
297void uma_print_zone(uma_zone_t);
298void uma_print_stats(void);
299static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
300static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
301
302#ifdef INVARIANTS
303static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
304static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
305#endif
306
307SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
308
309SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
310    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
311
312SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
313    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
314
315static int zone_warnings = 1;
316SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
317    "Warn when UMA zones become full");
318
319/*
320 * This routine checks to see whether or not it's safe to enable buckets.
321 */
322static void
323bucket_enable(void)
324{
325#ifndef __rtems__
326        bucketdisable = vm_page_count_min();
327#endif /* __rtems__ */
328}
329
330/*
331 * Initialize bucket_zones, the array of zones of buckets of various sizes.
332 *
333 * For each zone, calculate the memory required for each bucket, consisting
334 * of the header and an array of pointers.
335 */
336static void
337bucket_init(void)
338{
339        struct uma_bucket_zone *ubz;
340        int size;
341
342        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
343                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
344                size += sizeof(void *) * ubz->ubz_entries;
345                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
346                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
347                    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
348        }
349}
350
351/*
352 * Given a desired number of entries for a bucket, return the zone from which
353 * to allocate the bucket.
354 */
355static struct uma_bucket_zone *
356bucket_zone_lookup(int entries)
357{
358        struct uma_bucket_zone *ubz;
359
360        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
361                if (ubz->ubz_entries >= entries)
362                        return (ubz);
363        ubz--;
364        return (ubz);
365}
366
367static int
368bucket_select(int size)
369{
370        struct uma_bucket_zone *ubz;
371
372        ubz = &bucket_zones[0];
373        if (size > ubz->ubz_maxsize)
374                return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
375
376        for (; ubz->ubz_entries != 0; ubz++)
377                if (ubz->ubz_maxsize < size)
378                        break;
379        ubz--;
380        return (ubz->ubz_entries);
381}
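/*
 * Editor's note (illustrative, not part of the original source): under the
 * LP64 assumptions sketched above, a zone with 200-byte items gets its
 * initial uz_count from bucket_select(200): the loop stops at the first
 * row whose ubz_maxsize (128) is below the item size, steps back to the
 * "64 Bucket" row (ubz_maxsize 256) and returns BUCKET_SIZE(64) == 61.
 * bucket_zone_lookup(61) later maps that count back to the "64 Bucket"
 * zone, the first row holding at least 61 entries.  Items larger than
 * 4096 bytes take the first branch instead and get a count of
 * (ubz_maxsize * ubz_entries) / size, clamped to at least one.
 */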
382
383static uma_bucket_t
384bucket_alloc(uma_zone_t zone, void *udata, int flags)
385{
386        struct uma_bucket_zone *ubz;
387        uma_bucket_t bucket;
388
389#ifndef __rtems__
390        /*
391         * This is to stop us from allocating per cpu buckets while we're
392         * running out of vm.boot_pages.  Otherwise, we would exhaust the
393         * boot pages.  This also prevents us from allocating buckets in
394         * low memory situations.
395         */
396        if (bucketdisable)
397                return (NULL);
398#endif /* __rtems__ */
399        /*
400         * To limit bucket recursion we store the original zone flags
401         * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
402         * NOVM flag to persist even through deep recursions.  We also
403         * store ZFLAG_BUCKET once we have recursed attempting to allocate
404         * a bucket for a bucket zone so we do not allow infinite bucket
405         * recursion.  This cookie will even persist to frees of unused
406         * buckets via the allocation path or bucket allocations in the
407         * free path.
408         */
409        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
410                udata = (void *)(uintptr_t)zone->uz_flags;
411        else {
412                if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
413                        return (NULL);
414                udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
415        }
416        if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
417                flags |= M_NOVM;
418        ubz = bucket_zone_lookup(zone->uz_count);
419        if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
420                ubz++;
421        bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
422        if (bucket) {
423#ifdef INVARIANTS
424                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
425#endif
426                bucket->ub_cnt = 0;
427                bucket->ub_entries = ubz->ubz_entries;
428        }
429
430        return (bucket);
431}
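/*
 * Editor's note (not part of the original source): the udata cookie above
 * bounds bucket-for-bucket recursion.  A normal zone passes its own
 * uz_flags as the cookie; the first time a bucket zone itself needs a
 * bucket, the cookie gains UMA_ZFLAG_BUCKET, and any deeper attempt
 * returns NULL instead of recursing further.  The UMA_ZFLAG_CACHEONLY bit
 * in the cookie likewise keeps M_NOVM set across the whole chain.
 */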
432
433static void
434bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
435{
436        struct uma_bucket_zone *ubz;
437
438        KASSERT(bucket->ub_cnt == 0,
439            ("bucket_free: Freeing a non free bucket."));
440        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
441                udata = (void *)(uintptr_t)zone->uz_flags;
442        ubz = bucket_zone_lookup(bucket->ub_entries);
443        uma_zfree_arg(ubz->ubz_zone, bucket, udata);
444}
445
446static void
447bucket_zone_drain(void)
448{
449        struct uma_bucket_zone *ubz;
450
451        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
452                zone_drain(ubz->ubz_zone);
453}
454
455static void
456zone_log_warning(uma_zone_t zone)
457{
458        static const struct timeval warninterval = { 300, 0 };
459
460        if (!zone_warnings || zone->uz_warning == NULL)
461                return;
462
463        if (ratecheck(&zone->uz_ratecheck, &warninterval))
464                printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
465}
466
467static inline void
468zone_maxaction(uma_zone_t zone)
469{
470
471        if (zone->uz_maxaction.ta_func != NULL)
472                taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
473}
474
475static void
476zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
477{
478        uma_klink_t klink;
479
480        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
481                kegfn(klink->kl_keg);
482}
483
484/*
485 * Routine called by the timeout to fire off time-interval-based
486 * calculations (stats, hash size, etc.).
487 *
488 * Arguments:
489 *      arg   Unused
490 *
491 * Returns:
492 *      Nothing
493 */
494static void
495uma_timeout(void *unused)
496{
497        bucket_enable();
498        zone_foreach(zone_timeout);
499
500        /* Reschedule this event */
501        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
502}
503
504/*
505 * Routine to perform timeout driven calculations.  This expands the
506 * hashes and does per cpu statistics aggregation.
507 *
508 *  Returns nothing.
509 */
510static void
511keg_timeout(uma_keg_t keg)
512{
513
514        KEG_LOCK(keg);
515        /*
516         * Expand the keg hash table.
517         *
518         * This is done if the number of slabs is larger than the hash size.
519         * What I'm trying to do here is completely eliminate collisions.  This
520         * may be a little aggressive.  Should I allow for two collisions max?
521         */
522        if (keg->uk_flags & UMA_ZONE_HASH &&
523            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
524                struct uma_hash newhash;
525                struct uma_hash oldhash;
526                int ret;
527
528                /*
529                 * This is so involved because allocating and freeing
530                 * while the keg lock is held will lead to deadlock.
531                 * I have to do everything in stages and check for
532                 * races.
533                 */
534                newhash = keg->uk_hash;
535                KEG_UNLOCK(keg);
536                ret = hash_alloc(&newhash);
537                KEG_LOCK(keg);
538                if (ret) {
539                        if (hash_expand(&keg->uk_hash, &newhash)) {
540                                oldhash = keg->uk_hash;
541                                keg->uk_hash = newhash;
542                        } else
543                                oldhash = newhash;
544
545                        KEG_UNLOCK(keg);
546                        hash_free(&oldhash);
547                        return;
548                }
549        }
550        KEG_UNLOCK(keg);
551}
552
553static void
554zone_timeout(uma_zone_t zone)
555{
556
557        zone_foreach_keg(zone, &keg_timeout);
558}
559
560/*
561 * Allocate and zero fill the next sized hash table from the appropriate
562 * backing store.
563 *
564 * Arguments:
565 *      hash  A new hash structure with the old hash size in uh_hashsize
566 *
567 * Returns:
568 *      1 on success and 0 on failure.
569 */
570static int
571hash_alloc(struct uma_hash *hash)
572{
573        int oldsize;
574        int alloc;
575
576        oldsize = hash->uh_hashsize;
577
578        /* We're just going to go to a power of two greater */
579        if (oldsize)  {
580                hash->uh_hashsize = oldsize * 2;
581                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
582                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
583                    M_UMAHASH, M_NOWAIT);
584        } else {
585                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
586                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
587                    M_WAITOK);
588                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
589        }
590        if (hash->uh_slab_hash) {
591                bzero(hash->uh_slab_hash, alloc);
592                hash->uh_hashmask = hash->uh_hashsize - 1;
593                return (1);
594        }
595
596        return (0);
597}
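/*
 * Editor's note (illustrative, not part of the original source): assuming
 * UMA_HASH_SIZE_INIT is 32, a keg's first hash has 32 buckets and
 * uh_hashmask == 31.  keg_timeout() above requests an expansion once the
 * keg holds at least as many slabs (uk_pages / uk_ppera) as hash buckets,
 * so the table grows 32 -> 64 -> 128 -> ..., each step doubling
 * uh_hashsize and recomputing uh_hashmask = uh_hashsize - 1 so that
 * UMA_HASH() can mask instead of dividing.
 */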
598
599/*
600 * Expands the hash table for HASH zones.  This is done from zone_timeout
601 * to reduce collisions.  This must not be done in the regular allocation
602 * path, otherwise, we can recurse on the vm while allocating pages.
603 *
604 * Arguments:
605 *      oldhash  The hash you want to expand
606 *      newhash  The hash structure for the new table
607 *
608 * Returns:
609 *      Nothing
610 *
611 * Discussion:
612 */
613static int
614hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
615{
616        uma_slab_t slab;
617        int hval;
618        int i;
619
620        if (!newhash->uh_slab_hash)
621                return (0);
622
623        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
624                return (0);
625
626        /*
627         * I need to investigate hash algorithms for resizing without a
628         * full rehash.
629         */
630
631        for (i = 0; i < oldhash->uh_hashsize; i++)
632                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
633                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
634                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
635                        hval = UMA_HASH(newhash, slab->us_data);
636                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
637                            slab, us_hlink);
638                }
639
640        return (1);
641}
642
643/*
644 * Free the hash bucket to the appropriate backing store.
645 *
646 * Arguments:
647 *      slab_hash  The hash bucket we're freeing
648 *      hashsize   The number of entries in that hash bucket
649 *
650 * Returns:
651 *      Nothing
652 */
653static void
654hash_free(struct uma_hash *hash)
655{
656        if (hash->uh_slab_hash == NULL)
657                return;
658        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
659                zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
660        else
661                free(hash->uh_slab_hash, M_UMAHASH);
662}
663
664/*
665 * Frees all outstanding items in a bucket
666 *
667 * Arguments:
668 *      zone   The zone to free to, must be unlocked.
669 *      bucket The free/alloc bucket with items, cpu queue must be locked.
670 *
671 * Returns:
672 *      Nothing
673 */
674
675static void
676bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
677{
678        int i;
679
680        if (bucket == NULL)
681                return;
682
683        if (zone->uz_fini)
684                for (i = 0; i < bucket->ub_cnt; i++)
685                        zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
686        zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
687        bucket->ub_cnt = 0;
688}
689
690/*
691 * Drains the per cpu caches for a zone.
692 *
693 * NOTE: This may only be called while the zone is being torn down, and not
694 * during normal operation.  This is necessary in order that we do not have
695 * to migrate CPUs to drain the per-CPU caches.
696 *
697 * Arguments:
698 *      zone     The zone to drain, must be unlocked.
699 *
700 * Returns:
701 *      Nothing
702 */
703static void
704cache_drain(uma_zone_t zone)
705{
706        uma_cache_t cache;
707        int cpu;
708
709        /*
710         * XXX: It is safe to not lock the per-CPU caches, because we're
711         * tearing down the zone anyway.  I.e., there will be no further use
712         * of the caches at this point.
713         *
714         * XXX: It would be good to be able to assert that the zone is being
715         * torn down to prevent improper use of cache_drain().
716         *
717         * XXX: We lock the zone before passing into bucket_cache_drain() as
718         * it is used elsewhere.  Should the tear-down path be made special
719         * there in some form?
720         */
721        CPU_FOREACH(cpu) {
722                cache = &zone->uz_cpu[cpu];
723                bucket_drain(zone, cache->uc_allocbucket);
724                bucket_drain(zone, cache->uc_freebucket);
725                if (cache->uc_allocbucket != NULL)
726                        bucket_free(zone, cache->uc_allocbucket, NULL);
727                if (cache->uc_freebucket != NULL)
728                        bucket_free(zone, cache->uc_freebucket, NULL);
729                cache->uc_allocbucket = cache->uc_freebucket = NULL;
730        }
731        ZONE_LOCK(zone);
732        bucket_cache_drain(zone);
733        ZONE_UNLOCK(zone);
734}
735
736static void
737cache_shrink(uma_zone_t zone)
738{
739
740        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
741                return;
742
743        ZONE_LOCK(zone);
744        zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
745        ZONE_UNLOCK(zone);
746}
747
748static void
749cache_drain_safe_cpu(uma_zone_t zone)
750{
751        uma_cache_t cache;
752        uma_bucket_t b1, b2;
753
754        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
755                return;
756
757        b1 = b2 = NULL;
758        ZONE_LOCK(zone);
759        critical_enter();
760        cache = &zone->uz_cpu[curcpu];
761        if (cache->uc_allocbucket) {
762                if (cache->uc_allocbucket->ub_cnt != 0)
763                        LIST_INSERT_HEAD(&zone->uz_buckets,
764                            cache->uc_allocbucket, ub_link);
765                else
766                        b1 = cache->uc_allocbucket;
767                cache->uc_allocbucket = NULL;
768        }
769        if (cache->uc_freebucket) {
770                if (cache->uc_freebucket->ub_cnt != 0)
771                        LIST_INSERT_HEAD(&zone->uz_buckets,
772                            cache->uc_freebucket, ub_link);
773                else
774                        b2 = cache->uc_freebucket;
775                cache->uc_freebucket = NULL;
776        }
777        critical_exit();
778        ZONE_UNLOCK(zone);
779        if (b1)
780                bucket_free(zone, b1, NULL);
781        if (b2)
782                bucket_free(zone, b2, NULL);
783}
784
785#ifndef __rtems__
786/*
787 * Safely drain per-CPU caches of a zone(s) to alloc bucket.
788 * This is an expensive call because it needs to bind to all CPUs
789 * one by one and enter a critical section on each of them in order
790 * to safely access their cache buckets.
791 * The zone lock must not be held when calling this function.
792 */
793static void
794cache_drain_safe(uma_zone_t zone)
795{
796        int cpu;
797
798        /*
799         * Polite bucket size shrinking was not enough, so shrink aggressively.
800         */
801        if (zone)
802                cache_shrink(zone);
803        else
804                zone_foreach(cache_shrink);
805
806        CPU_FOREACH(cpu) {
807                thread_lock(curthread);
808                sched_bind(curthread, cpu);
809                thread_unlock(curthread);
810
811                if (zone)
812                        cache_drain_safe_cpu(zone);
813                else
814                        zone_foreach(cache_drain_safe_cpu);
815        }
816        thread_lock(curthread);
817        sched_unbind(curthread);
818        thread_unlock(curthread);
819}
820#endif /* __rtems__ */
821
822/*
823 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
824 */
825static void
826bucket_cache_drain(uma_zone_t zone)
827{
828        uma_bucket_t bucket;
829
830        /*
831         * Drain the bucket queues and free the buckets; we just keep two per
832         * cpu (alloc/free).
833         */
834        while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
835                LIST_REMOVE(bucket, ub_link);
836                ZONE_UNLOCK(zone);
837                bucket_drain(zone, bucket);
838                bucket_free(zone, bucket, NULL);
839                ZONE_LOCK(zone);
840        }
841
842        /*
843         * Shrink further bucket sizes.  The price of a single zone lock collision
844         * is probably lower than the price of a global cache drain.
845         */
846        if (zone->uz_count > zone->uz_count_min)
847                zone->uz_count--;
848}
849
850static void
851keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
852{
853        uint8_t *mem;
854        int i;
855        uint8_t flags;
856
857        mem = slab->us_data;
858        flags = slab->us_flags;
859        i = start;
860        if (keg->uk_fini != NULL) {
861                for (i--; i > -1; i--)
862                        keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
863                            keg->uk_size);
864        }
865        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
866                zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
867#ifdef UMA_DEBUG
868        printf("%s: Returning %d bytes.\n", keg->uk_name,
869            PAGE_SIZE * keg->uk_ppera);
870#endif
871        keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
872}
873
874/*
875 * Frees pages from a keg back to the system.  This is done on demand from
876 * the pageout daemon.
877 *
878 * Returns nothing.
879 */
880static void
881keg_drain(uma_keg_t keg)
882{
883        struct slabhead freeslabs = { 0 };
884        uma_slab_t slab, tmp;
885
886        /*
887         * We don't want to take pages from statically allocated kegs at this
888         * time
889         */
890        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
891                return;
892
893#ifdef UMA_DEBUG
894        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
895#endif
896        KEG_LOCK(keg);
897        if (keg->uk_free == 0)
898                goto finished;
899
900        LIST_FOREACH_SAFE(slab, &keg->uk_free_slab, us_link, tmp) {
901#ifndef __rtems__
902                /* We have nowhere to free these to. */
903                if (slab->us_flags & UMA_SLAB_BOOT)
904                        continue;
905#endif /* __rtems__ */
906
907                LIST_REMOVE(slab, us_link);
908                keg->uk_pages -= keg->uk_ppera;
909                keg->uk_free -= keg->uk_ipers;
910
911                if (keg->uk_flags & UMA_ZONE_HASH)
912                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
913
914                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
915        }
916finished:
917        KEG_UNLOCK(keg);
918
919        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
920                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
921                keg_free_slab(keg, slab, keg->uk_ipers);
922        }
923}
924
925static void
926zone_drain_wait(uma_zone_t zone, int waitok)
927{
928
929        /*
930         * Set draining to interlock with zone_dtor() so we can release our
931         * locks as we go.  Only dtor() should do a WAITOK call since it
932         * is the only call that knows the structure will still be available
933         * when it wakes up.
934         */
935        ZONE_LOCK(zone);
936        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
937                if (waitok == M_NOWAIT)
938                        goto out;
939                msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
940        }
941        zone->uz_flags |= UMA_ZFLAG_DRAINING;
942        bucket_cache_drain(zone);
943        ZONE_UNLOCK(zone);
944        /*
945         * The DRAINING flag protects us from being freed while
946         * we're running.  Normally the uma_rwlock would protect us but we
947         * must be able to release and acquire the right lock for each keg.
948         */
949        zone_foreach_keg(zone, &keg_drain);
950        ZONE_LOCK(zone);
951        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
952        wakeup(zone);
953out:
954        ZONE_UNLOCK(zone);
955}
956
957void
958zone_drain(uma_zone_t zone)
959{
960
961        zone_drain_wait(zone, M_NOWAIT);
962}
963
964/*
965 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
966 *
967 * Arguments:
968 *      wait  Shall we wait?
969 *
970 * Returns:
971 *      The slab that was allocated or NULL if there is no memory and the
972 *      caller specified M_NOWAIT.
973 */
974static uma_slab_t
975keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
976{
977        uma_alloc allocf;
978        uma_slab_t slab;
979        uint8_t *mem;
980        uint8_t flags;
981        int i;
982
983        mtx_assert(&keg->uk_lock, MA_OWNED);
984        slab = NULL;
985        mem = NULL;
986
987#ifdef UMA_DEBUG
988        printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
989#endif
990        allocf = keg->uk_allocf;
991        KEG_UNLOCK(keg);
992
993        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
994                slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
995                if (slab == NULL)
996                        goto out;
997        }
998
999        /*
1000         * This reproduces the old vm_zone behavior of zero filling pages the
1001         * first time they are added to a zone.
1002         *
1003         * Malloced items are zeroed in uma_zalloc.
1004         */
1005
1006        if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
1007                wait |= M_ZERO;
1008        else
1009                wait &= ~M_ZERO;
1010
1011        if (keg->uk_flags & UMA_ZONE_NODUMP)
1012                wait |= M_NODUMP;
1013
1014        /* zone is passed for legacy reasons. */
1015        mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
1016        if (mem == NULL) {
1017                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1018                        zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
1019                slab = NULL;
1020                goto out;
1021        }
1022
1023        /* Point the slab into the allocated memory */
1024        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
1025                slab = (uma_slab_t )(mem + keg->uk_pgoff);
1026
1027        if (keg->uk_flags & UMA_ZONE_VTOSLAB)
1028                for (i = 0; i < keg->uk_ppera; i++)
1029                        vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
1030
1031        slab->us_keg = keg;
1032        slab->us_data = mem;
1033        slab->us_freecount = keg->uk_ipers;
1034        slab->us_flags = flags;
1035        BIT_FILL(SLAB_SETSIZE, &slab->us_free);
1036#ifdef INVARIANTS
1037        BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
1038#endif
1039
1040        if (keg->uk_init != NULL) {
1041                for (i = 0; i < keg->uk_ipers; i++)
1042                        if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
1043                            keg->uk_size, wait) != 0)
1044                                break;
1045                if (i != keg->uk_ipers) {
1046                        keg_free_slab(keg, slab, i);
1047                        slab = NULL;
1048                        goto out;
1049                }
1050        }
1051out:
1052        KEG_LOCK(keg);
1053
1054        if (slab != NULL) {
1055                if (keg->uk_flags & UMA_ZONE_HASH)
1056                        UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
1057
1058                keg->uk_pages += keg->uk_ppera;
1059                keg->uk_free += keg->uk_ipers;
1060        }
1061
1062        return (slab);
1063}
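/*
 * Editor's note (not part of the original source): the "start" argument of
 * keg_free_slab() is the count of items that were successfully
 * initialized.  If uk_init above fails on item i, keg_free_slab(keg, slab,
 * i) runs uk_fini on items i-1 down to 0 only, releases the slab header
 * (if OFFPAGE) and then the backing pages, so a partially constructed
 * slab is never inserted into the keg.
 */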
1064
1065#ifndef __rtems__
1066/*
1067 * This function is intended to be used early on in place of page_alloc() so
1068 * that we may use the boot time page cache to satisfy allocations before
1069 * the VM is ready.
1070 */
1071static void *
1072startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
1073{
1074        uma_keg_t keg;
1075        uma_slab_t tmps;
1076        int pages, check_pages;
1077
1078        keg = zone_first_keg(zone);
1079        pages = howmany(bytes, PAGE_SIZE);
1080        check_pages = pages - 1;
1081        KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
1082
1083        /*
1084         * Check our small startup cache to see if it has pages remaining.
1085         */
1086        mtx_lock(&uma_boot_pages_mtx);
1087
1088        /* First check if we have enough room. */
1089        tmps = LIST_FIRST(&uma_boot_pages);
1090        while (tmps != NULL && check_pages-- > 0)
1091                tmps = LIST_NEXT(tmps, us_link);
1092        if (tmps != NULL) {
1093                /*
1094                 * It's ok to lose tmps references.  The last one will
1095                 * have tmps->us_data pointing to the start address of
1096                 * "pages" contiguous pages of memory.
1097                 */
1098                while (pages-- > 0) {
1099                        tmps = LIST_FIRST(&uma_boot_pages);
1100                        LIST_REMOVE(tmps, us_link);
1101                }
1102                mtx_unlock(&uma_boot_pages_mtx);
1103                *pflag = tmps->us_flags;
1104                return (tmps->us_data);
1105        }
1106        mtx_unlock(&uma_boot_pages_mtx);
1107        if (booted < UMA_STARTUP2)
1108                panic("UMA: Increase vm.boot_pages");
1109        /*
1110         * Now that we've booted, reset these users to their real allocator.
1111         */
1112#ifdef UMA_MD_SMALL_ALLOC
1113        keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1114#else
1115        keg->uk_allocf = page_alloc;
1116#endif
1117        return keg->uk_allocf(zone, bytes, pflag, wait);
1118}
1119#endif /* __rtems__ */
1120
1121/*
1122 * Allocates a number of pages from the system
1123 *
1124 * Arguments:
1125 *      bytes  The number of bytes requested
1126 *      wait  Shall we wait?
1127 *
1128 * Returns:
1129 *      A pointer to the alloced memory or possibly
1130 *      NULL if M_NOWAIT is set.
1131 */
1132static void *
1133page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
1134{
1135        void *p;        /* Returned page */
1136
1137#ifndef __rtems__
1138        *pflag = UMA_SLAB_KMEM;
1139        p = (void *) kmem_malloc(kmem_arena, bytes, wait);
1140#else /* __rtems__ */
1141        *pflag = 0;
1142        p = rtems_bsd_page_alloc(bytes, wait);
1143#endif /* __rtems__ */
1144
1145        return (p);
1146}
1147
1148#ifndef __rtems__
1149/*
1150 * Allocates a number of pages from within an object
1151 *
1152 * Arguments:
1153 *      bytes  The number of bytes requested
1154 *      wait   Shall we wait?
1155 *
1156 * Returns:
1157 *      A pointer to the alloced memory or possibly
1158 *      NULL if M_NOWAIT is set.
1159 */
1160static void *
1161noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
1162{
1163        TAILQ_HEAD(, vm_page) alloctail;
1164        u_long npages;
1165        vm_offset_t retkva, zkva;
1166        vm_page_t p, p_next;
1167        uma_keg_t keg;
1168
1169        TAILQ_INIT(&alloctail);
1170        keg = zone_first_keg(zone);
1171
1172        npages = howmany(bytes, PAGE_SIZE);
1173        while (npages > 0) {
1174                p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
1175                    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
1176                if (p != NULL) {
1177                        /*
1178                         * Since the page does not belong to an object, its
1179                         * listq is unused.
1180                         */
1181                        TAILQ_INSERT_TAIL(&alloctail, p, listq);
1182                        npages--;
1183                        continue;
1184                }
1185                if (wait & M_WAITOK) {
1186                        VM_WAIT;
1187                        continue;
1188                }
1189
1190                /*
1191                 * Page allocation failed, free intermediate pages and
1192                 * exit.
1193                 */
1194                TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
1195                        vm_page_unwire(p, PQ_NONE);
1196                        vm_page_free(p);
1197                }
1198                return (NULL);
1199        }
1200        *flags = UMA_SLAB_PRIV;
1201        zkva = keg->uk_kva +
1202            atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
1203        retkva = zkva;
1204        TAILQ_FOREACH(p, &alloctail, listq) {
1205                pmap_qenter(zkva, &p, 1);
1206                zkva += PAGE_SIZE;
1207        }
1208
1209        return ((void *)retkva);
1210}
1211#endif /* __rtems__ */
1212
1213/*
1214 * Frees a number of pages to the system
1215 *
1216 * Arguments:
1217 *      mem   A pointer to the memory to be freed
1218 *      size  The size of the memory being freed
1219 *      flags The original p->us_flags field
1220 *
1221 * Returns:
1222 *      Nothing
1223 */
1224static void
1225page_free(void *mem, vm_size_t size, uint8_t flags)
1226{
1227#ifndef __rtems__
1228        struct vmem *vmem;
1229
1230        if (flags & UMA_SLAB_KMEM)
1231                vmem = kmem_arena;
1232        else if (flags & UMA_SLAB_KERNEL)
1233                vmem = kernel_arena;
1234        else
1235                panic("UMA: page_free used with invalid flags %x", flags);
1236
1237        kmem_free(vmem, (vm_offset_t)mem, size);
1238#else /* __rtems__ */
1239        if (flags & UMA_SLAB_KERNEL)
1240                free(mem, M_TEMP);
1241        else
1242                rtems_bsd_page_free(mem);
1243#endif /* __rtems__ */
1244}
1245
1246/*
1247 * Zero fill initializer
1248 *
1249 * Arguments/Returns follow uma_init specifications
1250 */
1251static int
1252zero_init(void *mem, int size, int flags)
1253{
1254        bzero(mem, size);
1255        return (0);
1256}
1257
1258/*
1259 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
1260 *
1261 * Arguments
1262 *      keg  The zone we should initialize
1263 *
1264 * Returns
1265 *      Nothing
1266 */
1267static void
1268keg_small_init(uma_keg_t keg)
1269{
1270        u_int rsize;
1271        u_int memused;
1272        u_int wastedspace;
1273        u_int shsize;
1274        u_int slabsize;
1275
1276        if (keg->uk_flags & UMA_ZONE_PCPU) {
1277                u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;
1278
1279                slabsize = sizeof(struct pcpu);
1280                keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
1281                    PAGE_SIZE);
1282        } else {
1283                slabsize = UMA_SLAB_SIZE;
1284                keg->uk_ppera = 1;
1285        }
1286
1287        /*
1288         * Calculate the size of each allocation (rsize) according to
1289         * alignment.  If the requested size is smaller than we have
1290         * allocation bits for, we round it up.
1291         */
1292        rsize = keg->uk_size;
1293        if (rsize < slabsize / SLAB_SETSIZE)
1294                rsize = slabsize / SLAB_SETSIZE;
1295        if (rsize & keg->uk_align)
1296                rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1297        keg->uk_rsize = rsize;
1298
1299        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
1300            keg->uk_rsize < sizeof(struct pcpu),
1301            ("%s: size %u too large", __func__, keg->uk_rsize));
1302
1303        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1304                shsize = 0;
1305        else
1306                shsize = sizeof(struct uma_slab);
1307
1308        keg->uk_ipers = (slabsize - shsize) / rsize;
1309        KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1310            ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1311
1312        memused = keg->uk_ipers * rsize + shsize;
1313        wastedspace = slabsize - memused;
1314
1315        /*
1316         * We can't do OFFPAGE if we're internal or if we've been
1317         * asked to not go to the VM for buckets.  If we do this we
1318         * may end up going to the VM  for slabs which we do not
1319         * want to do if we're UMA_ZFLAG_CACHEONLY as a result
1320         * of UMA_ZONE_VM, which clearly forbids it.
1321         */
1322        if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1323            (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1324                return;
1325
1326        /*
1327         * See if using an OFFPAGE slab will limit our waste.  Only do
1328         * this if it permits more items per-slab.
1329         *
1330         * XXX We could try growing slabsize to limit max waste as well.
1331         * Historically this was not done because the VM could not
1332         * efficiently handle contiguous allocations.
1333         */
1334        if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
1335            (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
1336                keg->uk_ipers = slabsize / keg->uk_rsize;
1337                KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
1338                    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
1339#ifdef UMA_DEBUG
1340                printf("UMA decided we need offpage slab headers for "
1341                    "keg: %s, calculated wastedspace = %d, "
1342                    "maximum wasted space allowed = %d, "
1343                    "calculated ipers = %d, "
1344                    "new wasted space = %d\n", keg->uk_name, wastedspace,
1345                    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
1346                    slabsize - keg->uk_ipers * keg->uk_rsize);
1347#endif
1348                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1349        }
1350
1351        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1352            (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1353                keg->uk_flags |= UMA_ZONE_HASH;
1354}
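/*
 * Editor's note: a worked example, not part of the original source,
 * assuming 4 KiB pages/slabs, UMA_MAX_WASTE == 10 and a slab header of
 * roughly 90 bytes.  For 1024-byte items the in-line layout yields
 * uk_ipers = (4096 - ~90) / 1024 = 3 and wastes ~936 bytes, which exceeds
 * the slabsize / UMA_MAX_WASTE budget of ~409 bytes while an OFFPAGE slab
 * could hold 4096 / 1024 = 4 items, so the keg is switched to
 * UMA_ZONE_OFFPAGE (and, lacking VTOSLAB, to UMA_ZONE_HASH).  For
 * 256-byte items the in-line layout already packs 15 items and wastes
 * well under the budget, so the header stays inside the slab.
 */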
1355
1356/*
1357 * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
1358 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1359 * more complicated.
1360 *
1361 * Arguments
1362 *      keg  The keg we should initialize
1363 *
1364 * Returns
1365 *      Nothing
1366 */
1367static void
1368keg_large_init(uma_keg_t keg)
1369{
1370        u_int shsize;
1371
1372        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1373        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1374            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1375        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1376            ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1377
1378        keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1379        keg->uk_ipers = 1;
1380        keg->uk_rsize = keg->uk_size;
1381
1382        /* We can't do OFFPAGE if we're internal, bail out here. */
1383        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1384                return;
1385
1386        /* Check whether we have enough space to not do OFFPAGE. */
1387        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
1388                shsize = sizeof(struct uma_slab);
1389                if (shsize & UMA_ALIGN_PTR)
1390                        shsize = (shsize & ~UMA_ALIGN_PTR) +
1391                            (UMA_ALIGN_PTR + 1);
1392
1393                if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
1394                        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1395        }
1396
1397        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1398            (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1399                keg->uk_flags |= UMA_ZONE_HASH;
1400}
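/*
 * Editor's note: a worked example, not part of the original source, again
 * assuming 4 KiB pages and a pointer-aligned slab header of roughly 90
 * bytes.  A 5000-byte item needs uk_ppera = 2 pages; 8192 - 5000 leaves
 * plenty of room for the header, so it stays in-line.  An 8150-byte item
 * also takes two pages but leaves only 42 bytes, so the keg goes OFFPAGE
 * (and, without VTOSLAB, gets a hash for slab lookup).
 */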
1401
1402static void
1403keg_cachespread_init(uma_keg_t keg)
1404{
1405        int alignsize;
1406        int trailer;
1407        int pages;
1408        int rsize;
1409
1410        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1411            ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1412
1413        alignsize = keg->uk_align + 1;
1414        rsize = keg->uk_size;
1415        /*
1416         * We want one item to start on every align boundary in a page.  To
1417         * do this we will span pages.  We will also extend the item by the
1418         * size of align if it is an even multiple of align.  Otherwise, it
1419         * would fall on the same boundary every time.
1420         */
1421        if (rsize & keg->uk_align)
1422                rsize = (rsize & ~keg->uk_align) + alignsize;
1423        if ((rsize & alignsize) == 0)
1424                rsize += alignsize;
1425        trailer = rsize - keg->uk_size;
1426        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1427        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1428        keg->uk_rsize = rsize;
1429        keg->uk_ppera = pages;
1430        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1431        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1432        KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
1433            ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1434            keg->uk_ipers));
1435}
1436
1437/*
1438 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1439 * the keg onto the global keg list.
1440 *
1441 * Arguments/Returns follow uma_ctor specifications
1442 *      udata  Actually uma_kctor_args
1443 */
1444static int
1445keg_ctor(void *mem, int size, void *udata, int flags)
1446{
1447        struct uma_kctor_args *arg = udata;
1448        uma_keg_t keg = mem;
1449        uma_zone_t zone;
1450
1451        bzero(keg, size);
1452        keg->uk_size = arg->size;
1453        keg->uk_init = arg->uminit;
1454        keg->uk_fini = arg->fini;
1455        keg->uk_align = arg->align;
1456        keg->uk_free = 0;
1457        keg->uk_reserve = 0;
1458        keg->uk_pages = 0;
1459        keg->uk_flags = arg->flags;
1460        keg->uk_allocf = page_alloc;
1461        keg->uk_freef = page_free;
1462        keg->uk_slabzone = NULL;
1463
1464        /*
1465         * The master zone is passed to us at keg-creation time.
1466         */
1467        zone = arg->zone;
1468        keg->uk_name = zone->uz_name;
1469
1470        if (arg->flags & UMA_ZONE_VM)
1471                keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1472
1473        if (arg->flags & UMA_ZONE_ZINIT)
1474                keg->uk_init = zero_init;
1475
1476        if (arg->flags & UMA_ZONE_MALLOC)
1477                keg->uk_flags |= UMA_ZONE_VTOSLAB;
1478
1479        if (arg->flags & UMA_ZONE_PCPU)
1480#ifdef SMP
1481                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1482#else
1483                keg->uk_flags &= ~UMA_ZONE_PCPU;
1484#endif
1485
1486        if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
1487                keg_cachespread_init(keg);
1488        } else {
1489                if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1490                        keg_large_init(keg);
1491                else
1492                        keg_small_init(keg);
1493        }
1494
1495        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
1496                keg->uk_slabzone = slabzone;
1497
1498        /*
1499         * If we haven't booted yet we need allocations to go through the
1500         * startup cache until the vm is ready.
1501         */
1502        if (keg->uk_ppera == 1) {
1503#ifdef UMA_MD_SMALL_ALLOC
1504                keg->uk_allocf = uma_small_alloc;
1505                keg->uk_freef = uma_small_free;
1506
1507#ifndef __rtems__
1508                if (booted < UMA_STARTUP)
1509                        keg->uk_allocf = startup_alloc;
1510#endif /* __rtems__ */
1511#else
1512#ifndef __rtems__
1513                if (booted < UMA_STARTUP2)
1514                        keg->uk_allocf = startup_alloc;
1515#endif /* __rtems__ */
1516#endif
1517#ifndef __rtems__
1518        } else if (booted < UMA_STARTUP2 &&
1519            (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1520                keg->uk_allocf = startup_alloc;
1521#else /* __rtems__ */
1522        }
1523#endif /* __rtems__ */
1524
1525        /*
1526         * Initialize keg's lock
1527         */
1528        KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));
1529
1530        /*
1531         * If we're putting the slab header in the actual page we need to
1532         * figure out where in each page it goes.  This calculates a right
1533         * justified offset into the memory on an ALIGN_PTR boundary.
1534         */
1535        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1536                u_int totsize;
1537
1538                /* Size of the slab struct and free list */
1539                totsize = sizeof(struct uma_slab);
1540
1541                if (totsize & UMA_ALIGN_PTR)
1542                        totsize = (totsize & ~UMA_ALIGN_PTR) +
1543                            (UMA_ALIGN_PTR + 1);
1544                keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;
1545
1546                /*
1547                 * The only way the following is possible is if with our
1548                 * UMA_ALIGN_PTR adjustments we are now bigger than
1549                 * UMA_SLAB_SIZE.  I haven't checked whether this is
1550                 * mathematically possible for all cases, so we make
1551                 * sure here anyway.
1552                 */
1553                totsize = keg->uk_pgoff + sizeof(struct uma_slab);
1554                if (totsize > PAGE_SIZE * keg->uk_ppera) {
1555                        printf("zone %s ipers %d rsize %d size %d\n",
1556                            zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1557                            keg->uk_size);
1558                        panic("UMA slab won't fit.");
1559                }
1560        }
1561
1562        if (keg->uk_flags & UMA_ZONE_HASH)
1563                hash_alloc(&keg->uk_hash);
1564
1565#ifdef UMA_DEBUG
1566        printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1567            zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1568            keg->uk_ipers, keg->uk_ppera,
1569            (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
1570            keg->uk_free);
1571#endif
1572
1573        LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1574
1575        rw_wlock(&uma_rwlock);
1576        LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1577        rw_wunlock(&uma_rwlock);
1578        return (0);
1579}
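/*
 * Editor's note (illustrative, not part of the original source):
 * continuing the worked example above, a one-page slab for 256-byte items
 * with a ~90-byte header rounded up to pointer alignment gets
 * uk_pgoff = 4096 - ~96 ~= 4000, i.e. the slab header is right-justified
 * at the end of the page while the 15 items occupy bytes 0..3839 from
 * slab->us_data.  The sanity check above only verifies that the
 * right-justified header itself still fits within the slab's pages.
 */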
1580
1581/*
1582 * Zone header ctor.  This initializes all fields, locks, etc.
1583 *
1584 * Arguments/Returns follow uma_ctor specifications
1585 *      udata  Actually uma_zctor_args
1586 */
1587static int
1588zone_ctor(void *mem, int size, void *udata, int flags)
1589{
1590        struct uma_zctor_args *arg = udata;
1591        uma_zone_t zone = mem;
1592        uma_zone_t z;
1593        uma_keg_t keg;
1594
1595        bzero(zone, size);
1596        zone->uz_name = arg->name;
1597        zone->uz_ctor = arg->ctor;
1598        zone->uz_dtor = arg->dtor;
1599        zone->uz_slab = zone_fetch_slab;
1600        zone->uz_init = NULL;
1601        zone->uz_fini = NULL;
1602        zone->uz_allocs = 0;
1603        zone->uz_frees = 0;
1604        zone->uz_fails = 0;
1605        zone->uz_sleeps = 0;
1606        zone->uz_count = 0;
1607        zone->uz_count_min = 0;
1608        zone->uz_flags = 0;
1609        zone->uz_warning = NULL;
1610        timevalclear(&zone->uz_ratecheck);
1611        keg = arg->keg;
1612
1613        ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));
1614
1615        /*
1616         * This is a pure cache zone, no kegs.
1617         */
1618        if (arg->import) {
1619                if (arg->flags & UMA_ZONE_VM)
1620                        arg->flags |= UMA_ZFLAG_CACHEONLY;
1621                zone->uz_flags = arg->flags;
1622                zone->uz_size = arg->size;
1623                zone->uz_import = arg->import;
1624                zone->uz_release = arg->release;
1625                zone->uz_arg = arg->arg;
1626                zone->uz_lockptr = &zone->uz_lock;
1627                rw_wlock(&uma_rwlock);
1628                LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
1629                rw_wunlock(&uma_rwlock);
1630                goto out;
1631        }
1632
1633        /*
1634         * Use the regular zone/keg/slab allocator.
1635         */
1636        zone->uz_import = (uma_import)zone_import;
1637        zone->uz_release = (uma_release)zone_release;
1638        zone->uz_arg = zone;
1639
1640        if (arg->flags & UMA_ZONE_SECONDARY) {
1641                KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1642                zone->uz_init = arg->uminit;
1643                zone->uz_fini = arg->fini;
1644                zone->uz_lockptr = &keg->uk_lock;
1645                zone->uz_flags |= UMA_ZONE_SECONDARY;
1646                rw_wlock(&uma_rwlock);
1647                ZONE_LOCK(zone);
1648                LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1649                        if (LIST_NEXT(z, uz_link) == NULL) {
1650                                LIST_INSERT_AFTER(z, zone, uz_link);
1651                                break;
1652                        }
1653                }
1654                ZONE_UNLOCK(zone);
1655                rw_wunlock(&uma_rwlock);
1656        } else if (keg == NULL) {
1657                if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1658                    arg->align, arg->flags)) == NULL)
1659                        return (ENOMEM);
1660        } else {
1661                struct uma_kctor_args karg;
1662                int error;
1663
1664                /* We should only be here from uma_startup() */
1665                karg.size = arg->size;
1666                karg.uminit = arg->uminit;
1667                karg.fini = arg->fini;
1668                karg.align = arg->align;
1669                karg.flags = arg->flags;
1670                karg.zone = zone;
1671                error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1672                    flags);
1673                if (error)
1674                        return (error);
1675        }
1676
1677        /*
1678         * Link in the first keg.
1679         */
1680        zone->uz_klink.kl_keg = keg;
1681        LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1682        zone->uz_lockptr = &keg->uk_lock;
1683        zone->uz_size = keg->uk_size;
1684        zone->uz_flags |= (keg->uk_flags &
1685            (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1686
1687        /*
1688         * Some internal zones don't have room allocated for the per-CPU
1689         * caches.  If we're internal, bail out here.
1690         */
1691        if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1692                KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1693                    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1694                return (0);
1695        }
1696
1697out:
1698        if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
1699                zone->uz_count = bucket_select(zone->uz_size);
1700        else
1701                zone->uz_count = BUCKET_MAX;
1702        zone->uz_count_min = zone->uz_count;
1703
1704        return (0);
1705}
1706
1707/*
1708 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1709 * table and removes the keg from the global list.
1710 *
1711 * Arguments/Returns follow uma_dtor specifications
1712 *      udata  unused
1713 */
1714static void
1715keg_dtor(void *arg, int size, void *udata)
1716{
1717        uma_keg_t keg;
1718
1719        keg = (uma_keg_t)arg;
1720        KEG_LOCK(keg);
1721        if (keg->uk_free != 0) {
1722                printf("Freed UMA keg (%s) was not empty (%d items). "
1723                    " Lost %d pages of memory.\n",
1724                    keg->uk_name ? keg->uk_name : "",
1725                    keg->uk_free, keg->uk_pages);
1726        }
1727        KEG_UNLOCK(keg);
1728
1729        hash_free(&keg->uk_hash);
1730
1731        KEG_LOCK_FINI(keg);
1732}
1733
1734/*
1735 * Zone header dtor.
1736 *
1737 * Arguments/Returns follow uma_dtor specifications
1738 *      udata  unused
1739 */
1740static void
1741zone_dtor(void *arg, int size, void *udata)
1742{
1743        uma_klink_t klink;
1744        uma_zone_t zone;
1745        uma_keg_t keg;
1746
1747        zone = (uma_zone_t)arg;
1748        keg = zone_first_keg(zone);
1749
1750        if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1751                cache_drain(zone);
1752
1753        rw_wlock(&uma_rwlock);
1754        LIST_REMOVE(zone, uz_link);
1755        rw_wunlock(&uma_rwlock);
1756        /*
1757         * XXX there are some races here where
1758         * the zone can be drained but zone lock
1759         * released and then refilled before we
1760                 * remove it... we don't care for now
1761         */
1762        zone_drain_wait(zone, M_WAITOK);
1763        /*
1764         * Unlink all of our kegs.
1765         */
1766        while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1767                klink->kl_keg = NULL;
1768                LIST_REMOVE(klink, kl_link);
1769                if (klink == &zone->uz_klink)
1770                        continue;
1771                free(klink, M_TEMP);
1772        }
1773        /*
1774         * We only destroy kegs from non secondary zones.
1775         */
1776        if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1777                rw_wlock(&uma_rwlock);
1778                LIST_REMOVE(keg, uk_link);
1779                rw_wunlock(&uma_rwlock);
1780                zone_free_item(kegs, keg, NULL, SKIP_NONE);
1781        }
1782        ZONE_LOCK_FINI(zone);
1783}
1784
1785/*
1786 * Traverses every zone in the system and calls a callback
1787 *
1788 * Arguments:
1789 *      zfunc  A pointer to a function which accepts a zone
1790 *              as an argument.
1791 *
1792 * Returns:
1793 *      Nothing
1794 */
1795static void
1796zone_foreach(void (*zfunc)(uma_zone_t))
1797{
1798        uma_keg_t keg;
1799        uma_zone_t zone;
1800
1801        rw_rlock(&uma_rwlock);
1802        LIST_FOREACH(keg, &uma_kegs, uk_link) {
1803                LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1804                        zfunc(zone);
1805        }
1806        rw_runlock(&uma_rwlock);
1807}
1808
1809/* Public functions */
1810/* See uma.h */
1811void
1812uma_startup(void *bootmem, int boot_pages)
1813{
1814        struct uma_zctor_args args;
1815#ifndef __rtems__
1816        uma_slab_t slab;
1817        int i;
1818#endif /* __rtems__ */
1819
1820#ifdef UMA_DEBUG
1821        printf("Creating uma keg headers zone and keg.\n");
1822#endif
1823        rw_init(&uma_rwlock, "UMA lock");
1824
1825        /* "manually" create the initial zone */
1826        memset(&args, 0, sizeof(args));
1827        args.name = "UMA Kegs";
1828        args.size = sizeof(struct uma_keg);
1829        args.ctor = keg_ctor;
1830        args.dtor = keg_dtor;
1831        args.uminit = zero_init;
1832        args.fini = NULL;
1833        args.keg = &masterkeg;
1834        args.align = 32 - 1;
1835        args.flags = UMA_ZFLAG_INTERNAL;
1836        /* The initial zone has no per-CPU queues, so it is smaller. */
1837        zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1838
1839#ifndef __rtems__
1840#ifdef UMA_DEBUG
1841        printf("Filling boot free list.\n");
1842#endif
1843        for (i = 0; i < boot_pages; i++) {
1844                slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
1845                slab->us_data = (uint8_t *)slab;
1846                slab->us_flags = UMA_SLAB_BOOT;
1847                LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1848        }
1849        mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1850#endif /* __rtems__ */
1851
1852#ifdef UMA_DEBUG
1853        printf("Creating uma zone headers zone and keg.\n");
1854#endif
1855        args.name = "UMA Zones";
1856        args.size = sizeof(struct uma_zone) +
1857            (sizeof(struct uma_cache) * (mp_maxid + 1));
1858        args.ctor = zone_ctor;
1859        args.dtor = zone_dtor;
1860        args.uminit = zero_init;
1861        args.fini = NULL;
1862        args.keg = NULL;
1863        args.align = 32 - 1;
1864        args.flags = UMA_ZFLAG_INTERNAL;
1865        /* The initial zone has no per-CPU queues, so it is smaller. */
1866        zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1867
1868#ifdef UMA_DEBUG
1869        printf("Creating slab and hash zones.\n");
1870#endif
1871
1872        /* Now make a zone for slab headers */
1873        slabzone = uma_zcreate("UMA Slabs",
1874                                sizeof(struct uma_slab),
1875                                NULL, NULL, NULL, NULL,
1876                                UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1877
1878        hashzone = uma_zcreate("UMA Hash",
1879            sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1880            NULL, NULL, NULL, NULL,
1881            UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1882
1883        bucket_init();
1884
1885#ifndef __rtems__
1886        booted = UMA_STARTUP;
1887#endif /* __rtems__ */
1888
1889#ifdef UMA_DEBUG
1890        printf("UMA startup complete.\n");
1891#endif
1892}
1893#ifdef __rtems__
1894static void
1895rtems_bsd_uma_startup(void *unused)
1896{
1897        (void) unused;
1898
1899        sx_init(&uma_drain_lock, "umadrain");
1900        uma_startup(NULL, 0);
1901}
1902
1903SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_SECOND,
1904    rtems_bsd_uma_startup, NULL);
1905#endif /* __rtems__ */
1906
1907#ifndef __rtems__
1908/* see uma.h */
1909void
1910uma_startup2(void)
1911{
1912        booted = UMA_STARTUP2;
1913        bucket_enable();
1914        sx_init(&uma_drain_lock, "umadrain");
1915#ifdef UMA_DEBUG
1916        printf("UMA startup2 complete.\n");
1917#endif
1918}
1919#endif /* __rtems__ */
1920
1921/*
1922 * Initialize our callout handle
1923 *
1924 */
1925
1926static void
1927uma_startup3(void)
1928{
1929#ifdef UMA_DEBUG
1930        printf("Starting callout.\n");
1931#endif
1932        callout_init(&uma_callout, 1);
1933        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1934#ifdef UMA_DEBUG
1935        printf("UMA startup3 complete.\n");
1936#endif
1937}
1938
1939static uma_keg_t
1940uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1941                int align, uint32_t flags)
1942{
1943        struct uma_kctor_args args;
1944
1945        args.size = size;
1946        args.uminit = uminit;
1947        args.fini = fini;
1948        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1949        args.flags = flags;
1950        args.zone = zone;
1951        return (zone_alloc_item(kegs, &args, M_WAITOK));
1952}
1953
1954/* See uma.h */
1955void
1956uma_set_align(int align)
1957{
1958
1959        if (align != UMA_ALIGN_CACHE)
1960                uma_align_cache = align;
1961}
1962
1963/* See uma.h */
1964uma_zone_t
1965uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1966                uma_init uminit, uma_fini fini, int align, uint32_t flags)
1967
1968{
1969        struct uma_zctor_args args;
1970        uma_zone_t res;
1971#ifndef __rtems__
1972        bool locked;
1973#endif /* __rtems__ */
1974
1975        /* This stuff is essential for the zone ctor */
1976        memset(&args, 0, sizeof(args));
1977        args.name = name;
1978        args.size = size;
1979        args.ctor = ctor;
1980        args.dtor = dtor;
1981        args.uminit = uminit;
1982        args.fini = fini;
1983#ifdef  INVARIANTS
1984        /*
1985         * If a zone is being created with an empty constructor and
1986         * destructor, pass UMA constructor/destructor which checks for
1987         * memory use after free.
1988         */
1989        if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
1990            ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
1991                args.ctor = trash_ctor;
1992                args.dtor = trash_dtor;
1993                args.uminit = trash_init;
1994                args.fini = trash_fini;
1995        }
1996#endif
1997        args.align = align;
1998        args.flags = flags;
1999        args.keg = NULL;
2000
2001#ifndef __rtems__
2002        if (booted < UMA_STARTUP2) {
2003                locked = false;
2004        } else {
2005#endif /* __rtems__ */
2006                sx_slock(&uma_drain_lock);
2007#ifndef __rtems__
2008                locked = true;
2009        }
2010#endif /* __rtems__ */
2011        res = zone_alloc_item(zones, &args, M_WAITOK);
2012#ifndef __rtems__
2013        if (locked)
2014#endif /* __rtems__ */
2015                sx_sunlock(&uma_drain_lock);
2016        return (res);
2017}
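
/*
 * Illustrative sketch only (kept out of the build with #if 0): a typical
 * consumer creates a zone once and then allocates and frees items through
 * the per-CPU caches managed above.  The "foo" type, zone variable and
 * zone name are hypothetical; the calls follow the signatures in uma.h.
 */
#if 0
struct foo {
        int     f_refs;
        char    f_name[16];
};

static uma_zone_t foo_zone;

static void
foo_init(void)
{

        foo_zone = uma_zcreate("foo", sizeof(struct foo), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, 0);
}

static struct foo *
foo_alloc(void)
{

        return (uma_zalloc(foo_zone, M_WAITOK | M_ZERO));
}

static void
foo_free(struct foo *fp)
{

        uma_zfree(foo_zone, fp);
}
#endif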
2018
2019/* See uma.h */
2020uma_zone_t
2021uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
2022                    uma_init zinit, uma_fini zfini, uma_zone_t master)
2023{
2024        struct uma_zctor_args args;
2025        uma_keg_t keg;
2026        uma_zone_t res;
2027#ifndef __rtems__
2028        bool locked;
2029#endif /* __rtems__ */
2030
2031        keg = zone_first_keg(master);
2032        memset(&args, 0, sizeof(args));
2033        args.name = name;
2034        args.size = keg->uk_size;
2035        args.ctor = ctor;
2036        args.dtor = dtor;
2037        args.uminit = zinit;
2038        args.fini = zfini;
2039        args.align = keg->uk_align;
2040        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
2041        args.keg = keg;
2042
2043#ifndef __rtems__
2044        if (booted < UMA_STARTUP2) {
2045                locked = false;
2046        } else {
2047#endif /* __rtems__ */
2048                sx_slock(&uma_drain_lock);
2049#ifndef __rtems__
2050                locked = true;
2051        }
2052#endif /* __rtems__ */
2053        /* XXX Attaches only one keg of potentially many. */
2054        res = zone_alloc_item(zones, &args, M_WAITOK);
2055#ifndef __rtems__
2056        if (locked)
2057#endif /* __rtems__ */
2058                sx_sunlock(&uma_drain_lock);
2059        return (res);
2060}
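
/*
 * Illustrative sketch only (compiled out): a secondary zone shares the
 * master zone's keg, and therefore its slabs and item size, but layers
 * its own ctor on top, as arranged by zone_ctor() above.  The names and
 * the foo_zone master are hypothetical.
 */
#if 0
static uma_zone_t foo_special_zone;

static int
foo_special_ctor(void *mem, int size, void *arg, int flags)
{

        /* Extra per-allocation setup on top of the shared keg. */
        return (0);
}

static void
foo_special_init(void)
{

        foo_special_zone = uma_zsecond_create("foo special",
            foo_special_ctor, NULL, NULL, NULL, foo_zone);
}
#endif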
2061
2062/* See uma.h */
2063uma_zone_t
2064uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2065                    uma_init zinit, uma_fini zfini, uma_import zimport,
2066                    uma_release zrelease, void *arg, int flags)
2067{
2068        struct uma_zctor_args args;
2069
2070        memset(&args, 0, sizeof(args));
2071        args.name = name;
2072        args.size = size;
2073        args.ctor = ctor;
2074        args.dtor = dtor;
2075        args.uminit = zinit;
2076        args.fini = zfini;
2077        args.import = zimport;
2078        args.release = zrelease;
2079        args.arg = arg;
2080        args.align = 0;
2081        args.flags = flags;
2082
2083        return (zone_alloc_item(zones, &args, M_WAITOK));
2084}
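
/*
 * Illustrative sketch only (compiled out): a cache-only zone has no keg;
 * it fills buckets from and drains buckets to a backend through the
 * import/release methods wired up in zone_ctor() above.  The backend
 * helpers and names here are hypothetical.
 */
#if 0
static int
foo_import(void *arg, void **store, int cnt, int flags)
{
        int i;

        /* Fetch up to cnt items from the external resource pool. */
        for (i = 0; i < cnt; i++) {
                store[i] = foo_backend_get(arg, flags);
                if (store[i] == NULL)
                        break;
        }
        return (i);
}

static void
foo_release(void *arg, void **store, int cnt)
{
        int i;

        /* Hand the items back to the external resource pool. */
        for (i = 0; i < cnt; i++)
                foo_backend_put(arg, store[i]);
}

static void
foo_cache_init(void)
{

        foo_cache_zone = uma_zcache_create("foo cache", sizeof(struct foo),
            NULL, NULL, NULL, NULL, foo_import, foo_release, &foo_backend, 0);
}
#endif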
2085
2086#ifndef __rtems__
2087static void
2088zone_lock_pair(uma_zone_t a, uma_zone_t b)
2089{
2090        if (a < b) {
2091                ZONE_LOCK(a);
2092                mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
2093        } else {
2094                ZONE_LOCK(b);
2095                mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
2096        }
2097}
2098
2099static void
2100zone_unlock_pair(uma_zone_t a, uma_zone_t b)
2101{
2102
2103        ZONE_UNLOCK(a);
2104        ZONE_UNLOCK(b);
2105}
2106
2107int
2108uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
2109{
2110        uma_klink_t klink;
2111        uma_klink_t kl;
2112        int error;
2113
2114        error = 0;
2115        klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
2116
2117        zone_lock_pair(zone, master);
2118        /*
2119         * zone must use vtoslab() to resolve objects and must already be
2120         * a secondary.
2121         */
2122        if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
2123            != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
2124                error = EINVAL;
2125                goto out;
2126        }
2127        /*
2128         * The new master must also use vtoslab().
2129         */
2130        if ((master->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2131                error = EINVAL;
2132                goto out;
2133        }
2134
2135        /*
2136         * The underlying object must be the same size.  rsize
2137         * may be different.
2138         */
2139        if (master->uz_size != zone->uz_size) {
2140                error = E2BIG;
2141                goto out;
2142        }
2143        /*
2144         * Put it at the end of the list.
2145         */
2146        klink->kl_keg = zone_first_keg(master);
2147        LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2148                if (LIST_NEXT(kl, kl_link) == NULL) {
2149                        LIST_INSERT_AFTER(kl, klink, kl_link);
2150                        break;
2151                }
2152        }
2153        klink = NULL;
2154        zone->uz_flags |= UMA_ZFLAG_MULTI;
2155        zone->uz_slab = zone_fetch_slab_multi;
2156
2157out:
2158        zone_unlock_pair(zone, master);
2159        if (klink != NULL)
2160                free(klink, M_TEMP);
2161
2162        return (error);
2163}
2164#endif /* __rtems__ */
2165
2166
2167/* See uma.h */
2168void
2169uma_zdestroy(uma_zone_t zone)
2170{
2171
2172        sx_slock(&uma_drain_lock);
2173        zone_free_item(zones, zone, NULL, SKIP_NONE);
2174        sx_sunlock(&uma_drain_lock);
2175}
2176
2177/* See uma.h */
2178void *
2179uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2180{
2181        void *item;
2182        uma_cache_t cache;
2183        uma_bucket_t bucket;
2184        int lockfail;
2185        int cpu;
2186
2187        /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2188        random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
2189
2190        /* This is the fast path allocation */
2191#ifdef UMA_DEBUG_ALLOC_1
2192        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
2193#endif
2194        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2195            zone->uz_name, flags);
2196
2197        if (flags & M_WAITOK) {
2198                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2199                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2200        }
2201        KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2202            ("uma_zalloc_arg: called with spinlock or critical section held"));
2203
2204#ifdef DEBUG_MEMGUARD
2205        if (memguard_cmp_zone(zone)) {
2206                item = memguard_alloc(zone->uz_size, flags);
2207                if (item != NULL) {
2208                        if (zone->uz_init != NULL &&
2209                            zone->uz_init(item, zone->uz_size, flags) != 0)
2210                                return (NULL);
2211                        if (zone->uz_ctor != NULL &&
2212                            zone->uz_ctor(item, zone->uz_size, udata,
2213                            flags) != 0) {
2214                                zone->uz_fini(item, zone->uz_size);
2215                                return (NULL);
2216                        }
2217                        return (item);
2218                }
2219                /* This is unfortunate but should not be fatal. */
2220        }
2221#endif
2222        /*
2223         * If possible, allocate from the per-CPU cache.  There are two
2224         * requirements for safe access to the per-CPU cache: (1) the thread
2225         * accessing the cache must not be preempted or yield during access,
2226         * and (2) the thread must not migrate CPUs without switching which
2227         * cache it accesses.  We rely on a critical section to prevent
2228         * preemption and migration.  We release the critical section in
2229         * order to acquire the zone mutex if we are unable to allocate from
2230         * the current cache; when we re-acquire the critical section, we
2231         * must detect and handle migration if it has occurred.
2232         */
2233        critical_enter();
2234        cpu = curcpu;
2235        cache = &zone->uz_cpu[cpu];
2236
2237zalloc_start:
2238        bucket = cache->uc_allocbucket;
2239        if (bucket != NULL && bucket->ub_cnt > 0) {
2240                bucket->ub_cnt--;
2241                item = bucket->ub_bucket[bucket->ub_cnt];
2242#ifdef INVARIANTS
2243                bucket->ub_bucket[bucket->ub_cnt] = NULL;
2244#endif
2245                KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
2246                cache->uc_allocs++;
2247                critical_exit();
2248                if (zone->uz_ctor != NULL &&
2249                    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2250                        atomic_add_long(&zone->uz_fails, 1);
2251                        zone_free_item(zone, item, udata, SKIP_DTOR);
2252                        return (NULL);
2253                }
2254#ifdef INVARIANTS
2255                uma_dbg_alloc(zone, NULL, item);
2256#endif
2257                if (flags & M_ZERO)
2258                        uma_zero_item(item, zone);
2259                return (item);
2260        }
2261
2262        /*
2263         * We have run out of items in our alloc bucket.
2264         * See if we can switch with our free bucket.
2265         */
2266        bucket = cache->uc_freebucket;
2267        if (bucket != NULL && bucket->ub_cnt > 0) {
2268#ifdef UMA_DEBUG_ALLOC
2269                printf("uma_zalloc: Swapping empty with alloc.\n");
2270#endif
2271                cache->uc_freebucket = cache->uc_allocbucket;
2272                cache->uc_allocbucket = bucket;
2273                goto zalloc_start;
2274        }
2275
2276        /*
2277         * Discard any empty allocation bucket while we hold no locks.
2278         */
2279        bucket = cache->uc_allocbucket;
2280        cache->uc_allocbucket = NULL;
2281        critical_exit();
2282        if (bucket != NULL)
2283                bucket_free(zone, bucket, udata);
2284
2285        /* Short-circuit for zones without buckets and low memory. */
2286        if (zone->uz_count == 0 || bucketdisable)
2287                goto zalloc_item;
2288
2289        /*
2290         * Our attempt to retrieve the item from the per-CPU cache has failed, so
2291         * we must go back to the zone.  This requires the zone lock, so we
2292         * must drop the critical section, then re-acquire it when we go back
2293         * to the cache.  Since the critical section is released, we may be
2294         * preempted or migrate.  As such, make sure not to maintain any
2295         * thread-local state specific to the cache from prior to releasing
2296         * the critical section.
2297         */
2298        lockfail = 0;
2299        if (ZONE_TRYLOCK(zone) == 0) {
2300                /* Record contention to size the buckets. */
2301                ZONE_LOCK(zone);
2302                lockfail = 1;
2303        }
2304        critical_enter();
2305        cpu = curcpu;
2306        cache = &zone->uz_cpu[cpu];
2307
2308        /*
2309         * Since we have locked the zone we may as well send back our stats.
2310         */
2311        atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2312        atomic_add_long(&zone->uz_frees, cache->uc_frees);
2313        cache->uc_allocs = 0;
2314        cache->uc_frees = 0;
2315
2316        /* See if we lost the race to fill the cache. */
2317        if (cache->uc_allocbucket != NULL) {
2318                ZONE_UNLOCK(zone);
2319                goto zalloc_start;
2320        }
2321
2322        /*
2323         * Check the zone's cache of buckets.
2324         */
2325        if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
2326                KASSERT(bucket->ub_cnt != 0,
2327                    ("uma_zalloc_arg: Returning an empty bucket."));
2328
2329                LIST_REMOVE(bucket, ub_link);
2330                cache->uc_allocbucket = bucket;
2331                ZONE_UNLOCK(zone);
2332                goto zalloc_start;
2333        }
2334        /* We are no longer associated with this CPU. */
2335        critical_exit();
2336
2337        /*
2338         * We bump the uz count when the cache size is insufficient to
2339         * handle the working set.
2340         */
2341        if (lockfail && zone->uz_count < BUCKET_MAX)
2342                zone->uz_count++;
2343        ZONE_UNLOCK(zone);
2344
2345        /*
2346         * Now let's just fill a bucket and put it on the free list.  If that
2347         * works, we'll restart the allocation from the beginning and it
2348         * will use the just-filled bucket.
2349         */
2350        bucket = zone_alloc_bucket(zone, udata, flags);
2351        if (bucket != NULL) {
2352                ZONE_LOCK(zone);
2353                critical_enter();
2354                cpu = curcpu;
2355                cache = &zone->uz_cpu[cpu];
2356                /*
2357                 * See if we lost the race or were migrated.  Cache the
2358                 * initialized bucket to make this less likely or claim
2359                 * the memory directly.
2360                 */
2361                if (cache->uc_allocbucket == NULL)
2362                        cache->uc_allocbucket = bucket;
2363                else
2364                        LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2365                ZONE_UNLOCK(zone);
2366                goto zalloc_start;
2367        }
2368
2369        /*
2370         * We may not be able to get a bucket so return an actual item.
2371         */
2372#ifdef UMA_DEBUG
2373        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2374#endif
2375
2376zalloc_item:
2377        item = zone_alloc_item(zone, udata, flags);
2378
2379        return (item);
2380}
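
/*
 * Summary of the allocation path above: an item is taken, in order of
 * preference, from (1) the per-CPU alloc bucket, (2) the per-CPU free
 * bucket after swapping it in, (3) a full bucket cached on the zone's
 * bucket list, and finally (4) a freshly filled bucket from
 * zone_alloc_bucket() or a single item imported by zone_alloc_item()
 * when buckets are disabled or cannot be filled.
 */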
2381
2382static uma_slab_t
2383keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2384{
2385        uma_slab_t slab;
2386        int reserve;
2387
2388        mtx_assert(&keg->uk_lock, MA_OWNED);
2389        slab = NULL;
2390        reserve = 0;
2391        if ((flags & M_USE_RESERVE) == 0)
2392                reserve = keg->uk_reserve;
2393
2394        for (;;) {
2395                /*
2396                 * Find a slab with some space.  Prefer slabs that are partially
2397                 * used over those that are totally full.  This helps to reduce
2398                 * fragmentation.
2399                 */
2400                if (keg->uk_free > reserve) {
2401                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
2402                                slab = LIST_FIRST(&keg->uk_part_slab);
2403                        } else {
2404                                slab = LIST_FIRST(&keg->uk_free_slab);
2405                                LIST_REMOVE(slab, us_link);
2406                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2407                                    us_link);
2408                        }
2409                        MPASS(slab->us_keg == keg);
2410                        return (slab);
2411                }
2412
2413                /*
2414                 * M_NOVM means don't ask at all!
2415                 */
2416                if (flags & M_NOVM)
2417                        break;
2418
2419                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2420                        keg->uk_flags |= UMA_ZFLAG_FULL;
2421                        /*
2422                         * If this is not a multi-zone, set the FULL bit.
2423                         * Otherwise slab_multi() takes care of it.
2424                         */
2425                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
2426                                zone->uz_flags |= UMA_ZFLAG_FULL;
2427                                zone_log_warning(zone);
2428                                zone_maxaction(zone);
2429                        }
2430                        if (flags & M_NOWAIT)
2431                                break;
2432                        zone->uz_sleeps++;
2433                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2434                        continue;
2435                }
2436                slab = keg_alloc_slab(keg, zone, flags);
2437                /*
2438                 * If we got a slab here it's safe to mark it partially used
2439                 * and return.  We assume that the caller is going to remove
2440                 * at least one item.
2441                 */
2442                if (slab) {
2443                        MPASS(slab->us_keg == keg);
2444                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2445                        return (slab);
2446                }
2447                /*
2448                 * We might not have been able to get a slab but another cpu
2449                 * could have while we were unlocked.  Check again before we
2450                 * fail.
2451                 */
2452                flags |= M_NOVM;
2453        }
2454        return (slab);
2455}
2456
2457static uma_slab_t
2458zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2459{
2460        uma_slab_t slab;
2461
2462        if (keg == NULL) {
2463                keg = zone_first_keg(zone);
2464                KEG_LOCK(keg);
2465        }
2466
2467        for (;;) {
2468                slab = keg_fetch_slab(keg, zone, flags);
2469                if (slab)
2470                        return (slab);
2471                if (flags & (M_NOWAIT | M_NOVM))
2472                        break;
2473        }
2474        KEG_UNLOCK(keg);
2475        return (NULL);
2476}
2477
2478#ifndef __rtems__
2479/*
2480 * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2481 * with the keg locked.  On NULL no lock is held.
2482 *
2483 * The last pointer is used to seed the search.  It is not required.
2484 */
2485static uma_slab_t
2486zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2487{
2488        uma_klink_t klink;
2489        uma_slab_t slab;
2490        uma_keg_t keg;
2491        int flags;
2492        int empty;
2493        int full;
2494
2495        /*
2496         * Don't wait on the first pass.  This will skip limit tests
2497         * as well.  We don't want to block if we can find a provider
2498         * without blocking.
2499         */
2500        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2501        /*
2502         * Use the last slab allocated as a hint for where to start
2503         * the search.
2504         */
2505        if (last != NULL) {
2506                slab = keg_fetch_slab(last, zone, flags);
2507                if (slab)
2508                        return (slab);
2509                KEG_UNLOCK(last);
2510        }
2511        /*
2512         * Loop until we have a slab in case of transient failures
2513         * while M_WAITOK is specified.  I'm not sure this is 100%
2514         * required but we've done it for so long now.
2515         */
2516        for (;;) {
2517                empty = 0;
2518                full = 0;
2519                /*
2520                 * Search the available kegs for slabs.  Be careful to hold the
2521                 * correct lock while calling into the keg layer.
2522                 */
2523                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2524                        keg = klink->kl_keg;
2525                        KEG_LOCK(keg);
2526                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2527                                slab = keg_fetch_slab(keg, zone, flags);
2528                                if (slab)
2529                                        return (slab);
2530                        }
2531                        if (keg->uk_flags & UMA_ZFLAG_FULL)
2532                                full++;
2533                        else
2534                                empty++;
2535                        KEG_UNLOCK(keg);
2536                }
2537                if (rflags & (M_NOWAIT | M_NOVM))
2538                        break;
2539                flags = rflags;
2540                /*
2541                 * All kegs are full.  XXX We can't atomically check all kegs
2542                 * and sleep so just sleep for a short period and retry.
2543                 */
2544                if (full && !empty) {
2545                        ZONE_LOCK(zone);
2546                        zone->uz_flags |= UMA_ZFLAG_FULL;
2547                        zone->uz_sleeps++;
2548                        zone_log_warning(zone);
2549                        zone_maxaction(zone);
2550                        msleep(zone, zone->uz_lockptr, PVM,
2551                            "zonelimit", hz/100);
2552                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
2553                        ZONE_UNLOCK(zone);
2554                        continue;
2555                }
2556        }
2557        return (NULL);
2558}
2559#endif /* __rtems__ */
2560
2561static void *
2562slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2563{
2564        void *item;
2565        uint8_t freei;
2566
2567        MPASS(keg == slab->us_keg);
2568        mtx_assert(&keg->uk_lock, MA_OWNED);
2569
2570        freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2571        BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2572        item = slab->us_data + (keg->uk_rsize * freei);
2573        slab->us_freecount--;
2574        keg->uk_free--;
2575
2576        /* Move this slab to the full list */
2577        if (slab->us_freecount == 0) {
2578                LIST_REMOVE(slab, us_link);
2579                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2580        }
2581
2582        return (item);
2583}
2584
2585static int
2586zone_import(uma_zone_t zone, void **bucket, int max, int flags)
2587{
2588        uma_slab_t slab;
2589        uma_keg_t keg;
2590        int i;
2591
2592        slab = NULL;
2593        keg = NULL;
2594        /* Try to keep the buckets totally full */
2595        for (i = 0; i < max; ) {
2596                if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2597                        break;
2598                keg = slab->us_keg;
2599                while (slab->us_freecount && i < max) {
2600                        bucket[i++] = slab_alloc_item(keg, slab);
2601                        if (keg->uk_free <= keg->uk_reserve)
2602                                break;
2603                }
2604                /* Don't grab more than one slab at a time. */
2605                flags &= ~M_WAITOK;
2606                flags |= M_NOWAIT;
2607        }
2608        if (slab != NULL)
2609                KEG_UNLOCK(keg);
2610
2611        return (i);
2612}
2613
2614static uma_bucket_t
2615zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
2616{
2617        uma_bucket_t bucket;
2618        int max;
2619
2620        /* Don't wait for buckets, preserve caller's NOVM setting. */
2621        bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
2622        if (bucket == NULL)
2623                return (NULL);
2624
2625        max = MIN(bucket->ub_entries, zone->uz_count);
2626        bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
2627            max, flags);
2628
2629        /*
2630         * Initialize the memory if necessary.
2631         */
2632        if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
2633                int i;
2634
2635                for (i = 0; i < bucket->ub_cnt; i++)
2636                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2637                            flags) != 0)
2638                                break;
2639                /*
2640                 * If we couldn't initialize the whole bucket, put the
2641                 * rest back onto the freelist.
2642                 */
2643                if (i != bucket->ub_cnt) {
2644                        zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
2645                            bucket->ub_cnt - i);
2646#ifdef INVARIANTS
2647                        bzero(&bucket->ub_bucket[i],
2648                            sizeof(void *) * (bucket->ub_cnt - i));
2649#endif
2650                        bucket->ub_cnt = i;
2651                }
2652        }
2653
2654        if (bucket->ub_cnt == 0) {
2655                bucket_free(zone, bucket, udata);
2656                atomic_add_long(&zone->uz_fails, 1);
2657                return (NULL);
2658        }
2659
2660        return (bucket);
2661}
2662
2663/*
2664 * Allocates a single item from a zone.
2665 *
2666 * Arguments
2667 *      zone   The zone to alloc for.
2668 *      udata  The data to be passed to the constructor.
2669 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2670 *
2671 * Returns
2672 *      NULL if there is no memory and M_NOWAIT is set
2673 *      An item if successful
2674 */
2675
2676static void *
2677zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2678{
2679        void *item;
2680
2681        item = NULL;
2682
2683#ifdef UMA_DEBUG_ALLOC
2684        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2685#endif
2686        if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
2687                goto fail;
2688        atomic_add_long(&zone->uz_allocs, 1);
2689
2690        /*
2691         * We have to call both the zone's init (not the keg's init)
2692         * and the zone's ctor.  This is because the item is going from
2693         * a keg slab directly to the user, and the user is expecting it
2694         * to be both zone-init'd as well as zone-ctor'd.
2695         */
2696        if (zone->uz_init != NULL) {
2697                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2698                        zone_free_item(zone, item, udata, SKIP_FINI);
2699                        goto fail;
2700                }
2701        }
2702        if (zone->uz_ctor != NULL) {
2703                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2704                        zone_free_item(zone, item, udata, SKIP_DTOR);
2705                        goto fail;
2706                }
2707        }
2708#ifdef INVARIANTS
2709        uma_dbg_alloc(zone, NULL, item);
2710#endif
2711        if (flags & M_ZERO)
2712                uma_zero_item(item, zone);
2713
2714        return (item);
2715
2716fail:
2717        atomic_add_long(&zone->uz_fails, 1);
2718        return (NULL);
2719}
2720
2721/* See uma.h */
2722void
2723uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2724{
2725        uma_cache_t cache;
2726        uma_bucket_t bucket;
2727        int lockfail;
2728        int cpu;
2729
2730        /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
2731        random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);
2732
2733#ifdef UMA_DEBUG_ALLOC_1
2734        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2735#endif
2736        CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2737            zone->uz_name);
2738
2739        KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
2740            ("uma_zfree_arg: called with spinlock or critical section held"));
2741
2742        /* uma_zfree(..., NULL) does nothing, to match free(9). */
2743        if (item == NULL)
2744                return;
2745#ifdef DEBUG_MEMGUARD
2746        if (is_memguard_addr(item)) {
2747                if (zone->uz_dtor != NULL)
2748                        zone->uz_dtor(item, zone->uz_size, udata);
2749                if (zone->uz_fini != NULL)
2750                        zone->uz_fini(item, zone->uz_size);
2751                memguard_free(item);
2752                return;
2753        }
2754#endif
2755#ifdef INVARIANTS
2756        if (zone->uz_flags & UMA_ZONE_MALLOC)
2757                uma_dbg_free(zone, udata, item);
2758        else
2759                uma_dbg_free(zone, NULL, item);
2760#endif
2761        if (zone->uz_dtor != NULL)
2762                zone->uz_dtor(item, zone->uz_size, udata);
2763
2764        /*
2765         * The race here is acceptable.  If we miss it we'll just have to wait
2766         * a little longer for the limits to be reset.
2767         */
2768        if (zone->uz_flags & UMA_ZFLAG_FULL)
2769                goto zfree_item;
2770
2771        /*
2772         * If possible, free to the per-CPU cache.  There are two
2773         * requirements for safe access to the per-CPU cache: (1) the thread
2774         * accessing the cache must not be preempted or yield during access,
2775         * and (2) the thread must not migrate CPUs without switching which
2776         * cache it accesses.  We rely on a critical section to prevent
2777         * preemption and migration.  We release the critical section in
2778         * order to acquire the zone mutex if we are unable to free to the
2779         * current cache; when we re-acquire the critical section, we must
2780         * detect and handle migration if it has occurred.
2781         */
2782zfree_restart:
2783        critical_enter();
2784        cpu = curcpu;
2785        cache = &zone->uz_cpu[cpu];
2786
2787zfree_start:
2788        /*
2789         * Try to free into the allocbucket first to give LIFO ordering
2790         * for cache-hot data structures.  Spill over into the freebucket
2791         * if necessary.  Alloc will swap them if one runs dry.
2792         */
2793        bucket = cache->uc_allocbucket;
2794        if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
2795                bucket = cache->uc_freebucket;
2796        if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2797                KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2798                    ("uma_zfree: Freeing to non free bucket index."));
2799                bucket->ub_bucket[bucket->ub_cnt] = item;
2800                bucket->ub_cnt++;
2801                cache->uc_frees++;
2802                critical_exit();
2803                return;
2804        }
2805
2806        /*
2807         * We must go back to the zone, which requires acquiring the zone lock,
2808         * which in turn means we must release and re-acquire the critical
2809         * section.  Since the critical section is released, we may be
2810         * preempted or migrate.  As such, make sure not to maintain any
2811         * thread-local state specific to the cache from prior to releasing
2812         * the critical section.
2813         */
2814        critical_exit();
2815        if (zone->uz_count == 0 || bucketdisable)
2816                goto zfree_item;
2817
2818        lockfail = 0;
2819        if (ZONE_TRYLOCK(zone) == 0) {
2820                /* Record contention to size the buckets. */
2821                ZONE_LOCK(zone);
2822                lockfail = 1;
2823        }
2824        critical_enter();
2825        cpu = curcpu;
2826        cache = &zone->uz_cpu[cpu];
2827
2828        /*
2829         * Since we have locked the zone we may as well send back our stats.
2830         */
2831        atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
2832        atomic_add_long(&zone->uz_frees, cache->uc_frees);
2833        cache->uc_allocs = 0;
2834        cache->uc_frees = 0;
2835
2836        bucket = cache->uc_freebucket;
2837        if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
2838                ZONE_UNLOCK(zone);
2839                goto zfree_start;
2840        }
2841        cache->uc_freebucket = NULL;
2842        /* We are no longer associated with this CPU. */
2843        critical_exit();
2844
2845        /* Can we throw this on the zone full list? */
2846        if (bucket != NULL) {
2847#ifdef UMA_DEBUG_ALLOC
2848                printf("uma_zfree: Putting old bucket on the free list.\n");
2849#endif
2850                /* ub_cnt is pointing to the last free item */
2851                KASSERT(bucket->ub_cnt != 0,
2852                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2853                LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
2854        }
2855
2856        /*
2857         * We bump the uz count when the cache size is insufficient to
2858         * handle the working set.
2859         */
2860        if (lockfail && zone->uz_count < BUCKET_MAX)
2861                zone->uz_count++;
2862        ZONE_UNLOCK(zone);
2863
2864#ifdef UMA_DEBUG_ALLOC
2865        printf("uma_zfree: Allocating new free bucket.\n");
2866#endif
2867        bucket = bucket_alloc(zone, udata, M_NOWAIT);
2868        if (bucket) {
2869                critical_enter();
2870                cpu = curcpu;
2871                cache = &zone->uz_cpu[cpu];
2872                if (cache->uc_freebucket == NULL) {
2873                        cache->uc_freebucket = bucket;
2874                        goto zfree_start;
2875                }
2876                /*
2877                 * We lost the race, start over.  We have to drop our
2878                 * critical section to free the bucket.
2879                 */
2880                critical_exit();
2881                bucket_free(zone, bucket, udata);
2882                goto zfree_restart;
2883        }
2884
2885        /*
2886         * If nothing else caught this, we'll just do an internal free.
2887         */
2888zfree_item:
2889        zone_free_item(zone, item, udata, SKIP_DTOR);
2890
2891        return;
2892}
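
/*
 * Summary of the free path above: after the destructor runs, the item is
 * placed, in order of preference, into (1) the per-CPU alloc bucket to
 * keep cache-hot items in LIFO order, (2) the per-CPU free bucket, (3) a
 * new free bucket obtained after the old full one is pushed onto the
 * zone's bucket list, and finally (4) straight back to the keg via
 * zone_free_item() when buckets are unavailable or the zone is full.
 */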
2893
2894static void
2895slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
2896{
2897        uint8_t freei;
2898
2899        mtx_assert(&keg->uk_lock, MA_OWNED);
2900        MPASS(keg == slab->us_keg);
2901
2902        /* Do we need to remove from any lists? */
2903        if (slab->us_freecount+1 == keg->uk_ipers) {
2904                LIST_REMOVE(slab, us_link);
2905                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2906        } else if (slab->us_freecount == 0) {
2907                LIST_REMOVE(slab, us_link);
2908                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2909        }
2910
2911        /* Slab management. */
2912        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2913        BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
2914        slab->us_freecount++;
2915
2916        /* Keg statistics. */
2917        keg->uk_free++;
2918}
2919
2920static void
2921zone_release(uma_zone_t zone, void **bucket, int cnt)
2922{
2923        void *item;
2924        uma_slab_t slab;
2925        uma_keg_t keg;
2926        uint8_t *mem;
2927        int clearfull;
2928        int i;
2929
2930        clearfull = 0;
2931        keg = zone_first_keg(zone);
2932        KEG_LOCK(keg);
2933        for (i = 0; i < cnt; i++) {
2934                item = bucket[i];
2935                if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2936                        mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
2937                        if (zone->uz_flags & UMA_ZONE_HASH) {
2938                                slab = hash_sfind(&keg->uk_hash, mem);
2939                        } else {
2940                                mem += keg->uk_pgoff;
2941                                slab = (uma_slab_t)mem;
2942                        }
2943                } else {
2944                        slab = vtoslab((vm_offset_t)item);
2945                        if (slab->us_keg != keg) {
2946                                KEG_UNLOCK(keg);
2947                                keg = slab->us_keg;
2948                                KEG_LOCK(keg);
2949                        }
2950                }
2951                slab_free_item(keg, slab, item);
2952                if (keg->uk_flags & UMA_ZFLAG_FULL) {
2953                        if (keg->uk_pages < keg->uk_maxpages) {
2954                                keg->uk_flags &= ~UMA_ZFLAG_FULL;
2955                                clearfull = 1;
2956                        }
2957
2958                        /*
2959                         * We can handle one more allocation. Since we're
2960                         * clearing ZFLAG_FULL, wake up all procs blocked
2961                         * on pages. This should be uncommon, so keeping this
2962                         * simple for now (rather than adding count of blocked
2963                         * threads etc).
2964                         */
2965                        wakeup(keg);
2966                }
2967        }
2968        KEG_UNLOCK(keg);
2969        if (clearfull) {
2970                ZONE_LOCK(zone);
2971                zone->uz_flags &= ~UMA_ZFLAG_FULL;
2972                wakeup(zone);
2973                ZONE_UNLOCK(zone);
2974        }
2975
2976}
2977
2978/*
2979 * Frees a single item to any zone.
2980 *
2981 * Arguments:
2982 *      zone   The zone to free to
2983 *      item   The item we're freeing
2984 *      udata  User supplied data for the dtor
2985 *      skip   Skip dtors and finis
2986 */
2987static void
2988zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2989{
2990
2991#ifdef INVARIANTS
2992        if (skip == SKIP_NONE) {
2993                if (zone->uz_flags & UMA_ZONE_MALLOC)
2994                        uma_dbg_free(zone, udata, item);
2995                else
2996                        uma_dbg_free(zone, NULL, item);
2997        }
2998#endif
2999        if (skip < SKIP_DTOR && zone->uz_dtor)
3000                zone->uz_dtor(item, zone->uz_size, udata);
3001
3002        if (skip < SKIP_FINI && zone->uz_fini)
3003                zone->uz_fini(item, zone->uz_size);
3004
3005        atomic_add_long(&zone->uz_frees, 1);
3006        zone->uz_release(zone->uz_arg, &item, 1);
3007}
3008
3009/* See uma.h */
3010int
3011uma_zone_set_max(uma_zone_t zone, int nitems)
3012{
3013        uma_keg_t keg;
3014
3015        keg = zone_first_keg(zone);
3016        if (keg == NULL)
3017                return (0);
3018        KEG_LOCK(keg);
3019#ifdef __rtems__
3020#ifdef SMP
3021        /*
3022         * Ensure we have enough items to fill the per-processor caches.  This
3023         * is a heuristic approach and does not work under all conditions.
3024         */
3025        nitems += 2 * BUCKET_MAX * (mp_maxid + 1);
3026#endif
3027#endif /* __rtems__ */
3028        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
3029        if (keg->uk_maxpages * keg->uk_ipers < nitems)
3030                keg->uk_maxpages += keg->uk_ppera;
3031        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3032        KEG_UNLOCK(keg);
3033
3034        return (nitems);
3035}
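
/*
 * Worked example of the rounding above, for a hypothetical keg geometry:
 * with uk_ipers = 50 items per slab and uk_ppera = 1 page per slab, a
 * request of nitems = 120 yields uk_maxpages = (120 / 50) * 1 = 2, which
 * covers only 100 items, so uk_ppera is added once more for uk_maxpages = 3,
 * and the effective limit returned is (3 / 1) * 50 = 150 items.  On RTEMS
 * SMP the request is first inflated by 2 * BUCKET_MAX * (mp_maxid + 1) so
 * that items sitting in per-processor caches do not eat into the caller's
 * intended limit.
 */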
3036
3037/* See uma.h */
3038int
3039uma_zone_get_max(uma_zone_t zone)
3040{
3041        int nitems;
3042        uma_keg_t keg;
3043
3044        keg = zone_first_keg(zone);
3045        if (keg == NULL)
3046                return (0);
3047        KEG_LOCK(keg);
3048        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3049        KEG_UNLOCK(keg);
3050
3051        return (nitems);
3052}
3053
3054/* See uma.h */
3055void
3056uma_zone_set_warning(uma_zone_t zone, const char *warning)
3057{
3058
3059        ZONE_LOCK(zone);
3060        zone->uz_warning = warning;
3061        ZONE_UNLOCK(zone);
3062}
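
/*
 * Illustrative sketch only (compiled out): pair a limit with a warning
 * string; zone_log_warning() reports it when allocations run into the
 * limit.  The zone, limit and message are hypothetical.
 */
#if 0
static void
foo_limit_init(void)
{

        uma_zone_set_max(foo_zone, 1024);
        uma_zone_set_warning(foo_zone, "foo_zone limit reached");
}
#endif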
3063
3064/* See uma.h */
3065void
3066uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
3067{
3068
3069        ZONE_LOCK(zone);
3070        TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
3071        ZONE_UNLOCK(zone);
3072}
3073
3074/* See uma.h */
3075int
3076uma_zone_get_cur(uma_zone_t zone)
3077{
3078        int64_t nitems;
3079        u_int i;
3080
3081        ZONE_LOCK(zone);
3082        nitems = zone->uz_allocs - zone->uz_frees;
3083        CPU_FOREACH(i) {
3084                /*
3085                 * See the comment in sysctl_vm_zone_stats() regarding the
3086                 * safety of accessing the per-cpu caches. With the zone lock
3087                 * held, it is safe, but can potentially result in stale data.
3088                 */
3089                nitems += zone->uz_cpu[i].uc_allocs -
3090                    zone->uz_cpu[i].uc_frees;
3091        }
3092        ZONE_UNLOCK(zone);
3093
3094        return (nitems < 0 ? 0 : nitems);
3095}
3096
3097/* See uma.h */
3098void
3099uma_zone_set_init(uma_zone_t zone, uma_init uminit)
3100{
3101        uma_keg_t keg;
3102
3103        keg = zone_first_keg(zone);
3104        KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
3105        KEG_LOCK(keg);
3106        KASSERT(keg->uk_pages == 0,
3107            ("uma_zone_set_init on non-empty keg"));
3108        keg->uk_init = uminit;
3109        KEG_UNLOCK(keg);
3110}
3111
3112/* See uma.h */
3113void
3114uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
3115{
3116        uma_keg_t keg;
3117
3118        keg = zone_first_keg(zone);
3119        KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
3120        KEG_LOCK(keg);
3121        KASSERT(keg->uk_pages == 0,
3122            ("uma_zone_set_fini on non-empty keg"));
3123        keg->uk_fini = fini;
3124        KEG_UNLOCK(keg);
3125}
3126
3127/* See uma.h */
3128void
3129uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
3130{
3131
3132        ZONE_LOCK(zone);
3133        KASSERT(zone_first_keg(zone)->uk_pages == 0,
3134            ("uma_zone_set_zinit on non-empty keg"));
3135        zone->uz_init = zinit;
3136        ZONE_UNLOCK(zone);
3137}
3138
3139/* See uma.h */
3140void
3141uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3142{
3143
3144        ZONE_LOCK(zone);
3145        KASSERT(zone_first_keg(zone)->uk_pages == 0,
3146            ("uma_zone_set_zfini on non-empty keg"));
3147        zone->uz_fini = zfini;
3148        ZONE_UNLOCK(zone);
3149}
3150
3151/* See uma.h */
3152/* XXX uk_freef is not actually used with the zone locked */
3153void
3154uma_zone_set_freef(uma_zone_t zone, uma_free freef)
3155{
3156        uma_keg_t keg;
3157
3158        keg = zone_first_keg(zone);
3159        KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
3160        KEG_LOCK(keg);
3161        keg->uk_freef = freef;
3162        KEG_UNLOCK(keg);
3163}
3164
3165/* See uma.h */
3166/* XXX uk_allocf is not actually used with the zone locked */
3167void
3168uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3169{
3170        uma_keg_t keg;
3171
3172        keg = zone_first_keg(zone);
3173        KEG_LOCK(keg);
3174        keg->uk_allocf = allocf;
3175        KEG_UNLOCK(keg);
3176}
3177
3178/* See uma.h */
3179void
3180uma_zone_reserve(uma_zone_t zone, int items)
3181{
3182        uma_keg_t keg;
3183
3184        keg = zone_first_keg(zone);
3185        if (keg == NULL)
3186                return;
3187        KEG_LOCK(keg);
3188        keg->uk_reserve = items;
3189        KEG_UNLOCK(keg);
3190
3191        return;
3192}
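
/*
 * Illustrative sketch only (compiled out): uma_zone_reserve() makes
 * keg_fetch_slab() hold uk_reserve items back from ordinary requests; a
 * caller that must not fail can dip into them with M_USE_RESERVE.  The
 * zone and counts are hypothetical.
 */
#if 0
static void
foo_reserve_init(void)
{

        /* Keep 8 items back for M_USE_RESERVE allocations. */
        uma_zone_reserve(foo_zone, 8);
}

static struct foo *
foo_alloc_critical(void)
{

        /* May consume the reserved items; must not sleep. */
        return (uma_zalloc(foo_zone, M_NOWAIT | M_USE_RESERVE));
}
#endif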
3193
3194#ifndef __rtems__
3195/* See uma.h */
3196int
3197uma_zone_reserve_kva(uma_zone_t zone, int count)
3198{
3199        uma_keg_t keg;
3200        vm_offset_t kva;
3201        u_int pages;
3202
3203        keg = zone_first_keg(zone);
3204        if (keg == NULL)
3205                return (0);
3206        pages = count / keg->uk_ipers;
3207
3208        if (pages * keg->uk_ipers < count)
3209                pages++;
3210        pages *= keg->uk_ppera;
3211
3212#ifdef UMA_MD_SMALL_ALLOC
3213        if (keg->uk_ppera > 1) {
3214#else
3215        if (1) {
3216#endif
3217                kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
3218                if (kva == 0)
3219                        return (0);
3220        } else
3221                kva = 0;
3222        KEG_LOCK(keg);
3223        keg->uk_kva = kva;
3224        keg->uk_offset = 0;
3225        keg->uk_maxpages = pages;
3226#ifdef UMA_MD_SMALL_ALLOC
3227        keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
3228#else
3229        keg->uk_allocf = noobj_alloc;
3230#endif
3231        keg->uk_flags |= UMA_ZONE_NOFREE;
3232        KEG_UNLOCK(keg);
3233
3234        return (1);
3235}
3236
3237/* See uma.h */
3238void
3239uma_prealloc(uma_zone_t zone, int items)
3240{
3241        int slabs;
3242        uma_slab_t slab;
3243        uma_keg_t keg;
3244
3245        keg = zone_first_keg(zone);
3246        if (keg == NULL)
3247                return;
3248        KEG_LOCK(keg);
3249        slabs = items / keg->uk_ipers;
3250        if (slabs * keg->uk_ipers < items)
3251                slabs++;
3252        while (slabs > 0) {
3253                slab = keg_alloc_slab(keg, zone, M_WAITOK);
3254                if (slab == NULL)
3255                        break;
3256                MPASS(slab->us_keg == keg);
3257                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3258                slabs--;
3259        }
3260        KEG_UNLOCK(keg);
3261}
3262#endif /* __rtems__ */
3263
3264/* See uma.h */
3265static void
3266uma_reclaim_locked(bool kmem_danger)
3267{
3268
3269#ifdef UMA_DEBUG
3270        printf("UMA: vm asked us to release pages!\n");
3271#endif
3272        sx_assert(&uma_drain_lock, SA_XLOCKED);
3273        bucket_enable();
3274        zone_foreach(zone_drain);
3275#ifndef __rtems__
3276        if (vm_page_count_min() || kmem_danger) {
3277                cache_drain_safe(NULL);
3278                zone_foreach(zone_drain);
3279        }
3280#endif /* __rtems__ */
3281        /*
3282         * Some slabs may have been freed, but this zone was visited early in
3283         * the pass, so we visit it again to free pages that become empty only
3284         * once the other zones are drained.  We have to do the same for buckets.
3285         */
3286        zone_drain(slabzone);
3287        bucket_zone_drain();
3288}
3289
3290void
3291uma_reclaim(void)
3292{
3293
3294        sx_xlock(&uma_drain_lock);
3295        uma_reclaim_locked(false);
3296        sx_xunlock(&uma_drain_lock);
3297}
3298
3299static int uma_reclaim_needed;
3300
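/*
 * Request an asynchronous reclaim: flag that a drain is needed and wake the
 * reclaim worker sleeping on uma_reclaim_needed.
 */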
void
uma_reclaim_wakeup(void)
{

        uma_reclaim_needed = 1;
        wakeup(&uma_reclaim_needed);
}

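/*
 * Reclaim worker loop: sleep until uma_reclaim_wakeup() posts a request and
 * then drain the zones with the drain lock held.  On non-RTEMS builds the
 * vm_lowmem event handlers are invoked first, with the lock dropped around
 * the call.
 */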
void
uma_reclaim_worker(void *arg __unused)
{

        sx_xlock(&uma_drain_lock);
        for (;;) {
                sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
                    "umarcl", 0);
                if (uma_reclaim_needed) {
                        uma_reclaim_needed = 0;
#ifndef __rtems__
                        sx_xunlock(&uma_drain_lock);
                        EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
                        sx_xlock(&uma_drain_lock);
#endif /* __rtems__ */
                        uma_reclaim_locked(true);
                }
        }
}

/* See uma.h */
int
uma_zone_exhausted(uma_zone_t zone)
{
        int full;

        ZONE_LOCK(zone);
        full = (zone->uz_flags & UMA_ZFLAG_FULL);
        ZONE_UNLOCK(zone);
        return (full);
}

int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
        return (zone->uz_flags & UMA_ZFLAG_FULL);
}

#ifndef __rtems__
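/*
 * Back a large allocation (typically reached from malloc(9) for requests too
 * big for a regular zone) with pages from page_alloc() and a slab header from
 * slabzone, so the memory can later be identified and released through
 * uma_large_free().
 */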
void *
uma_large_malloc(vm_size_t size, int wait)
{
        void *mem;
        uma_slab_t slab;
        uint8_t flags;

        slab = zone_alloc_item(slabzone, NULL, wait);
        if (slab == NULL)
                return (NULL);
        mem = page_alloc(NULL, size, &flags, wait);
        if (mem) {
                vsetslab((vm_offset_t)mem, slab);
                slab->us_data = mem;
                slab->us_flags = flags | UMA_SLAB_MALLOC;
                slab->us_size = size;
        } else {
                zone_free_item(slabzone, slab, NULL, SKIP_NONE);
        }

        return (mem);
}

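/*
 * Release memory obtained from uma_large_malloc(): free the backing pages and
 * return the slab header to slabzone.
 */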
void
uma_large_free(uma_slab_t slab)
{

        page_free(slab->us_data, slab->us_size, slab->us_flags);
        zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
#endif /* __rtems__ */

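/*
 * Zero an item; for per-CPU (UMA_ZONE_PCPU) zones every CPU's copy of the
 * item is cleared individually.
 */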
static void
uma_zero_item(void *item, uma_zone_t zone)
{
        int i;

        if (zone->uz_flags & UMA_ZONE_PCPU) {
                CPU_FOREACH(i)
                        bzero(zpcpu_get_cpu(item, i), zone->uz_size);
        } else
                bzero(item, zone->uz_size);
}

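/*
 * Console debugging helpers: uma_print_stats() walks every zone and dumps its
 * kegs, slabs and per-CPU caches via the printers below.
 */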
void
uma_print_stats(void)
{
        zone_foreach(uma_print_zone);
}

static void
slab_print(uma_slab_t slab)
{
        printf("slab: keg %p, data %p, freecount %d\n",
                slab->us_keg, slab->us_data, slab->us_freecount);
}

static void
cache_print(uma_cache_t cache)
{
        printf("alloc: %p(%d), free: %p(%d)\n",
                cache->uc_allocbucket,
                cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
                cache->uc_freebucket,
                cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
}

static void
uma_print_keg(uma_keg_t keg)
{
        uma_slab_t slab;

        printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
            "out %d free %d limit %d\n",
            keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
            keg->uk_ipers, keg->uk_ppera,
            (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
            keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
        printf("Part slabs:\n");
        LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
                slab_print(slab);
        printf("Free slabs:\n");
        LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
                slab_print(slab);
        printf("Full slabs:\n");
        LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
                slab_print(slab);
}

void
uma_print_zone(uma_zone_t zone)
{
        uma_cache_t cache;
        uma_klink_t kl;
        int i;

        printf("zone: %s(%p) size %d flags %#x\n",
            zone->uz_name, zone, zone->uz_size, zone->uz_flags);
        LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
                uma_print_keg(kl->kl_keg);
        CPU_FOREACH(i) {
                cache = &zone->uz_cpu[i];
                printf("CPU %d Cache:\n", i);
                cache_print(cache);
        }
}

#ifndef __rtems__
#ifdef DDB
/*
 * Generate statistics across both the zone and its per-cpu caches.  Return
 * desired statistics if the pointer is non-NULL for that statistic.
 *
 * Note: does not update the zone statistics, as it can't safely clear the
 * per-CPU cache statistic.
 *
 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
 * safe from off-CPU; we should modify the caches to track this information
 * directly so that we don't have to.
 */
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
    uint64_t *freesp, uint64_t *sleepsp)
{
        uma_cache_t cache;
        uint64_t allocs, frees, sleeps;
        int cachefree, cpu;

        allocs = frees = sleeps = 0;
        cachefree = 0;
        CPU_FOREACH(cpu) {
                cache = &z->uz_cpu[cpu];
                if (cache->uc_allocbucket != NULL)
                        cachefree += cache->uc_allocbucket->ub_cnt;
                if (cache->uc_freebucket != NULL)
                        cachefree += cache->uc_freebucket->ub_cnt;
                allocs += cache->uc_allocs;
                frees += cache->uc_frees;
        }
        allocs += z->uz_allocs;
        frees += z->uz_frees;
        sleeps += z->uz_sleeps;
        if (cachefreep != NULL)
                *cachefreep = cachefree;
        if (allocsp != NULL)
                *allocsp = allocs;
        if (freesp != NULL)
                *freesp = frees;
        if (sleepsp != NULL)
                *sleepsp = sleeps;
}
#endif /* DDB */
#endif /* __rtems__ */

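/*
 * Sysctl handler reporting the number of zones; walks every keg's zone list
 * under the shared uma_rwlock.
 */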
static int
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
{
        uma_keg_t kz;
        uma_zone_t z;
        int count;

        count = 0;
        rw_rlock(&uma_rwlock);
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link)
                        count++;
        }
        rw_runlock(&uma_rwlock);
        return (sysctl_handle_int(oidp, &count, 0, req));
}

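/*
 * Sysctl handler exporting zone statistics as a binary stream: one
 * uma_stream_header followed, for each zone, by a uma_type_header and one
 * uma_percpu_stat record per possible CPU.  Userland consumers such as
 * vmstat(8) decode this stream.
 */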
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
        struct uma_stream_header ush;
        struct uma_type_header uth;
        struct uma_percpu_stat ups;
        uma_bucket_t bucket;
        struct sbuf sbuf;
        uma_cache_t cache;
        uma_klink_t kl;
        uma_keg_t kz;
        uma_zone_t z;
        uma_keg_t k;
        int count, error, i;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
        sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);

        count = 0;
        rw_rlock(&uma_rwlock);
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link)
                        count++;
        }

        /*
         * Insert stream header.
         */
        bzero(&ush, sizeof(ush));
        ush.ush_version = UMA_STREAM_VERSION;
        ush.ush_maxcpus = (mp_maxid + 1);
        ush.ush_count = count;
        (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));

        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
                        bzero(&uth, sizeof(uth));
                        ZONE_LOCK(z);
                        strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
                        uth.uth_align = kz->uk_align;
                        uth.uth_size = kz->uk_size;
                        uth.uth_rsize = kz->uk_rsize;
                        LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
                                k = kl->kl_keg;
                                uth.uth_maxpages += k->uk_maxpages;
                                uth.uth_pages += k->uk_pages;
                                uth.uth_keg_free += k->uk_free;
                                uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
                                    * k->uk_ipers;
                        }

                        /*
                         * A zone is secondary if it is not the first entry
                         * on the keg's zone list.
                         */
                        if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
                            (LIST_FIRST(&kz->uk_zones) != z))
                                uth.uth_zone_flags = UTH_ZONE_SECONDARY;

                        LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
                                uth.uth_zone_free += bucket->ub_cnt;
                        uth.uth_allocs = z->uz_allocs;
                        uth.uth_frees = z->uz_frees;
                        uth.uth_fails = z->uz_fails;
                        uth.uth_sleeps = z->uz_sleeps;
                        (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
                        /*
                         * While it is not normally safe to access the cache
                         * bucket pointers while not on the CPU that owns the
                         * cache, we only allow the pointers to be exchanged
                         * without the zone lock held, not invalidated, so
                         * accept the possible race associated with bucket
                         * exchange during monitoring.
                         */
                        for (i = 0; i < (mp_maxid + 1); i++) {
                                bzero(&ups, sizeof(ups));
                                if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
                                        goto skip;
                                if (CPU_ABSENT(i))
                                        goto skip;
                                cache = &z->uz_cpu[i];
                                if (cache->uc_allocbucket != NULL)
                                        ups.ups_cache_free +=
                                            cache->uc_allocbucket->ub_cnt;
                                if (cache->uc_freebucket != NULL)
                                        ups.ups_cache_free +=
                                            cache->uc_freebucket->ub_cnt;
                                ups.ups_allocs = cache->uc_allocs;
                                ups.ups_frees = cache->uc_frees;
skip:
                                (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
                        }
                        ZONE_UNLOCK(z);
                }
        }
        rw_runlock(&uma_rwlock);
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

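/*
 * Generic sysctl handler that reads or updates a zone's item limit; arg1 must
 * point to the uma_zone_t variable for the zone.  Illustrative wiring only,
 * with a hypothetical oid name and zone variable:
 *
 *      SYSCTL_PROC(_vm, OID_AUTO, example_zone_max,
 *          CTLTYPE_INT | CTLFLAG_RW, &example_zone, 0,
 *          sysctl_handle_uma_zone_max, "I", "Maximum items in example zone");
 */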
int
sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
{
        uma_zone_t zone = *(uma_zone_t *)arg1;
        int error, max;

        max = uma_zone_get_max(zone);
        error = sysctl_handle_int(oidp, &max, 0, req);
        if (error || !req->newptr)
                return (error);

        uma_zone_set_max(zone, max);

        return (0);
}

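/*
 * Read-only counterpart of the handler above: reports the current number of
 * items allocated from the zone pointed to by arg1.
 */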
int
sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
{
        uma_zone_t zone = *(uma_zone_t *)arg1;
        int cur;

        cur = uma_zone_get_cur(zone);
        return (sysctl_handle_int(oidp, &cur, 0, req));
}

#ifdef INVARIANTS
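/*
 * Look up the slab an item belongs to: via vtoslab() for VTOSLAB zones,
 * otherwise through the keg's hash table or the slab header embedded in the
 * page run itself.
 */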
static uma_slab_t
uma_dbg_getslab(uma_zone_t zone, void *item)
{
        uma_slab_t slab;
        uma_keg_t keg;
        uint8_t *mem;

        mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
        if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
                slab = vtoslab((vm_offset_t)mem);
        } else {
                /*
                 * It is safe to return the slab here even though the
                 * zone is unlocked because the item's allocation state
                 * essentially holds a reference.
                 */
                ZONE_LOCK(zone);
                keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
                if (keg->uk_flags & UMA_ZONE_HASH)
                        slab = hash_sfind(&keg->uk_hash, mem);
                else
                        slab = (uma_slab_t)(mem + keg->uk_pgoff);
                ZONE_UNLOCK(zone);
        }

        return (slab);
}

/*
 * Set up the slab's freei data such that uma_dbg_free can function.
 *
 */
static void
uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
{
        uma_keg_t keg;
        int freei;

        if (zone_first_keg(zone) == NULL)
                return;
        if (slab == NULL) {
                slab = uma_dbg_getslab(zone, item);
                if (slab == NULL)
                        panic("uma: item %p did not belong to zone %s\n",
                            item, zone->uz_name);
        }
        keg = slab->us_keg;
        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;

        if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
                panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
                    item, zone, zone->uz_name, slab, freei);
        BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);

        return;
}

/*
 * Verifies freed addresses.  Checks for alignment, valid slab membership
 * and duplicate frees.
 *
 */
static void
uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
{
        uma_keg_t keg;
        int freei;

        if (zone_first_keg(zone) == NULL)
                return;
        if (slab == NULL) {
                slab = uma_dbg_getslab(zone, item);
                if (slab == NULL)
                        panic("uma: Freed item %p did not belong to zone %s\n",
                            item, zone->uz_name);
        }
        keg = slab->us_keg;
        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;

        if (freei >= keg->uk_ipers)
                panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
                    item, zone, zone->uz_name, slab, freei);

        if (((freei * keg->uk_rsize) + slab->us_data) != item)
                panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
                    item, zone, zone->uz_name, slab, freei);

        if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
                panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
                    item, zone, zone->uz_name, slab, freei);

        BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
}
#endif /* INVARIANTS */

#ifndef __rtems__
#ifdef DDB
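/*
 * DDB "show uma" command: print a summary line per zone (size, items in use,
 * cached free items, request and sleep counts, bucket size).
 */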
DB_SHOW_COMMAND(uma, db_show_uma)
{
        uint64_t allocs, frees, sleeps;
        uma_bucket_t bucket;
        uma_keg_t kz;
        uma_zone_t z;
        int cachefree;

        db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
            "Free", "Requests", "Sleeps", "Bucket");
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
                        if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
                                allocs = z->uz_allocs;
                                frees = z->uz_frees;
                                sleeps = z->uz_sleeps;
                                cachefree = 0;
                        } else
                                uma_zone_sumstat(z, &cachefree, &allocs,
                                    &frees, &sleeps);
                        if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
                            (LIST_FIRST(&kz->uk_zones) != z)))
                                cachefree += kz->uk_free;
                        LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
                                cachefree += bucket->ub_cnt;
                        db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
                            z->uz_name, (uintmax_t)kz->uk_size,
                            (intmax_t)(allocs - frees), cachefree,
                            (uintmax_t)allocs, sleeps, z->uz_count);
                        if (db_pager_quit)
                                return;
                }
        }
}

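/*
 * DDB "show umacache" command: like "show uma", but for the cache-only zones
 * on the uma_cachezones list, which have no keg and no sleep statistics.
 */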
DB_SHOW_COMMAND(umacache, db_show_umacache)
{
        uint64_t allocs, frees;
        uma_bucket_t bucket;
        uma_zone_t z;
        int cachefree;

        db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
            "Requests", "Bucket");
        LIST_FOREACH(z, &uma_cachezones, uz_link) {
                uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
                LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
                        cachefree += bucket->ub_cnt;
                db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
                    z->uz_name, (uintmax_t)z->uz_size,
                    (intmax_t)(allocs - frees), cachefree,
                    (uintmax_t)allocs, z->uz_count);
                if (db_pager_quit)
                        return;
        }
}
#endif  /* DDB */
#endif /* __rtems__ */