source: rtems-libbsd/freebsd/sys/vm/uma_core.c @ 62c8ca0

55-freebsd-126-freebsd-12
Last change on this file since 62c8ca0 was 62c8ca0, checked in by Sebastian Huber <sebastian.huber@…>, on 05/18/17 at 07:35:46

Fix INVARIANTS support

  • Property mode set to 100644
File size: 91.5 KB
Line 
1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * uma_core.c  Implementation of the Universal Memory allocator
33 *
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36 * efficient.  A primary design goal is to return unused memory to the rest of
37 * the system.  This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
40 *
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
42 * are well known.
43 *
44 */
45
46/*
47 * TODO:
48 *      - Improve memory usage for large allocations
49 *      - Investigate cache size adjustments
50 */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD$");
54
55/* I should really use ktr.. */
56/*
57#define UMA_DEBUG 1
58#define UMA_DEBUG_ALLOC 1
59#define UMA_DEBUG_ALLOC_1 1
60*/
61
62#include <rtems/bsd/local/opt_ddb.h>
63#include <rtems/bsd/local/opt_param.h>
64#include <rtems/bsd/local/opt_vm.h>
65
66#include <sys/param.h>
67#include <sys/systm.h>
68#include <sys/bitset.h>
69#include <sys/eventhandler.h>
70#include <sys/kernel.h>
71#include <sys/types.h>
72#include <sys/queue.h>
73#include <sys/malloc.h>
74#include <sys/ktr.h>
75#include <sys/lock.h>
76#include <sys/sysctl.h>
77#include <sys/mutex.h>
78#include <sys/proc.h>
79#include <sys/random.h>
80#include <sys/rwlock.h>
81#include <sys/sbuf.h>
82#include <sys/sched.h>
83#include <sys/smp.h>
84#include <sys/taskqueue.h>
85#include <sys/vmmeter.h>
86
87#include <vm/vm.h>
88#include <vm/vm_object.h>
89#include <vm/vm_page.h>
90#include <vm/vm_pageout.h>
91#include <vm/vm_param.h>
92#include <vm/vm_map.h>
93#include <vm/vm_kern.h>
94#include <vm/vm_extern.h>
95#include <vm/uma.h>
96#include <vm/uma_int.h>
97#include <vm/uma_dbg.h>
98
99#include <ddb/ddb.h>
100#ifdef __rtems__
101  #ifdef RTEMS_SMP
102    /*
103     * It is essential that we have a per-processor cache, otherwise the
104     * critical_enter()/critical_exit() protection would be insufficient.
105     */
106    #undef curcpu
107    #define curcpu rtems_get_current_processor()
108    #undef mp_maxid
109    #define mp_maxid (rtems_get_processor_count() - 1)
110    #define SMP
111  #endif
112#endif /* __rtems__ */
113
114#ifdef DEBUG_MEMGUARD
115#include <vm/memguard.h>
116#endif
117
118/*
119 * This is the zone and keg from which all zones are spawned.  The idea is that
120 * even the zone & keg heads are allocated from the allocator, so we use the
121 * bss section to bootstrap us.
122 */
123static struct uma_keg masterkeg;
124static struct uma_zone masterzone_k;
125static struct uma_zone masterzone_z;
126static uma_zone_t kegs = &masterzone_k;
127static uma_zone_t zones = &masterzone_z;
128
129/* This is the zone from which all of uma_slab_t's are allocated. */
130static uma_zone_t slabzone;
131
132/*
133 * The initial hash tables come out of this zone so they can be allocated
134 * prior to malloc coming up.
135 */
136static uma_zone_t hashzone;
137
138/* The boot-time adjusted value for cache line alignment. */
139int uma_align_cache = 64 - 1;
140
141static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
142
143#ifndef __rtems__
144/*
145 * Are we allowed to allocate buckets?
146 */
147static int bucketdisable = 1;
148#else /* __rtems__ */
149#define bucketdisable 0
150#endif /* __rtems__ */
151
152/* Linked list of all kegs in the system */
153static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
154
155/* Linked list of all cache-only zones in the system */
156static LIST_HEAD(,uma_zone) uma_cachezones =
157    LIST_HEAD_INITIALIZER(uma_cachezones);
158
159/* This RW lock protects the keg list */
160static struct rwlock_padalign uma_rwlock;
161
162#ifndef __rtems__
163/* Linked list of boot time pages */
164static LIST_HEAD(,uma_slab) uma_boot_pages =
165    LIST_HEAD_INITIALIZER(uma_boot_pages);
166
167/* This mutex protects the boot time pages list */
168static struct mtx_padalign uma_boot_pages_mtx;
169#endif /* __rtems__ */
170
171static struct sx uma_drain_lock;
172
173#ifndef __rtems__
174/* Is the VM done starting up? */
175static int booted = 0;
176#define UMA_STARTUP     1
177#define UMA_STARTUP2    2
178#endif /* __rtems__ */
179
180/*
181 * This is the handle used to schedule events that need to happen
182 * outside of the allocation fast path.
183 */
184static struct callout uma_callout;
185#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
186
187/*
188 * This structure is passed as the zone ctor arg so that I don't have to create
189 * a special allocation function just for zones.
190 */
191struct uma_zctor_args {
192        const char *name;
193        size_t size;
194        uma_ctor ctor;
195        uma_dtor dtor;
196        uma_init uminit;
197        uma_fini fini;
198        uma_import import;
199        uma_release release;
200        void *arg;
201        uma_keg_t keg;
202        int align;
203        uint32_t flags;
204};
205
206struct uma_kctor_args {
207        uma_zone_t zone;
208        size_t size;
209        uma_init uminit;
210        uma_fini fini;
211        int align;
212        uint32_t flags;
213};
214
215struct uma_bucket_zone {
216        uma_zone_t      ubz_zone;
217        char            *ubz_name;
218        int             ubz_entries;    /* Number of items it can hold. */
219        int             ubz_maxsize;    /* Maximum allocation size per-item. */
220};
221
222/*
223 * Compute the actual number of bucket entries to pack them in power
224 * of two sizes for more efficient space utilization.
225 */
226#define BUCKET_SIZE(n)                                          \
227    (((sizeof(void *) * (n)) - sizeof(struct uma_bucket)) / sizeof(void *))
228
229#ifndef __rtems__
230#define BUCKET_MAX      BUCKET_SIZE(256)
231#else /* __rtems__ */
232#define BUCKET_MAX      BUCKET_SIZE(128)
233#endif /* __rtems__ */
234
235struct uma_bucket_zone bucket_zones[] = {
236        { NULL, "4 Bucket", BUCKET_SIZE(4), 4096 },
237        { NULL, "6 Bucket", BUCKET_SIZE(6), 3072 },
238        { NULL, "8 Bucket", BUCKET_SIZE(8), 2048 },
239        { NULL, "12 Bucket", BUCKET_SIZE(12), 1536 },
240        { NULL, "16 Bucket", BUCKET_SIZE(16), 1024 },
241        { NULL, "32 Bucket", BUCKET_SIZE(32), 512 },
242        { NULL, "64 Bucket", BUCKET_SIZE(64), 256 },
243        { NULL, "128 Bucket", BUCKET_SIZE(128), 128 },
244#ifndef __rtems__
245        { NULL, "256 Bucket", BUCKET_SIZE(256), 64 },
246#endif /* __rtems__ */
247        { NULL, NULL, 0}
248};
249
250/*
251 * Flags and enumerations to be passed to internal functions.
252 */
253enum zfreeskip { SKIP_NONE = 0, SKIP_DTOR, SKIP_FINI };
254
255/* Prototypes.. */
256
257#ifndef __rtems__
258static void *noobj_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
259#endif /* __rtems__ */
260static void *page_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
261#ifndef __rtems__
262static void *startup_alloc(uma_zone_t, vm_size_t, uint8_t *, int);
263#endif /* __rtems__ */
264static void page_free(void *, vm_size_t, uint8_t);
265static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
266static void cache_drain(uma_zone_t);
267static void bucket_drain(uma_zone_t, uma_bucket_t);
268static void bucket_cache_drain(uma_zone_t zone);
269static int keg_ctor(void *, int, void *, int);
270static void keg_dtor(void *, int, void *);
271static int zone_ctor(void *, int, void *, int);
272static void zone_dtor(void *, int, void *);
273static int zero_init(void *, int, int);
274static void keg_small_init(uma_keg_t keg);
275static void keg_large_init(uma_keg_t keg);
276static void zone_foreach(void (*zfunc)(uma_zone_t));
277static void zone_timeout(uma_zone_t zone);
278static int hash_alloc(struct uma_hash *);
279static int hash_expand(struct uma_hash *, struct uma_hash *);
280static void hash_free(struct uma_hash *hash);
281static void uma_timeout(void *);
282static void uma_startup3(void);
283static void *zone_alloc_item(uma_zone_t, void *, int);
284static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip);
285static void bucket_enable(void);
286static void bucket_init(void);
287static uma_bucket_t bucket_alloc(uma_zone_t zone, void *, int);
288static void bucket_free(uma_zone_t zone, uma_bucket_t, void *);
289static void bucket_zone_drain(void);
290static uma_bucket_t zone_alloc_bucket(uma_zone_t zone, void *, int flags);
291static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
292#ifndef __rtems__
293static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
294#endif /* __rtems__ */
295static void *slab_alloc_item(uma_keg_t keg, uma_slab_t slab);
296static void slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item);
297static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
298    uma_fini fini, int align, uint32_t flags);
299static int zone_import(uma_zone_t zone, void **bucket, int max, int flags);
300static void zone_release(uma_zone_t zone, void **bucket, int cnt);
301static void uma_zero_item(void *item, uma_zone_t zone);
302
303void uma_print_zone(uma_zone_t);
304void uma_print_stats(void);
305static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
306static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
307
308#ifdef INVARIANTS
309static void uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item);
310static void uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item);
311#endif
312
313SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
314
315SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
316    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
317
318SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
319    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
320
321static int zone_warnings = 1;
322SYSCTL_INT(_vm, OID_AUTO, zone_warnings, CTLFLAG_RWTUN, &zone_warnings, 0,
323    "Warn when UMA zones becomes full");
324
325/*
326 * This routine checks to see whether or not it's safe to enable buckets.
327 */
/*
 * Decide whether per-CPU bucket allocation is currently permitted.
 *
 * On FreeBSD, buckets are disabled whenever the free page count is below
 * the minimum threshold so bucket allocations cannot worsen a memory
 * shortage.  On RTEMS, bucketdisable is the constant 0 (see the #define
 * above), so this function is intentionally a no-op.
 */
static void
bucket_enable(void)
{
#ifndef __rtems__
        bucketdisable = vm_page_count_min();
#endif /* __rtems__ */
}
335
336/*
337 * Initialize bucket_zones, the array of zones of buckets of various sizes.
338 *
339 * For each zone, calculate the memory required for each bucket, consisting
340 * of the header and an array of pointers.
341 */
342static void
343bucket_init(void)
344{
345        struct uma_bucket_zone *ubz;
346        int size;
347
348        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++) {
349                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
350                size += sizeof(void *) * ubz->ubz_entries;
351                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
352                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
353                    UMA_ZONE_MTXCLASS | UMA_ZFLAG_BUCKET);
354        }
355}
356
357/*
358 * Given a desired number of entries for a bucket, return the zone from which
359 * to allocate the bucket.
360 */
361static struct uma_bucket_zone *
362bucket_zone_lookup(int entries)
363{
364        struct uma_bucket_zone *ubz;
365
366        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
367                if (ubz->ubz_entries >= entries)
368                        return (ubz);
369        ubz--;
370        return (ubz);
371}
372
373static int
374bucket_select(int size)
375{
376        struct uma_bucket_zone *ubz;
377
378        ubz = &bucket_zones[0];
379        if (size > ubz->ubz_maxsize)
380                return MAX((ubz->ubz_maxsize * ubz->ubz_entries) / size, 1);
381
382        for (; ubz->ubz_entries != 0; ubz++)
383                if (ubz->ubz_maxsize < size)
384                        break;
385        ubz--;
386        return (ubz->ubz_entries);
387}
388
/*
 * Allocate an empty bucket suitable for the zone's current bucket size.
 *
 * Arguments:
 *      zone   The zone the bucket will serve (used for sizing and flags).
 *      udata  Caller cookie; repurposed here to carry zone flags through
 *             recursive bucket-zone allocations (see comment below).
 *      flags  M_NOWAIT/M_WAITOK style allocation flags.
 *
 * Returns:
 *      A zeroed-count bucket, or NULL if buckets are disabled, recursion
 *      was detected, or the underlying allocation failed.
 */
static uma_bucket_t
bucket_alloc(uma_zone_t zone, void *udata, int flags)
{
        struct uma_bucket_zone *ubz;
        uma_bucket_t bucket;

#ifndef __rtems__
        /*
         * This is to stop us from allocating per cpu buckets while we're
         * running out of vm.boot_pages.  Otherwise, we would exhaust the
         * boot pages.  This also prevents us from allocating buckets in
         * low memory situations.
         */
        if (bucketdisable)
                return (NULL);
#endif /* __rtems__ */
        /*
         * To limit bucket recursion we store the original zone flags
         * in a cookie passed via zalloc_arg/zfree_arg.  This allows the
         * NOVM flag to persist even through deep recursions.  We also
         * store ZFLAG_BUCKET once we have recursed attempting to allocate
         * a bucket for a bucket zone so we do not allow infinite bucket
         * recursion.  This cookie will even persist to frees of unused
         * buckets via the allocation path or bucket allocations in the
         * free path.
         */
        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
                udata = (void *)(uintptr_t)zone->uz_flags;
        else {
                /* Already one level deep: refuse a second recursion. */
                if ((uintptr_t)udata & UMA_ZFLAG_BUCKET)
                        return (NULL);
                udata = (void *)((uintptr_t)udata | UMA_ZFLAG_BUCKET);
        }
        if ((uintptr_t)udata & UMA_ZFLAG_CACHEONLY)
                flags |= M_NOVM;
        ubz = bucket_zone_lookup(zone->uz_count);
        /* A bucket zone must not source buckets from itself. */
        if (ubz->ubz_zone == zone && (ubz + 1)->ubz_entries != 0)
                ubz++;
        bucket = uma_zalloc_arg(ubz->ubz_zone, udata, flags);
        if (bucket) {
#ifdef INVARIANTS
                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
#endif
                bucket->ub_cnt = 0;
                bucket->ub_entries = ubz->ubz_entries;
        }

        return (bucket);
}
438
439static void
440bucket_free(uma_zone_t zone, uma_bucket_t bucket, void *udata)
441{
442        struct uma_bucket_zone *ubz;
443
444        KASSERT(bucket->ub_cnt == 0,
445            ("bucket_free: Freeing a non free bucket."));
446        if ((zone->uz_flags & UMA_ZFLAG_BUCKET) == 0)
447                udata = (void *)(uintptr_t)zone->uz_flags;
448        ubz = bucket_zone_lookup(bucket->ub_entries);
449        uma_zfree_arg(ubz->ubz_zone, bucket, udata);
450}
451
452static void
453bucket_zone_drain(void)
454{
455        struct uma_bucket_zone *ubz;
456
457        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
458                zone_drain(ubz->ubz_zone);
459}
460
461static void
462zone_log_warning(uma_zone_t zone)
463{
464        static const struct timeval warninterval = { 300, 0 };
465
466        if (!zone_warnings || zone->uz_warning == NULL)
467                return;
468
469        if (ratecheck(&zone->uz_ratecheck, &warninterval))
470                printf("[zone: %s] %s\n", zone->uz_name, zone->uz_warning);
471}
472
473static inline void
474zone_maxaction(uma_zone_t zone)
475{
476
477        if (zone->uz_maxaction.ta_func != NULL)
478                taskqueue_enqueue(taskqueue_thread, &zone->uz_maxaction);
479}
480
481static void
482zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
483{
484        uma_klink_t klink;
485
486        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
487                kegfn(klink->kl_keg);
488}
489
490/*
491 * Routine called by timeout which is used to fire off some time interval
492 * based calculations.  (stats, hash size, etc.)
493 *
494 * Arguments:
495 *      arg   Unused
496 *
497 * Returns:
498 *      Nothing
499 */
500static void
501uma_timeout(void *unused)
502{
503        bucket_enable();
504        zone_foreach(zone_timeout);
505
506        /* Reschedule this event */
507        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
508}
509
510/*
511 * Routine to perform timeout driven calculations.  This expands the
512 * hashes and does per cpu statistics aggregation.
513 *
514 *  Returns nothing.
515 */
/*
 * Per-keg timeout work: grow the keg's slab hash table when the slab
 * count has caught up with the current hash size.
 *
 * Arguments:
 *      keg  The keg to service; its lock is taken and released here.
 *
 * Returns nothing.
 */
static void
keg_timeout(uma_keg_t keg)
{

        KEG_LOCK(keg);
        /*
         * Expand the keg hash table.
         *
         * This is done if the number of slabs is larger than the hash size.
         * What I'm trying to do here is completely reduce collisions.  This
         * may be a little aggressive.  Should I allow for two collisions max?
         */
        if (keg->uk_flags & UMA_ZONE_HASH &&
            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
                struct uma_hash newhash;
                struct uma_hash oldhash;
                int ret;

                /*
                 * This is so involved because allocating and freeing
                 * while the keg lock is held will lead to deadlock.
                 * I have to do everything in stages and check for
                 * races.
                 */
                /* Snapshot the old size; hash_alloc() doubles from it. */
                newhash = keg->uk_hash;
                KEG_UNLOCK(keg);
                ret = hash_alloc(&newhash);
                KEG_LOCK(keg);
                if (ret) {
                        if (hash_expand(&keg->uk_hash, &newhash)) {
                                oldhash = keg->uk_hash;
                                keg->uk_hash = newhash;
                        } else
                                /* Lost the race: discard our new table. */
                                oldhash = newhash;

                        /* Free outside the keg lock to avoid deadlock. */
                        KEG_UNLOCK(keg);
                        hash_free(&oldhash);
                        return;
                }
        }
        KEG_UNLOCK(keg);
}
558
559static void
560zone_timeout(uma_zone_t zone)
561{
562
563        zone_foreach_keg(zone, &keg_timeout);
564}
565
566/*
567 * Allocate and zero fill the next sized hash table from the appropriate
568 * backing store.
569 *
570 * Arguments:
571 *      hash  A new hash structure with the old hash size in uh_hashsize
572 *
573 * Returns:
574 *      1 on success and 0 on failure.
575 */
576static int
577hash_alloc(struct uma_hash *hash)
578{
579        int oldsize;
580        int alloc;
581
582        oldsize = hash->uh_hashsize;
583
584        /* We're just going to go to a power of two greater */
585        if (oldsize)  {
586                hash->uh_hashsize = oldsize * 2;
587                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
588                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
589                    M_UMAHASH, M_NOWAIT);
590        } else {
591                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
592                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
593                    M_WAITOK);
594                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
595        }
596        if (hash->uh_slab_hash) {
597                bzero(hash->uh_slab_hash, alloc);
598                hash->uh_hashmask = hash->uh_hashsize - 1;
599                return (1);
600        }
601
602        return (0);
603}
604
605/*
606 * Expands the hash table for HASH zones.  This is done from zone_timeout
607 * to reduce collisions.  This must not be done in the regular allocation
608 * path, otherwise, we can recurse on the vm while allocating pages.
609 *
610 * Arguments:
611 *      oldhash  The hash you want to expand
612 *      newhash  The hash structure for the new table
613 *
614 * Returns:
615 *      Nothing
616 *
617 * Discussion:
618 */
/*
 * Expands the hash table for HASH zones.  This is done from zone_timeout
 * to reduce collisions.  This must not be done in the regular allocation
 * path, otherwise, we can recurse on the vm while allocating pages.
 *
 * Arguments:
 *      oldhash  The hash you want to expand
 *      newhash  The hash structure for the new table
 *
 * Returns:
 *      1 when the rehash was performed, 0 when newhash has no table or
 *      is not actually larger than oldhash (e.g. a racing expander won).
 *
 * Discussion:
 */
static int
hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
{
        uma_slab_t slab;
        int hval;
        int i;

        if (!newhash->uh_slab_hash)
                return (0);

        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
                return (0);

        /*
         * I need to investigate hash algorithms for resizing without a
         * full rehash.
         */

        /* Move every slab from each old chain to its new chain head. */
        for (i = 0; i < oldhash->uh_hashsize; i++)
                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
                        hval = UMA_HASH(newhash, slab->us_data);
                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
                            slab, us_hlink);
                }

        return (1);
}
648
649/*
650 * Free the hash bucket to the appropriate backing store.
651 *
652 * Arguments:
653 *      slab_hash  The hash bucket we're freeing
654 *      hashsize   The number of entries in that hash bucket
655 *
656 * Returns:
657 *      Nothing
658 */
659static void
660hash_free(struct uma_hash *hash)
661{
662        if (hash->uh_slab_hash == NULL)
663                return;
664        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
665                zone_free_item(hashzone, hash->uh_slab_hash, NULL, SKIP_NONE);
666        else
667                free(hash->uh_slab_hash, M_UMAHASH);
668}
669
670/*
671 * Frees all outstanding items in a bucket
672 *
673 * Arguments:
674 *      zone   The zone to free to, must be unlocked.
675 *      bucket The free/alloc bucket with items, cpu queue must be locked.
676 *
677 * Returns:
678 *      Nothing
679 */
680
681static void
682bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
683{
684        int i;
685
686        if (bucket == NULL)
687                return;
688
689        if (zone->uz_fini)
690                for (i = 0; i < bucket->ub_cnt; i++)
691                        zone->uz_fini(bucket->ub_bucket[i], zone->uz_size);
692        zone->uz_release(zone->uz_arg, bucket->ub_bucket, bucket->ub_cnt);
693        bucket->ub_cnt = 0;
694}
695
696/*
697 * Drains the per cpu caches for a zone.
698 *
699 * NOTE: This may only be called while the zone is being turn down, and not
700 * during normal operation.  This is necessary in order that we do not have
701 * to migrate CPUs to drain the per-CPU caches.
702 *
703 * Arguments:
704 *      zone     The zone to drain, must be unlocked.
705 *
706 * Returns:
707 *      Nothing
708 */
/*
 * Drains the per cpu caches for a zone.
 *
 * NOTE: This may only be called while the zone is being turn down, and not
 * during normal operation.  This is necessary in order that we do not have
 * to migrate CPUs to drain the per-CPU caches.
 *
 * Arguments:
 *      zone     The zone to drain, must be unlocked.
 *
 * Returns:
 *      Nothing
 */
static void
cache_drain(uma_zone_t zone)
{
        uma_cache_t cache;
        int cpu;

        /*
         * XXX: It is safe to not lock the per-CPU caches, because we're
         * tearing down the zone anyway.  I.e., there will be no further use
         * of the caches at this point.
         *
         * XXX: It would good to be able to assert that the zone is being
         * torn down to prevent improper use of cache_drain().
         *
         * XXX: We lock the zone before passing into bucket_cache_drain() as
         * it is used elsewhere.  Should the tear-down path be made special
         * there in some form?
         */
        CPU_FOREACH(cpu) {
                cache = &zone->uz_cpu[cpu];
                /* Empty both buckets, then return them to the bucket zone. */
                bucket_drain(zone, cache->uc_allocbucket);
                bucket_drain(zone, cache->uc_freebucket);
                if (cache->uc_allocbucket != NULL)
                        bucket_free(zone, cache->uc_allocbucket, NULL);
                if (cache->uc_freebucket != NULL)
                        bucket_free(zone, cache->uc_freebucket, NULL);
                cache->uc_allocbucket = cache->uc_freebucket = NULL;
        }
        /* Finally flush the zone-level bucket cache as well. */
        ZONE_LOCK(zone);
        bucket_cache_drain(zone);
        ZONE_UNLOCK(zone);
}
741
742#ifndef __rtems__
743static void
744cache_shrink(uma_zone_t zone)
745{
746
747        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
748                return;
749
750        ZONE_LOCK(zone);
751        zone->uz_count = (zone->uz_count_min + zone->uz_count) / 2;
752        ZONE_UNLOCK(zone);
753}
754
/*
 * Drain the current CPU's cache buckets into the zone.  Must run on the
 * target CPU (the caller binds via sched_bind(); see cache_drain_safe()).
 * Buckets holding items are pushed onto the zone bucket list; empty ones
 * are freed after the locks are dropped.  Internal zones are exempt.
 */
static void
cache_drain_safe_cpu(uma_zone_t zone)
{
        uma_cache_t cache;
        uma_bucket_t b1, b2;

        if (zone->uz_flags & UMA_ZFLAG_INTERNAL)
                return;

        b1 = b2 = NULL;
        ZONE_LOCK(zone);
        /* critical_enter() pins us to this CPU while we touch its cache. */
        critical_enter();
        cache = &zone->uz_cpu[curcpu];
        if (cache->uc_allocbucket) {
                if (cache->uc_allocbucket->ub_cnt != 0)
                        LIST_INSERT_HEAD(&zone->uz_buckets,
                            cache->uc_allocbucket, ub_link);
                else
                        b1 = cache->uc_allocbucket;
                cache->uc_allocbucket = NULL;
        }
        if (cache->uc_freebucket) {
                if (cache->uc_freebucket->ub_cnt != 0)
                        LIST_INSERT_HEAD(&zone->uz_buckets,
                            cache->uc_freebucket, ub_link);
                else
                        b2 = cache->uc_freebucket;
                cache->uc_freebucket = NULL;
        }
        critical_exit();
        ZONE_UNLOCK(zone);
        /* Free empty buckets only after dropping the zone lock. */
        if (b1)
                bucket_free(zone, b1, NULL);
        if (b2)
                bucket_free(zone, b2, NULL);
}
791
792/*
793 * Safely drain per-CPU caches of a zone(s) to alloc bucket.
794 * This is an expensive call because it needs to bind to all CPUs
795 * one by one and enter a critical section on each of them in order
796 * to safely access their cache buckets.
797 * Zone lock must not be held on call this function.
798 */
/*
 * Safely drain per-CPU caches of a zone(s) to alloc bucket.
 * This is an expensive call because it needs to bind to all CPUs
 * one by one and enter a critical section on each of them in order
 * to safely access their cache buckets.
 * Zone lock must not be held on call this function.
 *
 * A NULL zone means "all zones": the per-zone helpers are then applied
 * via zone_foreach().
 */
static void
cache_drain_safe(uma_zone_t zone)
{
        int cpu;

        /*
         * Polite bucket sizes shrinking was not enough, shrink aggressively.
         */
        if (zone)
                cache_shrink(zone);
        else
                zone_foreach(cache_shrink);

        CPU_FOREACH(cpu) {
                /* Migrate to the target CPU so its cache is local to us. */
                thread_lock(curthread);
                sched_bind(curthread, cpu);
                thread_unlock(curthread);

                if (zone)
                        cache_drain_safe_cpu(zone);
                else
                        zone_foreach(cache_drain_safe_cpu);
        }
        /* Release the CPU binding acquired in the loop above. */
        thread_lock(curthread);
        sched_unbind(curthread);
        thread_unlock(curthread);
}
826#endif /* __rtems__ */
827
828/*
829 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
830 */
/*
 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
 * The zone lock is dropped around each bucket's drain/free and reacquired
 * afterwards, so the bucket list may be mutated by others in between;
 * re-reading LIST_FIRST() each iteration keeps this safe.
 */
static void
bucket_cache_drain(uma_zone_t zone)
{
        uma_bucket_t bucket;

        /*
         * Drain the bucket queues and free the buckets, we just keep two per
         * cpu (alloc/free).
         */
        while ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
                LIST_REMOVE(bucket, ub_link);
                ZONE_UNLOCK(zone);
                bucket_drain(zone, bucket);
                bucket_free(zone, bucket, NULL);
                ZONE_LOCK(zone);
        }

        /*
         * Shrink further bucket sizes.  Price of single zone lock collision
         * is probably lower then price of global cache drain.
         */
        if (zone->uz_count > zone->uz_count_min)
                zone->uz_count--;
}
855
/*
 * Tear down a slab and return its pages to the keg's page allocator.
 *
 * Arguments:
 *      keg    The keg the slab belongs to.
 *      slab   The slab to destroy.
 *      start  Number of initialized items: uk_fini is run on items
 *             [0, start); callers pass uk_ipers for a fully built slab or
 *             the partial count when backing out of a failed setup.
 */
static void
keg_free_slab(uma_keg_t keg, uma_slab_t slab, int start)
{
        uint8_t *mem;
        int i;
        uint8_t flags;

        /* Save these before the slab header is possibly freed (OFFPAGE). */
        mem = slab->us_data;
        flags = slab->us_flags;
        i = start;
        if (keg->uk_fini != NULL) {
                /* Finalize items in reverse construction order. */
                for (i--; i > -1; i--)
                        keg->uk_fini(slab->us_data + (keg->uk_rsize * i),
                            keg->uk_size);
        }
        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
                zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
#ifdef UMA_DEBUG
        printf("%s: Returning %d bytes.\n", keg->uk_name,
            PAGE_SIZE * keg->uk_ppera);
#endif
        keg->uk_freef(mem, PAGE_SIZE * keg->uk_ppera, flags);
}
879
880/*
881 * Frees pages from a keg back to the system.  This is done on demand from
882 * the pageout daemon.
883 *
884 * Returns nothing.
885 */
/*
 * Frees pages from a keg back to the system.  This is done on demand from
 * the pageout daemon.
 *
 * Works in two phases: free slabs are unlinked onto a local list under
 * the keg lock, then actually destroyed after the lock is dropped, since
 * keg_free_slab() may sleep or recurse into allocators.
 *
 * Returns nothing.
 */
static void
keg_drain(uma_keg_t keg)
{
        struct slabhead freeslabs = { 0 };
        uma_slab_t slab, tmp;

        /*
         * We don't want to take pages from statically allocated kegs at this
         * time
         */
        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
                return;

#ifdef UMA_DEBUG
        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
#endif
        KEG_LOCK(keg);
        if (keg->uk_free == 0)
                goto finished;

        LIST_FOREACH_SAFE(slab, &keg->uk_free_slab, us_link, tmp) {
#ifndef __rtems__
                /* We have nowhere to free these to. */
                if (slab->us_flags & UMA_SLAB_BOOT)
                        continue;
#endif /* __rtems__ */

                LIST_REMOVE(slab, us_link);
                keg->uk_pages -= keg->uk_ppera;
                keg->uk_free -= keg->uk_ipers;

                if (keg->uk_flags & UMA_ZONE_HASH)
                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);

                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
        }
finished:
        KEG_UNLOCK(keg);

        /* Phase two: destroy the collected slabs without the keg lock. */
        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
                keg_free_slab(keg, slab, keg->uk_ipers);
        }
}
930
/*
 * Drain a zone's bucket cache and its kegs, serializing concurrent
 * drainers via the UMA_ZFLAG_DRAINING flag.
 *
 * Arguments:
 *      zone    The zone to drain; must be unlocked.
 *      waitok  M_WAITOK to sleep until a concurrent drain finishes,
 *              M_NOWAIT to return immediately in that case.
 */
static void
zone_drain_wait(uma_zone_t zone, int waitok)
{

        /*
         * Set draining to interlock with zone_dtor() so we can release our
         * locks as we go.  Only dtor() should do a WAITOK call since it
         * is the only call that knows the structure will still be available
         * when it wakes up.
         */
        ZONE_LOCK(zone);
        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
                if (waitok == M_NOWAIT)
                        goto out;
                msleep(zone, zone->uz_lockptr, PVM, "zonedrain", 1);
        }
        zone->uz_flags |= UMA_ZFLAG_DRAINING;
        bucket_cache_drain(zone);
        ZONE_UNLOCK(zone);
        /*
         * The DRAINING flag protects us from being freed while
         * we're running.  Normally the uma_rwlock would protect us but we
         * must be able to release and acquire the right lock for each keg.
         */
        zone_foreach_keg(zone, &keg_drain);
        ZONE_LOCK(zone);
        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
        /* Wake anyone sleeping in the msleep() loop above. */
        wakeup(zone);
out:
        ZONE_UNLOCK(zone);
}
962
/* Public entry point: non-blocking drain of a zone's caches and kegs. */
void
zone_drain(uma_zone_t zone)
{

	zone_drain_wait(zone, M_NOWAIT);
}
969
970/*
971 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
972 *
973 * Arguments:
974 *      wait  Shall we wait?
975 *
976 * Returns:
977 *      The slab that was allocated or NULL if there is no memory and the
978 *      caller specified M_NOWAIT.
979 */
static uma_slab_t
keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
{
	uma_alloc allocf;
	uma_slab_t slab;
	uint8_t *mem;
	uint8_t flags;
	int i;

	mtx_assert(&keg->uk_lock, MA_OWNED);
	slab = NULL;
	mem = NULL;

#ifdef UMA_DEBUG
	printf("alloc_slab:  Allocating a new slab for %s\n", keg->uk_name);
#endif
	/* Snapshot the allocator; the keg lock is dropped while we allocate. */
	allocf = keg->uk_allocf;
	KEG_UNLOCK(keg);

	/* OFFPAGE kegs keep the slab header outside the slab's own pages. */
	if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
		slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
		if (slab == NULL)
			goto out;
	}

	/*
	 * This reproduces the old vm_zone behavior of zero filling pages the
	 * first time they are added to a zone.
	 *
	 * Malloced items are zeroed in uma_zalloc.
	 */

	if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
		wait |= M_ZERO;
	else
		wait &= ~M_ZERO;

	if (keg->uk_flags & UMA_ZONE_NODUMP)
		wait |= M_NODUMP;

	/* zone is passed for legacy reasons. */
	mem = allocf(zone, keg->uk_ppera * PAGE_SIZE, &flags, wait);
	if (mem == NULL) {
		/* Undo the OFFPAGE header allocation before failing. */
		if (keg->uk_flags & UMA_ZONE_OFFPAGE)
			zone_free_item(keg->uk_slabzone, slab, NULL, SKIP_NONE);
		slab = NULL;
		goto out;
	}

	/* Point the slab into the allocated memory */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
		slab = (uma_slab_t )(mem + keg->uk_pgoff);

	/* Record the page-to-slab mapping for later vtoslab() lookups. */
	if (keg->uk_flags & UMA_ZONE_VTOSLAB)
		for (i = 0; i < keg->uk_ppera; i++)
			vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);

	slab->us_keg = keg;
	slab->us_data = mem;
	slab->us_freecount = keg->uk_ipers;
	slab->us_flags = flags;
	/* Every item in a fresh slab starts out free. */
	BIT_FILL(SLAB_SETSIZE, &slab->us_free);
#ifdef INVARIANTS
	BIT_ZERO(SLAB_SETSIZE, &slab->us_debugfree);
#endif

	/*
	 * Run the keg's item initializer on each item; if one fails, free
	 * the i items that were successfully initialized and bail out.
	 */
	if (keg->uk_init != NULL) {
		for (i = 0; i < keg->uk_ipers; i++)
			if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
			    keg->uk_size, wait) != 0)
				break;
		if (i != keg->uk_ipers) {
			keg_free_slab(keg, slab, i);
			slab = NULL;
			goto out;
		}
	}
out:
	/* Reacquire the keg lock before updating shared accounting. */
	KEG_LOCK(keg);

	if (slab != NULL) {
		if (keg->uk_flags & UMA_ZONE_HASH)
			UMA_HASH_INSERT(&keg->uk_hash, slab, mem);

		keg->uk_pages += keg->uk_ppera;
		keg->uk_free += keg->uk_ipers;
	}

	return (slab);
}
1070
1071#ifndef __rtems__
1072/*
1073 * This function is intended to be used early on in place of page_alloc() so
1074 * that we may use the boot time page cache to satisfy allocations before
1075 * the VM is ready.
1076 */
static void *
startup_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
{
	uma_keg_t keg;
	uma_slab_t tmps;
	int pages, check_pages;

	keg = zone_first_keg(zone);
	pages = howmany(bytes, PAGE_SIZE);
	/* The list walk below looks for pages - 1 successors of the head. */
	check_pages = pages - 1;
	KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));

	/*
	 * Check our small startup cache to see if it has pages remaining.
	 */
	mtx_lock(&uma_boot_pages_mtx);

	/* First check if we have enough room. */
	tmps = LIST_FIRST(&uma_boot_pages);
	while (tmps != NULL && check_pages-- > 0)
		tmps = LIST_NEXT(tmps, us_link);
	if (tmps != NULL) {
		/*
		 * It's ok to lose tmps references.  The last one will
		 * have tmps->us_data pointing to the start address of
		 * "pages" contiguous pages of memory.
		 */
		while (pages-- > 0) {
			tmps = LIST_FIRST(&uma_boot_pages);
			LIST_REMOVE(tmps, us_link);
		}
		mtx_unlock(&uma_boot_pages_mtx);
		*pflag = tmps->us_flags;
		return (tmps->us_data);
	}
	mtx_unlock(&uma_boot_pages_mtx);
	/* Before UMA_STARTUP2 there is no fallback allocator available. */
	if (booted < UMA_STARTUP2)
		panic("UMA: Increase vm.boot_pages");
	/*
	 * Now that we've booted reset these users to their real allocator.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
#else
	keg->uk_allocf = page_alloc;
#endif
	return keg->uk_allocf(zone, bytes, pflag, wait);
}
1125#endif /* __rtems__ */
1126
1127/*
1128 * Allocates a number of pages from the system
1129 *
1130 * Arguments:
1131 *      bytes  The number of bytes requested
1132 *      wait  Shall we wait?
1133 *
1134 * Returns:
1135 *      A pointer to the alloced memory or possibly
1136 *      NULL if M_NOWAIT is set.
1137 */
static void *
page_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *pflag, int wait)
{
	void *p;	/* Returned page */

#ifndef __rtems__
	/* Tag the slab so page_free() returns it to the kmem arena. */
	*pflag = UMA_SLAB_KMEM;
	p = (void *) kmem_malloc(kmem_arena, bytes, wait);
#else /* __rtems__ */
	/* RTEMS uses its own page allocator; no slab flag is needed. */
	*pflag = 0;
	p = rtems_bsd_page_alloc(bytes, wait);
#endif /* __rtems__ */

	return (p);
}
1153
1154#ifndef __rtems__
1155/*
1156 * Allocates a number of pages from within an object
1157 *
1158 * Arguments:
1159 *      bytes  The number of bytes requested
1160 *      wait   Shall we wait?
1161 *
1162 * Returns:
1163 *      A pointer to the alloced memory or possibly
1164 *      NULL if M_NOWAIT is set.
1165 */
static void *
noobj_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{
	TAILQ_HEAD(, vm_page) alloctail;
	u_long npages;
	vm_offset_t retkva, zkva;
	vm_page_t p, p_next;
	uma_keg_t keg;

	TAILQ_INIT(&alloctail);
	keg = zone_first_keg(zone);

	/* Gather npages wired pages one at a time on a local list. */
	npages = howmany(bytes, PAGE_SIZE);
	while (npages > 0) {
		p = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
		    VM_ALLOC_WIRED | VM_ALLOC_NOOBJ);
		if (p != NULL) {
			/*
			 * Since the page does not belong to an object, its
			 * listq is unused.
			 */
			TAILQ_INSERT_TAIL(&alloctail, p, listq);
			npages--;
			continue;
		}
		/* A sleeping caller may wait for memory and retry. */
		if (wait & M_WAITOK) {
			VM_WAIT;
			continue;
		}

		/*
		 * Page allocation failed, free intermediate pages and
		 * exit.
		 */
		TAILQ_FOREACH_SAFE(p, &alloctail, listq, p_next) {
			vm_page_unwire(p, PQ_NONE);
			vm_page_free(p);
		}
		return (NULL);
	}
	*flags = UMA_SLAB_PRIV;
	/* Claim a contiguous KVA range from the keg's reserved region. */
	zkva = keg->uk_kva +
	    atomic_fetchadd_long(&keg->uk_offset, round_page(bytes));
	retkva = zkva;
	/* Map each gathered page consecutively into that range. */
	TAILQ_FOREACH(p, &alloctail, listq) {
		pmap_qenter(zkva, &p, 1);
		zkva += PAGE_SIZE;
	}

	return ((void *)retkva);
}
1217#endif /* __rtems__ */
1218
1219/*
1220 * Frees a number of pages to the system
1221 *
1222 * Arguments:
1223 *      mem   A pointer to the memory to be freed
1224 *      size  The size of the memory being freed
1225 *      flags The original p->us_flags field
1226 *
1227 * Returns:
1228 *      Nothing
1229 */
static void
page_free(void *mem, vm_size_t size, uint8_t flags)
{
#ifndef __rtems__
	struct vmem *vmem;

	/* Select the arena matching the flag set by the allocator. */
	if (flags & UMA_SLAB_KMEM)
		vmem = kmem_arena;
	else if (flags & UMA_SLAB_KERNEL)
		vmem = kernel_arena;
	else
		panic("UMA: page_free used with invalid flags %x", flags);

	kmem_free(vmem, (vm_offset_t)mem, size);
#else /* __rtems__ */
	/* On RTEMS, UMA_SLAB_KERNEL memory came from malloc(9). */
	if (flags & UMA_SLAB_KERNEL)
		free(mem, M_TEMP);
	else
		rtems_bsd_page_free(mem);
#endif /* __rtems__ */
}
1251
1252/*
1253 * Zero fill initializer
1254 *
1255 * Arguments/Returns follow uma_init specifications
1256 */
static int
zero_init(void *mem, int size, int flags)
{

	/* Zero the item; the allocation flags are irrelevant here. */
	bzero(mem, size);
	return (0);
}
1263
1264/*
1265 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
1266 *
1267 * Arguments
1268 *      keg  The zone we should initialize
1269 *
1270 * Returns
1271 *      Nothing
1272 */
static void
keg_small_init(uma_keg_t keg)
{
	u_int rsize;		/* aligned item size */
	u_int memused;		/* bytes of the slab actually used */
	u_int wastedspace;	/* slack left at the end of the slab */
	u_int shsize;		/* in-page slab header size, 0 if OFFPAGE */
	u_int slabsize;

	if (keg->uk_flags & UMA_ZONE_PCPU) {
		/* mp_maxid + 1 may be 0 before CPUs are enumerated. */
		u_int ncpus = (mp_maxid + 1) ? (mp_maxid + 1) : MAXCPU;

		slabsize = sizeof(struct pcpu);
		keg->uk_ppera = howmany(ncpus * sizeof(struct pcpu),
		    PAGE_SIZE);
	} else {
		slabsize = UMA_SLAB_SIZE;
		keg->uk_ppera = 1;
	}

	/*
	 * Calculate the size of each allocation (rsize) according to
	 * alignment.  If the requested size is smaller than we have
	 * allocation bits for we round it up.
	 */
	rsize = keg->uk_size;
	if (rsize < slabsize / SLAB_SETSIZE)
		rsize = slabsize / SLAB_SETSIZE;
	if (rsize & keg->uk_align)
		rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
	keg->uk_rsize = rsize;

	KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0 ||
	    keg->uk_rsize < sizeof(struct pcpu),
	    ("%s: size %u too large", __func__, keg->uk_rsize));

	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
		shsize = 0;
	else
		shsize = sizeof(struct uma_slab);

	keg->uk_ipers = (slabsize - shsize) / rsize;
	KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
	    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));

	memused = keg->uk_ipers * rsize + shsize;
	wastedspace = slabsize - memused;

	/*
	 * We can't do OFFPAGE if we're internal or if we've been
	 * asked to not go to the VM for buckets.  If we do this we
	 * may end up going to the VM  for slabs which we do not
	 * want to do if we're UMA_ZFLAG_CACHEONLY as a result
	 * of UMA_ZONE_VM, which clearly forbids it.
	 */
	if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
	    (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
		return;

	/*
	 * See if using an OFFPAGE slab will limit our waste.  Only do
	 * this if it permits more items per-slab.
	 *
	 * XXX We could try growing slabsize to limit max waste as well.
	 * Historically this was not done because the VM could not
	 * efficiently handle contiguous allocations.
	 */
	if ((wastedspace >= slabsize / UMA_MAX_WASTE) &&
	    (keg->uk_ipers < (slabsize / keg->uk_rsize))) {
		keg->uk_ipers = slabsize / keg->uk_rsize;
		KASSERT(keg->uk_ipers > 0 && keg->uk_ipers <= SLAB_SETSIZE,
		    ("%s: keg->uk_ipers %u", __func__, keg->uk_ipers));
#ifdef UMA_DEBUG
		printf("UMA decided we need offpage slab headers for "
		    "keg: %s, calculated wastedspace = %d, "
		    "maximum wasted space allowed = %d, "
		    "calculated ipers = %d, "
		    "new wasted space = %d\n", keg->uk_name, wastedspace,
		    slabsize / UMA_MAX_WASTE, keg->uk_ipers,
		    slabsize - keg->uk_ipers * keg->uk_rsize);
#endif
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
	}

	/* OFFPAGE kegs without vtoslab need a hash to find slab headers. */
	if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
	    (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
		keg->uk_flags |= UMA_ZONE_HASH;
}
1361
1362/*
1363 * Finish creating a large (> UMA_SLAB_SIZE) uma kegs.  Just give in and do
1364 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1365 * more complicated.
1366 *
1367 * Arguments
1368 *      keg  The keg we should initialize
1369 *
1370 * Returns
1371 *      Nothing
1372 */
1373static void
1374keg_large_init(uma_keg_t keg)
1375{
1376        u_int shsize;
1377
1378        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1379        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1380            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1381        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1382            ("%s: Cannot large-init a UMA_ZONE_PCPU keg", __func__));
1383
1384        keg->uk_ppera = howmany(keg->uk_size, PAGE_SIZE);
1385        keg->uk_ipers = 1;
1386        keg->uk_rsize = keg->uk_size;
1387
1388        /* We can't do OFFPAGE if we're internal, bail out here. */
1389        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1390                return;
1391
1392        /* Check whether we have enough space to not do OFFPAGE. */
1393        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) {
1394                shsize = sizeof(struct uma_slab);
1395                if (shsize & UMA_ALIGN_PTR)
1396                        shsize = (shsize & ~UMA_ALIGN_PTR) +
1397                            (UMA_ALIGN_PTR + 1);
1398
1399                if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize)
1400                        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1401        }
1402
1403        if ((keg->uk_flags & UMA_ZONE_OFFPAGE) &&
1404            (keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1405                keg->uk_flags |= UMA_ZONE_HASH;
1406}
1407
1408static void
1409keg_cachespread_init(uma_keg_t keg)
1410{
1411        int alignsize;
1412        int trailer;
1413        int pages;
1414        int rsize;
1415
1416        KASSERT((keg->uk_flags & UMA_ZONE_PCPU) == 0,
1417            ("%s: Cannot cachespread-init a UMA_ZONE_PCPU keg", __func__));
1418
1419        alignsize = keg->uk_align + 1;
1420        rsize = keg->uk_size;
1421        /*
1422         * We want one item to start on every align boundary in a page.  To
1423         * do this we will span pages.  We will also extend the item by the
1424         * size of align if it is an even multiple of align.  Otherwise, it
1425         * would fall on the same boundary every time.
1426         */
1427        if (rsize & keg->uk_align)
1428                rsize = (rsize & ~keg->uk_align) + alignsize;
1429        if ((rsize & alignsize) == 0)
1430                rsize += alignsize;
1431        trailer = rsize - keg->uk_size;
1432        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1433        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1434        keg->uk_rsize = rsize;
1435        keg->uk_ppera = pages;
1436        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1437        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1438        KASSERT(keg->uk_ipers <= SLAB_SETSIZE,
1439            ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1440            keg->uk_ipers));
1441}
1442
1443/*
1444 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1445 * the keg onto the global keg list.
1446 *
1447 * Arguments/Returns follow uma_ctor specifications
1448 *      udata  Actually uma_kctor_args
1449 */
static int
keg_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_kctor_args *arg = udata;
	uma_keg_t keg = mem;
	uma_zone_t zone;

	bzero(keg, size);
	keg->uk_size = arg->size;
	keg->uk_init = arg->uminit;
	keg->uk_fini = arg->fini;
	keg->uk_align = arg->align;
	keg->uk_free = 0;
	keg->uk_reserve = 0;
	keg->uk_pages = 0;
	keg->uk_flags = arg->flags;
	/* Default backend; may be replaced by layout/boot logic below. */
	keg->uk_allocf = page_alloc;
	keg->uk_freef = page_free;
	keg->uk_slabzone = NULL;

	/*
	 * The master zone is passed to us at keg-creation time.
	 */
	zone = arg->zone;
	keg->uk_name = zone->uz_name;

	if (arg->flags & UMA_ZONE_VM)
		keg->uk_flags |= UMA_ZFLAG_CACHEONLY;

	if (arg->flags & UMA_ZONE_ZINIT)
		keg->uk_init = zero_init;

	if (arg->flags & UMA_ZONE_MALLOC)
		keg->uk_flags |= UMA_ZONE_VTOSLAB;

	/* On non-SMP kernels UMA_ZONE_PCPU degrades to a regular keg. */
	if (arg->flags & UMA_ZONE_PCPU)
#ifdef SMP
		keg->uk_flags |= UMA_ZONE_OFFPAGE;
#else
		keg->uk_flags &= ~UMA_ZONE_PCPU;
#endif

	/* Choose the slab layout strategy based on flags and item size. */
	if (keg->uk_flags & UMA_ZONE_CACHESPREAD) {
		keg_cachespread_init(keg);
	} else {
		if (keg->uk_size > (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
			keg_large_init(keg);
		else
			keg_small_init(keg);
	}

	if (keg->uk_flags & UMA_ZONE_OFFPAGE)
		keg->uk_slabzone = slabzone;

	/*
	 * If we haven't booted yet we need allocations to go through the
	 * startup cache until the vm is ready.
	 */
	if (keg->uk_ppera == 1) {
#ifdef UMA_MD_SMALL_ALLOC
		keg->uk_allocf = uma_small_alloc;
		keg->uk_freef = uma_small_free;

#ifndef __rtems__
		if (booted < UMA_STARTUP)
			keg->uk_allocf = startup_alloc;
#endif /* __rtems__ */
#else
#ifndef __rtems__
		if (booted < UMA_STARTUP2)
			keg->uk_allocf = startup_alloc;
#endif /* __rtems__ */
#endif
#ifndef __rtems__
	} else if (booted < UMA_STARTUP2 &&
	    (keg->uk_flags & UMA_ZFLAG_INTERNAL))
		keg->uk_allocf = startup_alloc;
#else /* __rtems__ */
	}
#endif /* __rtems__ */

	/*
	 * Initialize keg's lock
	 */
	KEG_LOCK_INIT(keg, (arg->flags & UMA_ZONE_MTXCLASS));

	/*
	 * If we're putting the slab header in the actual page we need to
	 * figure out where in each page it goes.  This calculates a right
	 * justified offset into the memory on an ALIGN_PTR boundary.
	 */
	if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
		u_int totsize;

		/* Size of the slab struct and free list */
		totsize = sizeof(struct uma_slab);

		/* Round the header size up to pointer alignment. */
		if (totsize & UMA_ALIGN_PTR)
			totsize = (totsize & ~UMA_ALIGN_PTR) +
			    (UMA_ALIGN_PTR + 1);
		keg->uk_pgoff = (PAGE_SIZE * keg->uk_ppera) - totsize;

		/*
		 * The only way the following is possible is if with our
		 * UMA_ALIGN_PTR adjustments we are now bigger than
		 * UMA_SLAB_SIZE.  I haven't checked whether this is
		 * mathematically possible for all cases, so we make
		 * sure here anyway.
		 */
		totsize = keg->uk_pgoff + sizeof(struct uma_slab);
		if (totsize > PAGE_SIZE * keg->uk_ppera) {
			printf("zone %s ipers %d rsize %d size %d\n",
			    zone->uz_name, keg->uk_ipers, keg->uk_rsize,
			    keg->uk_size);
			panic("UMA slab won't fit.");
		}
	}

	if (keg->uk_flags & UMA_ZONE_HASH)
		hash_alloc(&keg->uk_hash);

#ifdef UMA_DEBUG
	printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
	    zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
	    keg->uk_ipers, keg->uk_ppera,
	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
	    keg->uk_free);
#endif

	/* Link the master zone to this keg and publish the keg globally. */
	LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);

	rw_wlock(&uma_rwlock);
	LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
	rw_wunlock(&uma_rwlock);
	return (0);
}
1586
1587/*
1588 * Zone header ctor.  This initializes all fields, locks, etc.
1589 *
1590 * Arguments/Returns follow uma_ctor specifications
1591 *      udata  Actually uma_zctor_args
1592 */
static int
zone_ctor(void *mem, int size, void *udata, int flags)
{
	struct uma_zctor_args *arg = udata;
	uma_zone_t zone = mem;
	uma_zone_t z;
	uma_keg_t keg;

	bzero(zone, size);
	zone->uz_name = arg->name;
	zone->uz_ctor = arg->ctor;
	zone->uz_dtor = arg->dtor;
	zone->uz_slab = zone_fetch_slab;
	zone->uz_init = NULL;
	zone->uz_fini = NULL;
	zone->uz_allocs = 0;
	zone->uz_frees = 0;
	zone->uz_fails = 0;
	zone->uz_sleeps = 0;
	zone->uz_count = 0;
	zone->uz_count_min = 0;
	zone->uz_flags = 0;
	zone->uz_warning = NULL;
	timevalclear(&zone->uz_ratecheck);
	keg = arg->keg;

	ZONE_LOCK_INIT(zone, (arg->flags & UMA_ZONE_MTXCLASS));

	/*
	 * This is a pure cache zone, no kegs.
	 */
	if (arg->import) {
		if (arg->flags & UMA_ZONE_VM)
			arg->flags |= UMA_ZFLAG_CACHEONLY;
		zone->uz_flags = arg->flags;
		zone->uz_size = arg->size;
		/* Caller-supplied import/release replace the keg path. */
		zone->uz_import = arg->import;
		zone->uz_release = arg->release;
		zone->uz_arg = arg->arg;
		zone->uz_lockptr = &zone->uz_lock;
		rw_wlock(&uma_rwlock);
		LIST_INSERT_HEAD(&uma_cachezones, zone, uz_link);
		rw_wunlock(&uma_rwlock);
		goto out;
	}

	/*
	 * Use the regular zone/keg/slab allocator.
	 */
	zone->uz_import = (uma_import)zone_import;
	zone->uz_release = (uma_release)zone_release;
	zone->uz_arg = zone;

	if (arg->flags & UMA_ZONE_SECONDARY) {
		KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
		zone->uz_init = arg->uminit;
		zone->uz_fini = arg->fini;
		/* A secondary zone shares its keg's lock. */
		zone->uz_lockptr = &keg->uk_lock;
		zone->uz_flags |= UMA_ZONE_SECONDARY;
		rw_wlock(&uma_rwlock);
		ZONE_LOCK(zone);
		/* Append this zone at the tail of the keg's zone list. */
		LIST_FOREACH(z, &keg->uk_zones, uz_link) {
			if (LIST_NEXT(z, uz_link) == NULL) {
				LIST_INSERT_AFTER(z, zone, uz_link);
				break;
			}
		}
		ZONE_UNLOCK(zone);
		rw_wunlock(&uma_rwlock);
	} else if (keg == NULL) {
		/* No keg supplied: create a fresh one for this zone. */
		if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
		    arg->align, arg->flags)) == NULL)
			return (ENOMEM);
	} else {
		struct uma_kctor_args karg;
		int error;

		/* We should only be here from uma_startup() */
		karg.size = arg->size;
		karg.uminit = arg->uminit;
		karg.fini = arg->fini;
		karg.align = arg->align;
		karg.flags = arg->flags;
		karg.zone = zone;
		error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
		    flags);
		if (error)
			return (error);
	}

	/*
	 * Link in the first keg.
	 */
	zone->uz_klink.kl_keg = keg;
	LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
	zone->uz_lockptr = &keg->uk_lock;
	zone->uz_size = keg->uk_size;
	/* Inherit the keg flags that also apply at the zone level. */
	zone->uz_flags |= (keg->uk_flags &
	    (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));

	/*
	 * Some internal zones don't have room allocated for the per cpu
	 * caches.  If we're internal, bail out here.
	 */
	if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
		KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
		    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
		return (0);
	}

out:
	/* Choose the per-CPU bucket size (max unless a cap is requested). */
	if ((arg->flags & UMA_ZONE_MAXBUCKET) == 0)
		zone->uz_count = bucket_select(zone->uz_size);
	else
		zone->uz_count = BUCKET_MAX;
	zone->uz_count_min = zone->uz_count;

	return (0);
}
1712
1713/*
1714 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1715 * table and removes the keg from the global list.
1716 *
1717 * Arguments/Returns follow uma_dtor specifications
1718 *      udata  unused
1719 */
static void
keg_dtor(void *arg, int size, void *udata)
{
	uma_keg_t keg;

	keg = (uma_keg_t)arg;
	KEG_LOCK(keg);
	/* Warn if items are still outstanding; their pages are leaked. */
	if (keg->uk_free != 0) {
		printf("Freed UMA keg (%s) was not empty (%d items). "
		    " Lost %d pages of memory.\n",
		    keg->uk_name ? keg->uk_name : "",
		    keg->uk_free, keg->uk_pages);
	}
	KEG_UNLOCK(keg);

	hash_free(&keg->uk_hash);

	KEG_LOCK_FINI(keg);
}
1739
1740/*
1741 * Zone header dtor.
1742 *
1743 * Arguments/Returns follow uma_dtor specifications
1744 *      udata  unused
1745 */
static void
zone_dtor(void *arg, int size, void *udata)
{
	uma_klink_t klink;
	uma_zone_t zone;
	uma_keg_t keg;

	zone = (uma_zone_t)arg;
	keg = zone_first_keg(zone);

	/* Internal zones have no per-CPU caches to drain. */
	if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
		cache_drain(zone);

	rw_wlock(&uma_rwlock);
	LIST_REMOVE(zone, uz_link);
	rw_wunlock(&uma_rwlock);
	/*
	 * XXX there are some races here where
	 * the zone can be drained but zone lock
	 * released and then refilled before we
	 * remove it... we dont care for now
	 */
	zone_drain_wait(zone, M_WAITOK);
	/*
	 * Unlink all of our kegs.
	 */
	while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
		klink->kl_keg = NULL;
		LIST_REMOVE(klink, kl_link);
		/* uz_klink is embedded in the zone; don't free() it. */
		if (klink == &zone->uz_klink)
			continue;
		free(klink, M_TEMP);
	}
	/*
	 * We only destroy kegs from non secondary zones.
	 */
	if (keg != NULL && (zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
		rw_wlock(&uma_rwlock);
		LIST_REMOVE(keg, uk_link);
		rw_wunlock(&uma_rwlock);
		zone_free_item(kegs, keg, NULL, SKIP_NONE);
	}
	ZONE_LOCK_FINI(zone);
}
1790
1791/*
1792 * Traverses every zone in the system and calls a callback
1793 *
1794 * Arguments:
1795 *      zfunc  A pointer to a function which accepts a zone
1796 *              as an argument.
1797 *
1798 * Returns:
1799 *      Nothing
1800 */
static void
zone_foreach(void (*zfunc)(uma_zone_t))
{
	uma_keg_t keg;
	uma_zone_t zone;

	/* The read lock keeps the keg/zone lists stable during the walk. */
	rw_rlock(&uma_rwlock);
	LIST_FOREACH(keg, &uma_kegs, uk_link) {
		LIST_FOREACH(zone, &keg->uk_zones, uz_link)
			zfunc(zone);
	}
	rw_runlock(&uma_rwlock);
}
1814
1815/* Public functions */
1816/* See uma.h */
void
uma_startup(void *bootmem, int boot_pages)
{
	struct uma_zctor_args args;
#ifndef __rtems__
	uma_slab_t slab;
	int i;
#endif /* __rtems__ */

#ifdef UMA_DEBUG
	printf("Creating uma keg headers zone and keg.\n");
#endif
	rw_init(&uma_rwlock, "UMA lock");

	/* "manually" create the initial zone */
	memset(&args, 0, sizeof(args));
	args.name = "UMA Kegs";
	args.size = sizeof(struct uma_keg);
	args.ctor = keg_ctor;
	args.dtor = keg_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = &masterkeg;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);

#ifndef __rtems__
#ifdef UMA_DEBUG
	printf("Filling boot free list.\n");
#endif
	/* Carve bootmem into boot_pages slabs for startup_alloc(). */
	for (i = 0; i < boot_pages; i++) {
		slab = (uma_slab_t)((uint8_t *)bootmem + (i * UMA_SLAB_SIZE));
		slab->us_data = (uint8_t *)slab;
		slab->us_flags = UMA_SLAB_BOOT;
		LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
	}
	mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
#endif /* __rtems__ */

#ifdef UMA_DEBUG
	printf("Creating uma zone headers zone and keg.\n");
#endif
	args.name = "UMA Zones";
	/* Zone headers carry one uma_cache per CPU after the struct. */
	args.size = sizeof(struct uma_zone) +
	    (sizeof(struct uma_cache) * (mp_maxid + 1));
	args.ctor = zone_ctor;
	args.dtor = zone_dtor;
	args.uminit = zero_init;
	args.fini = NULL;
	args.keg = NULL;
	args.align = 32 - 1;
	args.flags = UMA_ZFLAG_INTERNAL;
	/* The initial zone has no Per cpu queues so it's smaller */
	zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);

#ifdef UMA_DEBUG
	printf("Creating slab and hash zones.\n");
#endif

	/* Now make a zone for slab headers */
	slabzone = uma_zcreate("UMA Slabs",
				sizeof(struct uma_slab),
				NULL, NULL, NULL, NULL,
				UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	hashzone = uma_zcreate("UMA Hash",
	    sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
	    NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);

	bucket_init();

#ifndef __rtems__
	booted = UMA_STARTUP;
#endif /* __rtems__ */

#ifdef UMA_DEBUG
	printf("UMA startup complete.\n");
#endif
}
1899#ifdef __rtems__
/*
 * RTEMS replacement for the FreeBSD boot sequence: bring UMA up from a
 * SYSINIT with no boot pages (bootmem is not used on RTEMS).
 */
static void
rtems_bsd_uma_startup(void *unused)
{
        (void) unused;

        /*
         * NOTE(review): SX_RECURSE presumably allows uma_drain_lock to
         * be re-acquired during startup-time zone creation -- confirm.
         */
        sx_init_flags(&uma_drain_lock, "umadrain", SX_RECURSE);
        uma_startup(NULL, 0);
}

SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_SECOND,
    rtems_bsd_uma_startup, NULL);
1911#endif /* __rtems__ */
1912
1913#ifndef __rtems__
/*
 * See uma.h.
 *
 * Second stage of UMA initialization (FreeBSD only), run once the VM
 * can back slab allocations: enable buckets and create the drain lock.
 */
void
uma_startup2(void)
{
        booted = UMA_STARTUP2;
        bucket_enable();
        sx_init(&uma_drain_lock, "umadrain");
#ifdef UMA_DEBUG
        printf("UMA startup2 complete.\n");
#endif
}
1925#endif /* __rtems__ */
1926
/*
 * Final stage of UMA initialization: arm the periodic uma_timeout()
 * callout, which fires every UMA_TIMEOUT seconds.
 */
static void
uma_startup3(void)
{
#ifdef UMA_DEBUG
        printf("Starting callout.\n");
#endif
        callout_init(&uma_callout, 1);
        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
#ifdef UMA_DEBUG
        printf("UMA startup3 complete.\n");
#endif
}
1944
1945static uma_keg_t
1946uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1947                int align, uint32_t flags)
1948{
1949        struct uma_kctor_args args;
1950
1951        args.size = size;
1952        args.uminit = uminit;
1953        args.fini = fini;
1954        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1955        args.flags = flags;
1956        args.zone = zone;
1957        return (zone_alloc_item(kegs, &args, M_WAITOK));
1958}
1959
1960/* See uma.h */
1961void
1962uma_set_align(int align)
1963{
1964
1965        if (align != UMA_ALIGN_CACHE)
1966                uma_align_cache = align;
1967}
1968
/*
 * See uma.h.
 *
 * Create a new zone backed by its own keg.  Under INVARIANTS, zones
 * created without any init/ctor/dtor/fini get the trash hooks, which
 * detect use-after-free.
 */
uma_zone_t
uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
                uma_init uminit, uma_fini fini, int align, uint32_t flags)

{
        struct uma_zctor_args args;
        uma_zone_t res;
#ifndef __rtems__
        bool locked;
#endif /* __rtems__ */

        /* This stuff is essential for the zone ctor */
        memset(&args, 0, sizeof(args));
        args.name = name;
        args.size = size;
        args.ctor = ctor;
        args.dtor = dtor;
        args.uminit = uminit;
        args.fini = fini;
#ifdef  INVARIANTS
        /*
         * If a zone is being created with an empty constructor and
         * destructor, pass UMA constructor/destructor which checks for
         * memory use after free.
         */
        if ((!(flags & (UMA_ZONE_ZINIT | UMA_ZONE_NOFREE))) &&
            ctor == NULL && dtor == NULL && uminit == NULL && fini == NULL) {
                args.ctor = trash_ctor;
                args.dtor = trash_dtor;
                args.uminit = trash_init;
                args.fini = trash_fini;
        }
#endif
        args.align = align;
        args.flags = flags;
        args.keg = NULL;

        /*
         * On FreeBSD the drain lock is only initialized by uma_startup2(),
         * so skip it during early boot.  On RTEMS the lock is created
         * before any zone exists, so it is always taken.
         */
#ifndef __rtems__
        if (booted < UMA_STARTUP2) {
                locked = false;
        } else {
#endif /* __rtems__ */
                sx_slock(&uma_drain_lock);
#ifndef __rtems__
                locked = true;
        }
#endif /* __rtems__ */
        res = zone_alloc_item(zones, &args, M_WAITOK);
#ifndef __rtems__
        if (locked)
#endif /* __rtems__ */
                sx_sunlock(&uma_drain_lock);
        return (res);
}
2024
/*
 * See uma.h.
 *
 * Create a secondary zone attached to the master zone's first keg, so
 * both zones share the same slabs; size, alignment and keg flags are
 * inherited from the master's keg.
 */
uma_zone_t
uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
                    uma_init zinit, uma_fini zfini, uma_zone_t master)
{
        struct uma_zctor_args args;
        uma_keg_t keg;
        uma_zone_t res;
#ifndef __rtems__
        bool locked;
#endif /* __rtems__ */

        keg = zone_first_keg(master);
        memset(&args, 0, sizeof(args));
        args.name = name;
        args.size = keg->uk_size;
        args.ctor = ctor;
        args.dtor = dtor;
        args.uminit = zinit;
        args.fini = zfini;
        /* Item geometry comes from the shared keg. */
        args.align = keg->uk_align;
        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
        args.keg = keg;

        /* The drain lock does not exist before uma_startup2() on FreeBSD. */
#ifndef __rtems__
        if (booted < UMA_STARTUP2) {
                locked = false;
        } else {
#endif /* __rtems__ */
                sx_slock(&uma_drain_lock);
#ifndef __rtems__
                locked = true;
        }
#endif /* __rtems__ */
        /* XXX Attaches only one keg of potentially many. */
        res = zone_alloc_item(zones, &args, M_WAITOK);
#ifndef __rtems__
        if (locked)
#endif /* __rtems__ */
                sx_sunlock(&uma_drain_lock);
        return (res);
}
2067
2068/* See uma.h */
2069uma_zone_t
2070uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor,
2071                    uma_init zinit, uma_fini zfini, uma_import zimport,
2072                    uma_release zrelease, void *arg, int flags)
2073{
2074        struct uma_zctor_args args;
2075
2076        memset(&args, 0, sizeof(args));
2077        args.name = name;
2078        args.size = size;
2079        args.ctor = ctor;
2080        args.dtor = dtor;
2081        args.uminit = zinit;
2082        args.fini = zfini;
2083        args.import = zimport;
2084        args.release = zrelease;
2085        args.arg = arg;
2086        args.align = 0;
2087        args.flags = flags;
2088
2089        return (zone_alloc_item(zones, &args, M_WAITOK));
2090}
2091
2092#ifndef __rtems__
2093static void
2094zone_lock_pair(uma_zone_t a, uma_zone_t b)
2095{
2096        if (a < b) {
2097                ZONE_LOCK(a);
2098                mtx_lock_flags(b->uz_lockptr, MTX_DUPOK);
2099        } else {
2100                ZONE_LOCK(b);
2101                mtx_lock_flags(a->uz_lockptr, MTX_DUPOK);
2102        }
2103}
2104
/*
 * Release the locks taken by zone_lock_pair(); unlock order does not
 * matter.
 */
static void
zone_unlock_pair(uma_zone_t a, uma_zone_t b)
{

        ZONE_UNLOCK(a);
        ZONE_UNLOCK(b);
}
2112
2113int
2114uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
2115{
2116        uma_klink_t klink;
2117        uma_klink_t kl;
2118        int error;
2119
2120        error = 0;
2121        klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
2122
2123        zone_lock_pair(zone, master);
2124        /*
2125         * zone must use vtoslab() to resolve objects and must already be
2126         * a secondary.
2127         */
2128        if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
2129            != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
2130                error = EINVAL;
2131                goto out;
2132        }
2133        /*
2134         * The new master must also use vtoslab().
2135         */
2136        if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2137                error = EINVAL;
2138                goto out;
2139        }
2140
2141        /*
2142         * The underlying object must be the same size.  rsize
2143         * may be different.
2144         */
2145        if (master->uz_size != zone->uz_size) {
2146                error = E2BIG;
2147                goto out;
2148        }
2149        /*
2150         * Put it at the end of the list.
2151         */
2152        klink->kl_keg = zone_first_keg(master);
2153        LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2154                if (LIST_NEXT(kl, kl_link) == NULL) {
2155                        LIST_INSERT_AFTER(kl, klink, kl_link);
2156                        break;
2157                }
2158        }
2159        klink = NULL;
2160        zone->uz_flags |= UMA_ZFLAG_MULTI;
2161        zone->uz_slab = zone_fetch_slab_multi;
2162
2163out:
2164        zone_unlock_pair(zone, master);
2165        if (klink != NULL)
2166                free(klink, M_TEMP);
2167
2168        return (error);
2169}
2170#endif /* __rtems__ */
2171
2172
/*
 * See uma.h.
 *
 * Tear down a zone.  The drain lock is held shared, matching its use
 * in uma_zcreate()/uma_zsecond_create().
 */
void
uma_zdestroy(uma_zone_t zone)
{

        sx_slock(&uma_drain_lock);
        zone_free_item(zones, zone, NULL, SKIP_NONE);
        sx_sunlock(&uma_drain_lock);
}
2182
/*
 * See uma.h.
 *
 * Allocate one item from a zone.  Allocation proceeds in three tiers:
 * (1) the per-CPU cache buckets (lock-free, inside a critical
 * section), (2) the zone's list of full buckets (under the zone lock),
 * and (3) a freshly filled bucket or, failing that, a single item
 * imported directly via zone_alloc_item().
 */
void *
uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
{
        void *item;
        uma_cache_t cache;
        uma_bucket_t bucket;
        int lockfail;
        int cpu;

        /* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
        random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);

        /* This is the fast path allocation */
#ifdef UMA_DEBUG_ALLOC_1
        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
            zone->uz_name, flags);

        if (flags & M_WAITOK) {
                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
        }
#ifndef __rtems__
        KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
            ("uma_zalloc_arg: called with spinlock or critical section held"));
#endif /* __rtems__ */

#ifdef DEBUG_MEMGUARD
        /* Divert selected zones to memguard for use-after-free checks. */
        if (memguard_cmp_zone(zone)) {
                item = memguard_alloc(zone->uz_size, flags);
                if (item != NULL) {
                        if (zone->uz_init != NULL &&
                            zone->uz_init(item, zone->uz_size, flags) != 0)
                                return (NULL);
                        if (zone->uz_ctor != NULL &&
                            zone->uz_ctor(item, zone->uz_size, udata,
                            flags) != 0) {
                                zone->uz_fini(item, zone->uz_size);
                                return (NULL);
                        }
                        return (item);
                }
                /* This is unfortunate but should not be fatal. */
        }
#endif
        /*
         * If possible, allocate from the per-CPU cache.  There are two
         * requirements for safe access to the per-CPU cache: (1) the thread
         * accessing the cache must not be preempted or yield during access,
         * and (2) the thread must not migrate CPUs without switching which
         * cache it accesses.  We rely on a critical section to prevent
         * preemption and migration.  We release the critical section in
         * order to acquire the zone mutex if we are unable to allocate from
         * the current cache; when we re-acquire the critical section, we
         * must detect and handle migration if it has occurred.
         */
        critical_enter();
        cpu = curcpu;
        cache = &zone->uz_cpu[cpu];

zalloc_start:
        /* Fast path: pop the topmost item off this CPU's alloc bucket. */
        bucket = cache->uc_allocbucket;
        if (bucket != NULL && bucket->ub_cnt > 0) {
                bucket->ub_cnt--;
                item = bucket->ub_bucket[bucket->ub_cnt];
#ifdef INVARIANTS
                bucket->ub_bucket[bucket->ub_cnt] = NULL;
#endif
                KASSERT(item != NULL, ("uma_zalloc: Bucket pointer mangled."));
                cache->uc_allocs++;
                critical_exit();
                if (zone->uz_ctor != NULL &&
                    zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
                        atomic_add_long(&zone->uz_fails, 1);
                        zone_free_item(zone, item, udata, SKIP_DTOR);
                        return (NULL);
                }
#ifdef INVARIANTS
                uma_dbg_alloc(zone, NULL, item);
#endif
                if (flags & M_ZERO)
                        uma_zero_item(item, zone);
                return (item);
        }

        /*
         * We have run out of items in our alloc bucket.
         * See if we can switch with our free bucket.
         */
        bucket = cache->uc_freebucket;
        if (bucket != NULL && bucket->ub_cnt > 0) {
#ifdef UMA_DEBUG_ALLOC
                printf("uma_zalloc: Swapping empty with alloc.\n");
#endif
                cache->uc_freebucket = cache->uc_allocbucket;
                cache->uc_allocbucket = bucket;
                goto zalloc_start;
        }

        /*
         * Discard any empty allocation bucket while we hold no locks.
         */
        bucket = cache->uc_allocbucket;
        cache->uc_allocbucket = NULL;
        critical_exit();
        if (bucket != NULL)
                bucket_free(zone, bucket, udata);

        /* Short-circuit for zones without buckets and low memory. */
        if (zone->uz_count == 0 || bucketdisable)
                goto zalloc_item;

        /*
         * Attempt to retrieve the item from the per-CPU cache has failed, so
         * we must go back to the zone.  This requires the zone lock, so we
         * must drop the critical section, then re-acquire it when we go back
         * to the cache.  Since the critical section is released, we may be
         * preempted or migrate.  As such, make sure not to maintain any
         * thread-local state specific to the cache from prior to releasing
         * the critical section.
         */
        lockfail = 0;
        if (ZONE_TRYLOCK(zone) == 0) {
                /* Record contention to size the buckets. */
                ZONE_LOCK(zone);
                lockfail = 1;
        }
        critical_enter();
        cpu = curcpu;
        cache = &zone->uz_cpu[cpu];

        /*
         * Since we have locked the zone we may as well send back our stats.
         */
        atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
        atomic_add_long(&zone->uz_frees, cache->uc_frees);
        cache->uc_allocs = 0;
        cache->uc_frees = 0;

        /* See if we lost the race to fill the cache. */
        if (cache->uc_allocbucket != NULL) {
                ZONE_UNLOCK(zone);
                goto zalloc_start;
        }

        /*
         * Check the zone's cache of buckets.
         */
        if ((bucket = LIST_FIRST(&zone->uz_buckets)) != NULL) {
                KASSERT(bucket->ub_cnt != 0,
                    ("uma_zalloc_arg: Returning an empty bucket."));

                LIST_REMOVE(bucket, ub_link);
                cache->uc_allocbucket = bucket;
                ZONE_UNLOCK(zone);
                goto zalloc_start;
        }
        /* We are no longer associated with this CPU. */
        critical_exit();

        /*
         * We bump the uz count when the cache size is insufficient to
         * handle the working set.
         */
        if (lockfail && zone->uz_count < BUCKET_MAX)
                zone->uz_count++;
        ZONE_UNLOCK(zone);

        /*
         * Now lets just fill a bucket and put it on the free list.  If that
         * works we'll restart the allocation from the beginning and it
         * will use the just filled bucket.
         */
        bucket = zone_alloc_bucket(zone, udata, flags);
        if (bucket != NULL) {
                ZONE_LOCK(zone);
                critical_enter();
                cpu = curcpu;
                cache = &zone->uz_cpu[cpu];
                /*
                 * See if we lost the race or were migrated.  Cache the
                 * initialized bucket to make this less likely or claim
                 * the memory directly.
                 */
                if (cache->uc_allocbucket == NULL)
                        cache->uc_allocbucket = bucket;
                else
                        LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
                ZONE_UNLOCK(zone);
                goto zalloc_start;
        }

        /*
         * We may not be able to get a bucket so return an actual item.
         */
#ifdef UMA_DEBUG
        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
#endif

zalloc_item:
        item = zone_alloc_item(zone, udata, flags);

        return (item);
}
2389
/*
 * Fetch a slab with at least one free item from a keg, growing the
 * keg with a new slab if needed.  May sleep when the keg is at its
 * page limit and M_NOWAIT is not set.
 *
 * Arguments:
 *      keg    The keg to fetch from.  Its lock must be held on entry
 *             and remains held when a slab is returned.
 *      zone   The requesting zone, used for limit/flag reporting.
 *      flags  Malloc flags (M_NOWAIT, M_NOVM, M_USE_RESERVE, ...).
 *
 * Returns:
 *      A slab with free items, or NULL on failure.
 */
static uma_slab_t
keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
{
        uma_slab_t slab;
        int reserve;

        mtx_assert(&keg->uk_lock, MA_OWNED);
        slab = NULL;
        reserve = 0;
        /* Only M_USE_RESERVE callers may dip into the keg's reserve. */
        if ((flags & M_USE_RESERVE) == 0)
                reserve = keg->uk_reserve;

        for (;;) {
                /*
                 * Find a slab with some space.  Prefer slabs that are partially
                 * used over those that are totally full.  This helps to reduce
                 * fragmentation.
                 */
                if (keg->uk_free > reserve) {
                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
                                slab = LIST_FIRST(&keg->uk_part_slab);
                        } else {
                                slab = LIST_FIRST(&keg->uk_free_slab);
                                LIST_REMOVE(slab, us_link);
                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
                                    us_link);
                        }
                        MPASS(slab->us_keg == keg);
                        return (slab);
                }

                /*
                 * M_NOVM means don't ask at all!
                 */
                if (flags & M_NOVM)
                        break;

                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
                        keg->uk_flags |= UMA_ZFLAG_FULL;
                        /*
                         * If this is not a multi-zone, set the FULL bit.
                         * Otherwise slab_multi() takes care of it.
                         */
                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0) {
                                zone->uz_flags |= UMA_ZFLAG_FULL;
                                zone_log_warning(zone);
                                zone_maxaction(zone);
                        }
                        if (flags & M_NOWAIT)
                                break;
                        zone->uz_sleeps++;
                        /* Wait on the keg address for items to be freed. */
                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
                        continue;
                }
                slab = keg_alloc_slab(keg, zone, flags);
                /*
                 * If we got a slab here it's safe to mark it partially used
                 * and return.  We assume that the caller is going to remove
                 * at least one item.
                 */
                if (slab) {
                        MPASS(slab->us_keg == keg);
                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
                        return (slab);
                }
                /*
                 * We might not have been able to get a slab but another cpu
                 * could have while we were unlocked.  Check again before we
                 * fail.
                 */
                flags |= M_NOVM;
        }
        return (slab);
}
2464
2465static uma_slab_t
2466zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2467{
2468        uma_slab_t slab;
2469
2470        if (keg == NULL) {
2471                keg = zone_first_keg(zone);
2472                KEG_LOCK(keg);
2473        }
2474
2475        for (;;) {
2476                slab = keg_fetch_slab(keg, zone, flags);
2477                if (slab)
2478                        return (slab);
2479                if (flags & (M_NOWAIT | M_NOVM))
2480                        break;
2481        }
2482        KEG_UNLOCK(keg);
2483        return (NULL);
2484}
2485
2486#ifndef __rtems__
/*
 * uma_zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
 * with the keg locked.  On NULL no lock is held.
 *
 * The last pointer is used to seed the search.  It is not required.
 */
static uma_slab_t
zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
{
        uma_klink_t klink;
        uma_slab_t slab;
        uma_keg_t keg;
        int flags;
        int empty;
        int full;

        /*
         * Don't wait on the first pass.  This will skip limit tests
         * as well.  We don't want to block if we can find a provider
         * without blocking.
         */
        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
        /*
         * Use the last slab allocated as a hint for where to start
         * the search.
         */
        if (last != NULL) {
                slab = keg_fetch_slab(last, zone, flags);
                if (slab)
                        return (slab);
                KEG_UNLOCK(last);
        }
        /*
         * Loop until we have a slab in case of transient failures
         * while M_WAITOK is specified.  I'm not sure this is 100%
         * required but we've done it for so long now.
         */
        for (;;) {
                empty = 0;
                full = 0;
                /*
                 * Search the available kegs for slabs.  Be careful to hold the
                 * correct lock while calling into the keg layer.
                 */
                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
                        keg = klink->kl_keg;
                        KEG_LOCK(keg);
                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
                                /* On success the keg stays locked. */
                                slab = keg_fetch_slab(keg, zone, flags);
                                if (slab)
                                        return (slab);
                        }
                        if (keg->uk_flags & UMA_ZFLAG_FULL)
                                full++;
                        else
                                empty++;
                        KEG_UNLOCK(keg);
                }
                if (rflags & (M_NOWAIT | M_NOVM))
                        break;
                /* Subsequent passes honor the caller's original flags. */
                flags = rflags;
                /*
                 * All kegs are full.  XXX We can't atomically check all kegs
                 * and sleep so just sleep for a short period and retry.
                 */
                if (full && !empty) {
                        ZONE_LOCK(zone);
                        zone->uz_flags |= UMA_ZFLAG_FULL;
                        zone->uz_sleeps++;
                        zone_log_warning(zone);
                        zone_maxaction(zone);
                        msleep(zone, zone->uz_lockptr, PVM,
                            "zonelimit", hz/100);
                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
                        ZONE_UNLOCK(zone);
                        continue;
                }
        }
        return (NULL);
}
2567#endif /* __rtems__ */
2568
2569static void *
2570slab_alloc_item(uma_keg_t keg, uma_slab_t slab)
2571{
2572        void *item;
2573        uint8_t freei;
2574
2575        MPASS(keg == slab->us_keg);
2576        mtx_assert(&keg->uk_lock, MA_OWNED);
2577
2578        freei = BIT_FFS(SLAB_SETSIZE, &slab->us_free) - 1;
2579        BIT_CLR(SLAB_SETSIZE, freei, &slab->us_free);
2580        item = slab->us_data + (keg->uk_rsize * freei);
2581        slab->us_freecount--;
2582        keg->uk_free--;
2583
2584        /* Move this slab to the full list */
2585        if (slab->us_freecount == 0) {
2586                LIST_REMOVE(slab, us_link);
2587                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2588        }
2589
2590        return (item);
2591}
2592
2593static int
2594zone_import(uma_zone_t zone, void **bucket, int max, int flags)
2595{
2596        uma_slab_t slab;
2597        uma_keg_t keg;
2598        int i;
2599
2600        slab = NULL;
2601        keg = NULL;
2602        /* Try to keep the buckets totally full */
2603        for (i = 0; i < max; ) {
2604                if ((slab = zone->uz_slab(zone, keg, flags)) == NULL)
2605                        break;
2606                keg = slab->us_keg;
2607                while (slab->us_freecount && i < max) {
2608                        bucket[i++] = slab_alloc_item(keg, slab);
2609                        if (keg->uk_free <= keg->uk_reserve)
2610                                break;
2611                }
2612                /* Don't grab more than one slab at a time. */
2613                flags &= ~M_WAITOK;
2614                flags |= M_NOWAIT;
2615        }
2616        if (slab != NULL)
2617                KEG_UNLOCK(keg);
2618
2619        return i;
2620}
2621
/*
 * Allocate a bucket and fill it with up to uz_count items imported
 * from the zone's backend.
 *
 * Arguments:
 *      zone   The zone to fill a bucket for.
 *      udata  Caller data, passed through to the bucket allocator.
 *      flags  Malloc flags from the original allocation request.
 *
 * Returns:
 *      A bucket holding at least one item, or NULL on failure.
 */
static uma_bucket_t
zone_alloc_bucket(uma_zone_t zone, void *udata, int flags)
{
        uma_bucket_t bucket;
        int max;

        /* Don't wait for buckets, preserve caller's NOVM setting. */
        bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
        if (bucket == NULL)
                return (NULL);

        max = MIN(bucket->ub_entries, zone->uz_count);
        bucket->ub_cnt = zone->uz_import(zone->uz_arg, bucket->ub_bucket,
            max, flags);

        /*
         * Initialize the memory if necessary.
         */
        if (bucket->ub_cnt != 0 && zone->uz_init != NULL) {
                int i;

                for (i = 0; i < bucket->ub_cnt; i++)
                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
                            flags) != 0)
                                break;
                /*
                 * If we couldn't initialize the whole bucket, put the
                 * rest back onto the freelist.
                 */
                if (i != bucket->ub_cnt) {
                        zone->uz_release(zone->uz_arg, &bucket->ub_bucket[i],
                            bucket->ub_cnt - i);
#ifdef INVARIANTS
                        bzero(&bucket->ub_bucket[i],
                            sizeof(void *) * (bucket->ub_cnt - i));
#endif
                        bucket->ub_cnt = i;
                }
        }

        /* An empty bucket counts as an allocation failure. */
        if (bucket->ub_cnt == 0) {
                bucket_free(zone, bucket, udata);
                atomic_add_long(&zone->uz_fails, 1);
                return (NULL);
        }

        return (bucket);
}
2670
/*
 * Allocates a single item from a zone, bypassing the per-CPU caches
 * and importing directly from the zone's backend.
 *
 * Arguments
 *      zone   The zone to alloc for.
 *      udata  The data to be passed to the constructor.
 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
 *
 * Returns
 *      NULL if there is no memory and M_NOWAIT is set
 *      An item if successful
 */

static void *
zone_alloc_item(uma_zone_t zone, void *udata, int flags)
{
        void *item;

        item = NULL;

#ifdef UMA_DEBUG_ALLOC
        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
#endif
        if (zone->uz_import(zone->uz_arg, &item, 1, flags) != 1)
                goto fail;
        atomic_add_long(&zone->uz_allocs, 1);

        /*
         * We have to call both the zone's init (not the keg's init)
         * and the zone's ctor.  This is because the item is going from
         * a keg slab directly to the user, and the user is expecting it
         * to be both zone-init'd as well as zone-ctor'd.
         */
        if (zone->uz_init != NULL) {
                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
                        /* Init failed: free without running fini. */
                        zone_free_item(zone, item, udata, SKIP_FINI);
                        goto fail;
                }
        }
        if (zone->uz_ctor != NULL) {
                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
                        /* Ctor failed: free without running dtor. */
                        zone_free_item(zone, item, udata, SKIP_DTOR);
                        goto fail;
                }
        }
#ifdef INVARIANTS
        uma_dbg_alloc(zone, NULL, item);
#endif
        if (flags & M_ZERO)
                uma_zero_item(item, zone);

        return (item);

fail:
        atomic_add_long(&zone->uz_fails, 1);
        return (NULL);
}
2728
/* See uma.h */
/*
 * Free an item back to a zone: run the zone destructor, then try to
 * cache the item in the current CPU's buckets, falling back to the
 * zone's bucket list and finally to a direct keg free via
 * zone_free_item().
 */
void
uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
{
	uma_cache_t cache;
	uma_bucket_t bucket;
	int lockfail;
	int cpu;

	/* Enable entropy collection for RANDOM_ENABLE_UMA kernel option */
	random_harvest_fast_uma(&zone, sizeof(zone), 1, RANDOM_UMA);

#ifdef UMA_DEBUG_ALLOC_1
	printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
#endif
	CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
	    zone->uz_name);

#ifndef __rtems__
	KASSERT(curthread->td_critnest == 0 || SCHEDULER_STOPPED(),
	    ("uma_zfree_arg: called with spinlock or critical section held"));
#endif /* __rtems__ */

	/* uma_zfree(..., NULL) does nothing, to match free(9). */
	if (item == NULL)
		return;
#ifdef DEBUG_MEMGUARD
	/* Items diverted to memguard bypass the cache layer entirely. */
	if (is_memguard_addr(item)) {
		if (zone->uz_dtor != NULL)
			zone->uz_dtor(item, zone->uz_size, udata);
		if (zone->uz_fini != NULL)
			zone->uz_fini(item, zone->uz_size);
		memguard_free(item);
		return;
	}
#endif
#ifdef INVARIANTS
	/* Malloc zones keep their debug slab pointer in udata. */
	if (zone->uz_flags & UMA_ZONE_MALLOC)
		uma_dbg_free(zone, udata, item);
	else
		uma_dbg_free(zone, NULL, item);
#endif
	if (zone->uz_dtor != NULL)
		zone->uz_dtor(item, zone->uz_size, udata);

	/*
	 * The race here is acceptable.  If we miss it we'll just have to wait
	 * a little longer for the limits to be reset.
	 */
	if (zone->uz_flags & UMA_ZFLAG_FULL)
		goto zfree_item;

	/*
	 * If possible, free to the per-CPU cache.  There are two
	 * requirements for safe access to the per-CPU cache: (1) the thread
	 * accessing the cache must not be preempted or yield during access,
	 * and (2) the thread must not migrate CPUs without switching which
	 * cache it accesses.  We rely on a critical section to prevent
	 * preemption and migration.  We release the critical section in
	 * order to acquire the zone mutex if we are unable to free to the
	 * current cache; when we re-acquire the critical section, we must
	 * detect and handle migration if it has occurred.
	 */
zfree_restart:
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

zfree_start:
	/*
	 * Try to free into the allocbucket first to give LIFO ordering
	 * for cache-hot datastructures.  Spill over into the freebucket
	 * if necessary.  Alloc will swap them if one runs dry.
	 */
	bucket = cache->uc_allocbucket;
	if (bucket == NULL || bucket->ub_cnt >= bucket->ub_entries)
		bucket = cache->uc_freebucket;
	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
		KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
		    ("uma_zfree: Freeing to non free bucket index."));
		bucket->ub_bucket[bucket->ub_cnt] = item;
		bucket->ub_cnt++;
		cache->uc_frees++;
		critical_exit();
		return;
	}

	/*
	 * We must go back the zone, which requires acquiring the zone lock,
	 * which in turn means we must release and re-acquire the critical
	 * section.  Since the critical section is released, we may be
	 * preempted or migrate.  As such, make sure not to maintain any
	 * thread-local state specific to the cache from prior to releasing
	 * the critical section.
	 */
	critical_exit();
	/* Bucketing unused or disabled: free straight through to the keg. */
	if (zone->uz_count == 0 || bucketdisable)
		goto zfree_item;

	lockfail = 0;
	if (ZONE_TRYLOCK(zone) == 0) {
		/* Record contention to size the buckets. */
		ZONE_LOCK(zone);
		lockfail = 1;
	}
	critical_enter();
	cpu = curcpu;
	cache = &zone->uz_cpu[cpu];

	/*
	 * Since we have locked the zone we may as well send back our stats.
	 */
	atomic_add_long(&zone->uz_allocs, cache->uc_allocs);
	atomic_add_long(&zone->uz_frees, cache->uc_frees);
	cache->uc_allocs = 0;
	cache->uc_frees = 0;

	/* Re-check: the freebucket may have been refilled while unlocked. */
	bucket = cache->uc_freebucket;
	if (bucket != NULL && bucket->ub_cnt < bucket->ub_entries) {
		ZONE_UNLOCK(zone);
		goto zfree_start;
	}
	cache->uc_freebucket = NULL;
	/* We are no longer associated with this CPU. */
	critical_exit();

	/* Can we throw this on the zone full list? */
	if (bucket != NULL) {
#ifdef UMA_DEBUG_ALLOC
		printf("uma_zfree: Putting old bucket on the free list.\n");
#endif
		/* ub_cnt is pointing to the last free item */
		KASSERT(bucket->ub_cnt != 0,
		    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
		LIST_INSERT_HEAD(&zone->uz_buckets, bucket, ub_link);
	}

	/*
	 * We bump the uz count when the cache size is insufficient to
	 * handle the working set.
	 */
	if (lockfail && zone->uz_count < BUCKET_MAX)
		zone->uz_count++;
	ZONE_UNLOCK(zone);

#ifdef UMA_DEBUG_ALLOC
	printf("uma_zfree: Allocating new free bucket.\n");
#endif
	bucket = bucket_alloc(zone, udata, M_NOWAIT);
	if (bucket) {
		critical_enter();
		cpu = curcpu;
		cache = &zone->uz_cpu[cpu];
		if (cache->uc_freebucket == NULL) {
			cache->uc_freebucket = bucket;
			goto zfree_start;
		}
		/*
		 * We lost the race, start over.  We have to drop our
		 * critical section to free the bucket.
		 */
		critical_exit();
		bucket_free(zone, bucket, udata);
		goto zfree_restart;
	}

	/*
	 * If nothing else caught this, we'll just do an internal free.
	 */
zfree_item:
	zone_free_item(zone, item, udata, SKIP_DTOR);

	return;
}
2903
2904static void
2905slab_free_item(uma_keg_t keg, uma_slab_t slab, void *item)
2906{
2907        uint8_t freei;
2908
2909        mtx_assert(&keg->uk_lock, MA_OWNED);
2910        MPASS(keg == slab->us_keg);
2911
2912        /* Do we need to remove from any lists? */
2913        if (slab->us_freecount+1 == keg->uk_ipers) {
2914                LIST_REMOVE(slab, us_link);
2915                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2916        } else if (slab->us_freecount == 0) {
2917                LIST_REMOVE(slab, us_link);
2918                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2919        }
2920
2921        /* Slab management. */
2922        freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;
2923        BIT_SET(SLAB_SETSIZE, freei, &slab->us_free);
2924        slab->us_freecount++;
2925
2926        /* Keg statistics. */
2927        keg->uk_free++;
2928}
2929
/*
 * Return a bucket's worth of items to the keg layer: look up each
 * item's slab, mark the item free, and clear any keg/zone FULL state
 * that the frees have relieved.
 */
static void
zone_release(uma_zone_t zone, void **bucket, int cnt)
{
	void *item;
	uma_slab_t slab;
	uma_keg_t keg;
	uint8_t *mem;
	int clearfull;
	int i;

	clearfull = 0;
	keg = zone_first_keg(zone);
	KEG_LOCK(keg);
	for (i = 0; i < cnt; i++) {
		item = bucket[i];
		if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
			/* Slab header lives with (or is hashed by) the page run. */
			mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
			if (zone->uz_flags & UMA_ZONE_HASH) {
				slab = hash_sfind(&keg->uk_hash, mem);
			} else {
				mem += keg->uk_pgoff;
				slab = (uma_slab_t)mem;
			}
		} else {
			slab = vtoslab((vm_offset_t)item);
			/*
			 * Multi-keg zones: items may belong to different
			 * kegs, so switch locks when the keg changes.
			 */
			if (slab->us_keg != keg) {
				KEG_UNLOCK(keg);
				keg = slab->us_keg;
				KEG_LOCK(keg);
			}
		}
		slab_free_item(keg, slab, item);
		if (keg->uk_flags & UMA_ZFLAG_FULL) {
			if (keg->uk_pages < keg->uk_maxpages) {
				keg->uk_flags &= ~UMA_ZFLAG_FULL;
				clearfull = 1;
			}

			/*
			 * We can handle one more allocation. Since we're
			 * clearing ZFLAG_FULL, wake up all procs blocked
			 * on pages. This should be uncommon, so keeping this
			 * simple for now (rather than adding count of blocked
			 * threads etc).
			 */
			wakeup(keg);
		}
	}
	KEG_UNLOCK(keg);
	/* Propagate the cleared FULL state to the zone and its waiters. */
	if (clearfull) {
		ZONE_LOCK(zone);
		zone->uz_flags &= ~UMA_ZFLAG_FULL;
		wakeup(zone);
		ZONE_UNLOCK(zone);
	}

}
2987
2988/*
2989 * Frees a single item to any zone.
2990 *
2991 * Arguments:
2992 *      zone   The zone to free to
2993 *      item   The item we're freeing
2994 *      udata  User supplied data for the dtor
2995 *      skip   Skip dtors and finis
2996 */
2997static void
2998zone_free_item(uma_zone_t zone, void *item, void *udata, enum zfreeskip skip)
2999{
3000
3001#ifdef INVARIANTS
3002        if (skip == SKIP_NONE) {
3003                if (zone->uz_flags & UMA_ZONE_MALLOC)
3004                        uma_dbg_free(zone, udata, item);
3005                else
3006                        uma_dbg_free(zone, NULL, item);
3007        }
3008#endif
3009        if (skip < SKIP_DTOR && zone->uz_dtor)
3010                zone->uz_dtor(item, zone->uz_size, udata);
3011
3012        if (skip < SKIP_FINI && zone->uz_fini)
3013                zone->uz_fini(item, zone->uz_size);
3014
3015        atomic_add_long(&zone->uz_frees, 1);
3016        zone->uz_release(zone->uz_arg, &item, 1);
3017}
3018
3019/* See uma.h */
3020int
3021uma_zone_set_max(uma_zone_t zone, int nitems)
3022{
3023        uma_keg_t keg;
3024
3025        keg = zone_first_keg(zone);
3026        if (keg == NULL)
3027                return (0);
3028        KEG_LOCK(keg);
3029#ifdef __rtems__
3030#ifdef SMP
3031        /*
3032         * Ensure we have enough items to fill the per-processor caches.  This
3033         * is a heuristic approach and works not under all conditions.
3034         */
3035        nitems += 2 * BUCKET_MAX * (mp_maxid + 1);
3036#endif
3037#endif /* __rtems__ */
3038        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
3039        if (keg->uk_maxpages * keg->uk_ipers < nitems)
3040                keg->uk_maxpages += keg->uk_ppera;
3041        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3042        KEG_UNLOCK(keg);
3043
3044        return (nitems);
3045}
3046
3047/* See uma.h */
3048int
3049uma_zone_get_max(uma_zone_t zone)
3050{
3051        int nitems;
3052        uma_keg_t keg;
3053
3054        keg = zone_first_keg(zone);
3055        if (keg == NULL)
3056                return (0);
3057        KEG_LOCK(keg);
3058        nitems = (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers;
3059        KEG_UNLOCK(keg);
3060
3061        return (nitems);
3062}
3063
/* See uma.h */
void
uma_zone_set_warning(uma_zone_t zone, const char *warning)
{

	/* The string pointer is stored, not copied; caller keeps it alive. */
	ZONE_LOCK(zone);
	zone->uz_warning = warning;
	ZONE_UNLOCK(zone);
}
3073
/* See uma.h */
void
uma_zone_set_maxaction(uma_zone_t zone, uma_maxaction_t maxaction)
{

	/*
	 * Install a taskqueue task for the zone's limit-reached action;
	 * the callback is cast to task_fn_t and receives the zone as arg.
	 */
	ZONE_LOCK(zone);
	TASK_INIT(&zone->uz_maxaction, 0, (task_fn_t *)maxaction, zone);
	ZONE_UNLOCK(zone);
}
3083
3084/* See uma.h */
3085int
3086uma_zone_get_cur(uma_zone_t zone)
3087{
3088        int64_t nitems;
3089        u_int i;
3090
3091        ZONE_LOCK(zone);
3092        nitems = zone->uz_allocs - zone->uz_frees;
3093        CPU_FOREACH(i) {
3094                /*
3095                 * See the comment in sysctl_vm_zone_stats() regarding the
3096                 * safety of accessing the per-cpu caches. With the zone lock
3097                 * held, it is safe, but can potentially result in stale data.
3098                 */
3099                nitems += zone->uz_cpu[i].uc_allocs -
3100                    zone->uz_cpu[i].uc_frees;
3101        }
3102        ZONE_UNLOCK(zone);
3103
3104        return (nitems < 0 ? 0 : nitems);
3105}
3106
/* See uma.h */
void
uma_zone_set_init(uma_zone_t zone, uma_init uminit)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_init: Invalid zone type"));
	KEG_LOCK(keg);
	/* Only legal before the keg has allocated any pages. */
	KASSERT(keg->uk_pages == 0,
	    ("uma_zone_set_init on non-empty keg"));
	keg->uk_init = uminit;
	KEG_UNLOCK(keg);
}
3121
/* See uma.h */
void
uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_fini: Invalid zone type"));
	KEG_LOCK(keg);
	/* Only legal before the keg has allocated any pages. */
	KASSERT(keg->uk_pages == 0,
	    ("uma_zone_set_fini on non-empty keg"));
	keg->uk_fini = fini;
	KEG_UNLOCK(keg);
}
3136
/* See uma.h */
void
uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
{

	/* Zone-level (not keg-level) init; keg must still be empty. */
	ZONE_LOCK(zone);
	KASSERT(zone_first_keg(zone)->uk_pages == 0,
	    ("uma_zone_set_zinit on non-empty keg"));
	zone->uz_init = zinit;
	ZONE_UNLOCK(zone);
}
3148
/* See uma.h */
void
uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
{

	/* Zone-level (not keg-level) fini; keg must still be empty. */
	ZONE_LOCK(zone);
	KASSERT(zone_first_keg(zone)->uk_pages == 0,
	    ("uma_zone_set_zfini on non-empty keg"));
	zone->uz_fini = zfini;
	ZONE_UNLOCK(zone);
}
3160
/* See uma.h */
/* XXX uk_freef is not actually used with the zone locked */
void
uma_zone_set_freef(uma_zone_t zone, uma_free freef)
{
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	KASSERT(keg != NULL, ("uma_zone_set_freef: Invalid zone type"));
	KEG_LOCK(keg);
	/* Replace the keg's backing-page free routine. */
	keg->uk_freef = freef;
	KEG_UNLOCK(keg);
}
3174
3175/* See uma.h */
3176/* XXX uk_allocf is not actually used with the zone locked */
3177void
3178uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3179{
3180        uma_keg_t keg;
3181
3182        keg = zone_first_keg(zone);
3183        KEG_LOCK(keg);
3184        keg->uk_allocf = allocf;
3185        KEG_UNLOCK(keg);
3186}
3187
3188/* See uma.h */
3189void
3190uma_zone_reserve(uma_zone_t zone, int items)
3191{
3192        uma_keg_t keg;
3193
3194        keg = zone_first_keg(zone);
3195        if (keg == NULL)
3196                return;
3197        KEG_LOCK(keg);
3198        keg->uk_reserve = items;
3199        KEG_UNLOCK(keg);
3200
3201        return;
3202}
3203
3204#ifndef __rtems__
/* See uma.h */
/*
 * Pre-reserve a KVA range large enough for "count" items and switch
 * the keg to the noobj/small-alloc backends that carve pages out of
 * it.  Returns 1 on success, 0 on failure or for keg-less zones.
 */
int
uma_zone_reserve_kva(uma_zone_t zone, int count)
{
	uma_keg_t keg;
	vm_offset_t kva;
	u_int pages;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return (0);
	/* Round the item count up to whole slabs, then convert to pages. */
	pages = count / keg->uk_ipers;

	if (pages * keg->uk_ipers < count)
		pages++;
	pages *= keg->uk_ppera;

	/*
	 * With UMA_MD_SMALL_ALLOC, single-page slabs come from
	 * uma_small_alloc and need no KVA reservation; otherwise a KVA
	 * window is always required.
	 */
#ifdef UMA_MD_SMALL_ALLOC
	if (keg->uk_ppera > 1) {
#else
	if (1) {
#endif
		kva = kva_alloc((vm_size_t)pages * PAGE_SIZE);
		if (kva == 0)
			return (0);
	} else
		kva = 0;
	KEG_LOCK(keg);
	keg->uk_kva = kva;
	keg->uk_offset = 0;
	keg->uk_maxpages = pages;
#ifdef UMA_MD_SMALL_ALLOC
	keg->uk_allocf = (keg->uk_ppera > 1) ? noobj_alloc : uma_small_alloc;
#else
	keg->uk_allocf = noobj_alloc;
#endif
	/* Reserved pages are never given back to the VM. */
	keg->uk_flags |= UMA_ZONE_NOFREE;
	KEG_UNLOCK(keg);

	return (1);
}
3246
/* See uma.h */
/*
 * Pre-populate the zone's first keg with enough free slabs to satisfy
 * "items" allocations; stops early if slab allocation fails.
 */
void
uma_prealloc(uma_zone_t zone, int items)
{
	int slabs;
	uma_slab_t slab;
	uma_keg_t keg;

	keg = zone_first_keg(zone);
	if (keg == NULL)
		return;
	KEG_LOCK(keg);
	/* Round the item count up to whole slabs. */
	slabs = items / keg->uk_ipers;
	if (slabs * keg->uk_ipers < items)
		slabs++;
	while (slabs > 0) {
		slab = keg_alloc_slab(keg, zone, M_WAITOK);
		if (slab == NULL)
			break;
		MPASS(slab->us_keg == keg);
		LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
		slabs--;
	}
	KEG_UNLOCK(keg);
}
3272#endif /* __rtems__ */
3273
/* See uma.h */
/*
 * Drain all zones' caches and buckets back to the VM.  Caller holds
 * uma_drain_lock; kmem_danger additionally forces the safe per-CPU
 * cache drain on non-RTEMS builds.
 */
static void
uma_reclaim_locked(bool kmem_danger)
{

#ifdef UMA_DEBUG
	printf("UMA: vm asked us to release pages!\n");
#endif
	sx_assert(&uma_drain_lock, SA_XLOCKED);
	bucket_enable();
	zone_foreach(zone_drain);
#ifndef __rtems__
	if (vm_page_count_min() || kmem_danger) {
		cache_drain_safe(NULL);
		zone_foreach(zone_drain);
	}
#endif /* __rtems__ */
	/*
	 * The slab zone is visited early in the walk above, but draining
	 * the other zones frees slabs into it; drain it again so those
	 * now-empty pages can be released.  The same applies to the
	 * bucket zones.
	 */
	zone_drain(slabzone);
	bucket_zone_drain();
}
3299
/*
 * Public entry point: take the drain lock and perform a normal
 * (non-danger) reclaim pass.
 */
void
uma_reclaim(void)
{

	sx_xlock(&uma_drain_lock);
	uma_reclaim_locked(false);
	sx_xunlock(&uma_drain_lock);
}
3308
/*
 * Set by uma_reclaim_wakeup(), consumed by uma_reclaim_worker().
 * NOTE(review): plain int shared between threads without atomics;
 * presumably the wakeup()/sx_sleep() pairing provides sufficient
 * ordering — confirm.
 */
static int uma_reclaim_needed;
3310
/* Request an asynchronous reclaim pass from uma_reclaim_worker(). */
void
uma_reclaim_wakeup(void)
{

	uma_reclaim_needed = 1;
	wakeup(&uma_reclaim_needed);
}
3318
/*
 * Kernel-thread body: sleep until uma_reclaim_wakeup() posts the
 * flag, then run lowmem handlers (non-RTEMS) and a danger-mode
 * reclaim.  Never returns.
 */
void
uma_reclaim_worker(void *arg __unused)
{

	sx_xlock(&uma_drain_lock);
	for (;;) {
		sx_sleep(&uma_reclaim_needed, &uma_drain_lock, PVM,
		    "umarcl", 0);
		if (uma_reclaim_needed) {
			uma_reclaim_needed = 0;
#ifndef __rtems__
			/*
			 * Drop the drain lock around the event handlers —
			 * presumably so they may call back into UMA without
			 * deadlocking; confirm against handler contracts.
			 */
			sx_xunlock(&uma_drain_lock);
			EVENTHANDLER_INVOKE(vm_lowmem, VM_LOW_KMEM);
			sx_xlock(&uma_drain_lock);
#endif /* __rtems__ */
			uma_reclaim_locked(true);
		}
	}
}
3338
3339/* See uma.h */
3340int
3341uma_zone_exhausted(uma_zone_t zone)
3342{
3343        int full;
3344
3345        ZONE_LOCK(zone);
3346        full = (zone->uz_flags & UMA_ZFLAG_FULL);
3347        ZONE_UNLOCK(zone);
3348        return (full); 
3349}
3350
/* Racy variant of uma_zone_exhausted(): reads uz_flags unlocked. */
int
uma_zone_exhausted_nolock(uma_zone_t zone)
{
	return (zone->uz_flags & UMA_ZFLAG_FULL);
}
3356
3357#ifndef __rtems__
/*
 * Back a large malloc(9) request: allocate a slab header from
 * slabzone plus the pages themselves, and link the two so that
 * uma_large_free() can find the header from the address.  Returns
 * NULL on failure.
 */
void *
uma_large_malloc(vm_size_t size, int wait)
{
	void *mem;
	uma_slab_t slab;
	uint8_t flags;

	slab = zone_alloc_item(slabzone, NULL, wait);
	if (slab == NULL)
		return (NULL);
	mem = page_alloc(NULL, size, &flags, wait);
	if (mem) {
		/* Record the header in the address-to-slab map. */
		vsetslab((vm_offset_t)mem, slab);
		slab->us_data = mem;
		slab->us_flags = flags | UMA_SLAB_MALLOC;
		slab->us_size = size;
	} else {
		/* Page allocation failed; give the slab header back. */
		zone_free_item(slabzone, slab, NULL, SKIP_NONE);
	}

	return (mem);
}
3380
/* Undo uma_large_malloc(): release the pages, then the slab header. */
void
uma_large_free(uma_slab_t slab)
{

	page_free(slab->us_data, slab->us_size, slab->us_flags);
	zone_free_item(slabzone, slab, NULL, SKIP_NONE);
}
3388#endif /* __rtems__ */
3389
3390static void
3391uma_zero_item(void *item, uma_zone_t zone)
3392{
3393        int i;
3394
3395        if (zone->uz_flags & UMA_ZONE_PCPU) {
3396                CPU_FOREACH(i)
3397                        bzero(zpcpu_get_cpu(item, i), zone->uz_size);
3398        } else
3399                bzero(item, zone->uz_size);
3400}
3401
/* Debug helper: dump every zone via uma_print_zone(). */
void
uma_print_stats(void)
{
	zone_foreach(uma_print_zone);
}
3407
/* Debug helper: one-line summary of a slab. */
static void
slab_print(uma_slab_t slab)
{
	printf("slab: keg %p, data %p, freecount %d\n",
		slab->us_keg, slab->us_data, slab->us_freecount);
}
3414
3415static void
3416cache_print(uma_cache_t cache)
3417{
3418        printf("alloc: %p(%d), free: %p(%d)\n",
3419                cache->uc_allocbucket,
3420                cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
3421                cache->uc_freebucket,
3422                cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
3423}
3424
/* Debug helper: dump a keg's parameters and all three slab lists. */
static void
uma_print_keg(uma_keg_t keg)
{
	uma_slab_t slab;

	printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
	    "out %d free %d limit %d\n",
	    keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
	    keg->uk_ipers, keg->uk_ppera,
	    (keg->uk_pages / keg->uk_ppera) * keg->uk_ipers - keg->uk_free,
	    keg->uk_free, (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
	printf("Part slabs:\n");
	LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
		slab_print(slab);
	printf("Free slabs:\n");
	LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
		slab_print(slab);
	printf("Full slabs:\n");
	LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
		slab_print(slab);
}
3446
/* Debug helper: dump a zone, its kegs, and every CPU's cache. */
void
uma_print_zone(uma_zone_t zone)
{
	uma_cache_t cache;
	uma_klink_t kl;
	int i;

	printf("zone: %s(%p) size %d flags %#x\n",
	    zone->uz_name, zone, zone->uz_size, zone->uz_flags);
	LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
		uma_print_keg(kl->kl_keg);
	CPU_FOREACH(i) {
		cache = &zone->uz_cpu[i];
		printf("CPU %d Cache:\n", i);
		cache_print(cache);
	}
}
3464
3465#ifndef __rtems__
3466#ifdef DDB
3467/*
3468 * Generate statistics across both the zone and its per-cpu cache's.  Return
3469 * desired statistics if the pointer is non-NULL for that statistic.
3470 *
3471 * Note: does not update the zone statistics, as it can't safely clear the
3472 * per-CPU cache statistic.
3473 *
3474 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3475 * safe from off-CPU; we should modify the caches to track this information
3476 * directly so that we don't have to.
3477 */
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
    uint64_t *freesp, uint64_t *sleepsp)
{
	uma_cache_t cache;
	uint64_t allocs, frees, sleeps;
	int cachefree, cpu;

	allocs = frees = sleeps = 0;
	cachefree = 0;
	/* Accumulate the per-CPU cache counters and bucket contents. */
	CPU_FOREACH(cpu) {
		cache = &z->uz_cpu[cpu];
		if (cache->uc_allocbucket != NULL)
			cachefree += cache->uc_allocbucket->ub_cnt;
		if (cache->uc_freebucket != NULL)
			cachefree += cache->uc_freebucket->ub_cnt;
		allocs += cache->uc_allocs;
		frees += cache->uc_frees;
	}
	/* Fold in the zone-level totals. */
	allocs += z->uz_allocs;
	frees += z->uz_frees;
	sleeps += z->uz_sleeps;
	/* Each output pointer is optional. */
	if (cachefreep != NULL)
		*cachefreep = cachefree;
	if (allocsp != NULL)
		*allocsp = allocs;
	if (freesp != NULL)
		*freesp = frees;
	if (sleepsp != NULL)
		*sleepsp = sleeps;
}
3509#endif /* DDB */
3510#endif /* __rtems__ */
3511
3512static int
3513sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3514{
3515        uma_keg_t kz;
3516        uma_zone_t z;
3517        int count;
3518
3519        count = 0;
3520        rw_rlock(&uma_rwlock);
3521        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3522                LIST_FOREACH(z, &kz->uk_zones, uz_link)
3523                        count++;
3524        }
3525        rw_runlock(&uma_rwlock);
3526        return (sysctl_handle_int(oidp, &count, 0, req));
3527}
3528
/*
 * Sysctl handler: stream a uma_stream_header followed by, for each
 * zone, a uma_type_header and one uma_percpu_stat per possible CPU.
 */
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
	struct uma_stream_header ush;
	struct uma_type_header uth;
	struct uma_percpu_stat ups;
	uma_bucket_t bucket;
	struct sbuf sbuf;
	uma_cache_t cache;
	uma_klink_t kl;
	uma_keg_t kz;
	uma_zone_t z;
	uma_keg_t k;
	int count, error, i;

	error = sysctl_wire_old_buffer(req, 0);
	if (error != 0)
		return (error);
	sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
	sbuf_clear_flags(&sbuf, SBUF_INCLUDENUL);

	/* Count zones so the header can carry the record count. */
	count = 0;
	rw_rlock(&uma_rwlock);
	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link)
			count++;
	}

	/*
	 * Insert stream header.
	 */
	bzero(&ush, sizeof(ush));
	ush.ush_version = UMA_STREAM_VERSION;
	ush.ush_maxcpus = (mp_maxid + 1);
	ush.ush_count = count;
	(void)sbuf_bcat(&sbuf, &ush, sizeof(ush));

	LIST_FOREACH(kz, &uma_kegs, uk_link) {
		LIST_FOREACH(z, &kz->uk_zones, uz_link) {
			bzero(&uth, sizeof(uth));
			ZONE_LOCK(z);
			strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
			uth.uth_align = kz->uk_align;
			uth.uth_size = kz->uk_size;
			uth.uth_rsize = kz->uk_rsize;
			/* Sum page/limit figures over all of the zone's kegs. */
			LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
				k = kl->kl_keg;
				uth.uth_maxpages += k->uk_maxpages;
				uth.uth_pages += k->uk_pages;
				uth.uth_keg_free += k->uk_free;
				uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
				    * k->uk_ipers;
			}

			/*
			 * A zone is secondary if it is not the first entry
			 * on the keg's zone list.
			 */
			if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
			    (LIST_FIRST(&kz->uk_zones) != z))
				uth.uth_zone_flags = UTH_ZONE_SECONDARY;

			LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
				uth.uth_zone_free += bucket->ub_cnt;
			uth.uth_allocs = z->uz_allocs;
			uth.uth_frees = z->uz_frees;
			uth.uth_fails = z->uz_fails;
			uth.uth_sleeps = z->uz_sleeps;
			(void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
			/*
			 * While it is not normally safe to access the cache
			 * bucket pointers while not on the CPU that owns the
			 * cache, we only allow the pointers to be exchanged
			 * without the zone lock held, not invalidated, so
			 * accept the possible race associated with bucket
			 * exchange during monitoring.
			 */
			for (i = 0; i < (mp_maxid + 1); i++) {
				bzero(&ups, sizeof(ups));
				/* Internal zones and absent CPUs emit zeros. */
				if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
					goto skip;
				if (CPU_ABSENT(i))
					goto skip;
				cache = &z->uz_cpu[i];
				if (cache->uc_allocbucket != NULL)
					ups.ups_cache_free +=
					    cache->uc_allocbucket->ub_cnt;
				if (cache->uc_freebucket != NULL)
					ups.ups_cache_free +=
					    cache->uc_freebucket->ub_cnt;
				ups.ups_allocs = cache->uc_allocs;
				ups.ups_frees = cache->uc_frees;
skip:
				(void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
			}
			ZONE_UNLOCK(z);
		}
	}
	rw_runlock(&uma_rwlock);
	error = sbuf_finish(&sbuf);
	sbuf_delete(&sbuf);
	return (error);
}
3632
3633int
3634sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS)
3635{
3636        uma_zone_t zone = *(uma_zone_t *)arg1;
3637        int error, max;
3638
3639        max = uma_zone_get_max(zone);
3640        error = sysctl_handle_int(oidp, &max, 0, req);
3641        if (error || !req->newptr)
3642                return (error);
3643
3644        uma_zone_set_max(zone, max);
3645
3646        return (0);
3647}
3648
/* Read-only sysctl handler: report the zone's current item count. */
int
sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS)
{
	uma_zone_t zone = *(uma_zone_t *)arg1;
	int cur;

	cur = uma_zone_get_cur(zone);
	return (sysctl_handle_int(oidp, &cur, 0, req));
}
3658
3659#ifdef INVARIANTS
/*
 * Locate the slab backing an item of the zone (INVARIANTS only).
 * Returns NULL when the item cannot be matched to a slab.
 */
static uma_slab_t
uma_dbg_getslab(uma_zone_t zone, void *item)
{
	uma_slab_t slab;
	uma_keg_t keg;
	uint8_t *mem;

	/* Round the item address down to the start of its page run. */
	mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
	if (zone->uz_flags & UMA_ZONE_VTOSLAB) {
		slab = vtoslab((vm_offset_t)mem);
	} else {
		/*
		 * It is safe to return the slab here even though the
		 * zone is unlocked because the item's allocation state
		 * essentially holds a reference.
		 */
		ZONE_LOCK(zone);
		keg = LIST_FIRST(&zone->uz_kegs)->kl_keg;
		if (keg->uk_flags & UMA_ZONE_HASH)
			slab = hash_sfind(&keg->uk_hash, mem);
		else
			slab = (uma_slab_t)(mem + keg->uk_pgoff);
		ZONE_UNLOCK(zone);
	}

	return (slab);
}
3687
/*
 * Set up the slab's freei data such that uma_dbg_free can function.
 * Panics on a duplicate allocation of the same slot (INVARIANTS only).
 */
static void
uma_dbg_alloc(uma_zone_t zone, uma_slab_t slab, void *item)
{
	uma_keg_t keg;
	int freei;

	/* Keg-less zones carry no slab bookkeeping to update. */
	if (zone_first_keg(zone) == NULL)
		return;
	if (slab == NULL) {
		slab = uma_dbg_getslab(zone, item);
		if (slab == NULL)
			panic("uma: item %p did not belong to zone %s\n",
			    item, zone->uz_name);
	}
	keg = slab->us_keg;
	/* Compute the item's slot index within the slab. */
	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;

	if (BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
		panic("Duplicate alloc of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);
	/* Mark the slot allocated in the debug set. */
	BIT_SET_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);

	return;
}
3716
/*
 * Verifies freed addresses.  Checks for alignment, valid slab membership
 * and duplicate frees.
 *
 */
static void
uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
{
	uma_keg_t keg;
	int freei;

	/* Cache-only zones have no keg; nothing to verify. */
	if (zone_first_keg(zone) == NULL)
		return;
	if (slab == NULL) {
		slab = uma_dbg_getslab(zone, item);
		if (slab == NULL)
			panic("uma: Freed item %p did not belong to zone %s\n",
			    item, zone->uz_name);
	}
	keg = slab->us_keg;
	/*
	 * Slot index of the item within the slab.  NOTE(review): freei is
	 * a signed int; an item address below us_data wraps through the
	 * unsigned subtraction before truncation, so the range check below
	 * may not flag it directly — the alignment check normally catches
	 * such cases.  Confirm against upstream before changing.
	 */
	freei = ((uintptr_t)item - (uintptr_t)slab->us_data) / keg->uk_rsize;

	/* The slot must fall within the slab's item count. */
	if (freei >= keg->uk_ipers)
		panic("Invalid free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	/* The item must start exactly on a slot boundary. */
	if (((freei * keg->uk_rsize) + slab->us_data) != item)
		panic("Unaligned free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	/* The slot must currently be marked allocated by uma_dbg_alloc(). */
	if (!BIT_ISSET(SLAB_SETSIZE, freei, &slab->us_debugfree))
		panic("Duplicate free of %p from zone %p(%s) slab %p(%d)\n",
		    item, zone, zone->uz_name, slab, freei);

	BIT_CLR_ATOMIC(SLAB_SETSIZE, freei, &slab->us_debugfree);
}
3753#endif /* INVARIANTS */
3754
3755#ifndef __rtems__
3756#ifdef DDB
3757DB_SHOW_COMMAND(uma, db_show_uma)
3758{
3759        uint64_t allocs, frees, sleeps;
3760        uma_bucket_t bucket;
3761        uma_keg_t kz;
3762        uma_zone_t z;
3763        int cachefree;
3764
3765        db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
3766            "Free", "Requests", "Sleeps", "Bucket");
3767        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3768                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3769                        if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3770                                allocs = z->uz_allocs;
3771                                frees = z->uz_frees;
3772                                sleeps = z->uz_sleeps;
3773                                cachefree = 0;
3774                        } else
3775                                uma_zone_sumstat(z, &cachefree, &allocs,
3776                                    &frees, &sleeps);
3777                        if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3778                            (LIST_FIRST(&kz->uk_zones) != z)))
3779                                cachefree += kz->uk_free;
3780                        LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3781                                cachefree += bucket->ub_cnt;
3782                        db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
3783                            z->uz_name, (uintmax_t)kz->uk_size,
3784                            (intmax_t)(allocs - frees), cachefree,
3785                            (uintmax_t)allocs, sleeps, z->uz_count);
3786                        if (db_pager_quit)
3787                                return;
3788                }
3789        }
3790}
3791
3792DB_SHOW_COMMAND(umacache, db_show_umacache)
3793{
3794        uint64_t allocs, frees;
3795        uma_bucket_t bucket;
3796        uma_zone_t z;
3797        int cachefree;
3798
3799        db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3800            "Requests", "Bucket");
3801        LIST_FOREACH(z, &uma_cachezones, uz_link) {
3802                uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
3803                LIST_FOREACH(bucket, &z->uz_buckets, ub_link)
3804                        cachefree += bucket->ub_cnt;
3805                db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
3806                    z->uz_name, (uintmax_t)z->uz_size,
3807                    (intmax_t)(allocs - frees), cachefree,
3808                    (uintmax_t)allocs, z->uz_count);
3809                if (db_pager_quit)
3810                        return;
3811        }
3812}
3813#endif  /* DDB */
3814#endif /* __rtems__ */
Note: See TracBrowser for help on using the repository browser.