source: rtems-libbsd/rtemsbsd/src/rtems-bsd-uma.c @ cbffdb7f

4.1155-freebsd-126-freebsd-12freebsd-9.3
Last change on this file since cbffdb7f was cbffdb7f, checked in by Joel Sherrill <joel.sherrill@…>, on 03/07/12 at 22:14:13

Separate RTEMS Specific Files from Those Direct from FreeBSD

  • Property mode set to 100644
File size: 68.7 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bsd_rtems
5 *
6 * @brief TODO.
7 */
8
9/*
10 * Copyright (c) 2009, 2010 embedded brains GmbH.  All rights reserved.
11 *
12 *  embedded brains GmbH
13 *  Obere Lagerstr. 30
14 *  82178 Puchheim
15 *  Germany
16 *  <rtems@embedded-brains.de>
17 *
18 * The license and distribution terms for this file may be
19 * found in the file LICENSE in this distribution or at
20 * http://www.rtems.com/license/LICENSE.
21 */
22
23#include <rtems/freebsd/machine/rtems-bsd-config.h>
24
25#include <rtems/freebsd/sys/param.h>
26#include <rtems/freebsd/sys/types.h>
27#include <rtems/freebsd/sys/systm.h>
28#include <rtems/freebsd/sys/malloc.h>
29#include <rtems/freebsd/sys/kernel.h>
30#include <rtems/freebsd/sys/lock.h>
31#include <rtems/freebsd/sys/mutex.h>
32#include <rtems/freebsd/sys/ktr.h>
33#include <rtems/freebsd/vm/uma.h>
34#include <rtems/freebsd/vm/uma_int.h>
35#include <rtems/freebsd/vm/uma_dbg.h>
36
37/*
38 * This is the zone and keg from which all zones are spawned.  The idea is that
39 * even the zone & keg heads are allocated from the allocator, so we use the
40 * bss section to bootstrap us.
41 */
42static struct uma_keg masterkeg;
43static struct uma_zone masterzone_k;
44static struct uma_zone masterzone_z;
45static uma_zone_t kegs = &masterzone_k;
46static uma_zone_t zones = &masterzone_z;
47
48/* This is the zone from which all of uma_slab_t's are allocated. */
49static uma_zone_t slabzone;
50static uma_zone_t slabrefzone;  /* With refcounters (for UMA_ZONE_REFCNT) */
51
52static u_int mp_maxid = 0; /* simulate 1 CPU.  This should really come from RTEMS SMP.  AT this time, RTEMS SMP is not functional */
53#define CPU_ABSENT(x_cpu) 0 /* force all cpus to be present.  This should really come from RTEMS SMP. */
54#define CPU_FOREACH(i)              \
55  for ((i) = 0; (i) <= mp_maxid; (i)++)       \
56    if (!CPU_ABSENT((i)))
57
58/*
59 * The initial hash tables come out of this zone so they can be allocated
60 * prior to malloc coming up.
61 */
62static uma_zone_t hashzone;
63
64/* The boot-time adjusted value for cache line alignment. */
65static int uma_align_cache = 64 - 1;
66
67static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
68
69/*
70 * Are we allowed to allocate buckets?
71 */
72static int bucketdisable = 1;
73
74/* Linked list of all kegs in the system */
75static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
76
77/* This mutex protects the keg list */
78static struct mtx uma_mtx;
79
80/* Linked list of boot time pages */
81static LIST_HEAD(,uma_slab) uma_boot_pages =
82    LIST_HEAD_INITIALIZER(uma_boot_pages);
83
84/* This mutex protects the boot time pages list */
85static struct mtx uma_boot_pages_mtx;
86
87/* Is the VM done starting up? */
88static int booted = 0;
89
90/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
91static u_int uma_max_ipers;
92static u_int uma_max_ipers_ref;
93
94/*
95 * This is the handle used to schedule events that need to happen
96 * outside of the allocation fast path.
97 */
98static struct callout uma_callout;
99#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
100
101/*
102 * This structure is passed as the zone ctor arg so that I don't have to create
103 * a special allocation function just for zones.
104 */
105struct uma_zctor_args {
106        char *name;
107        size_t size;
108        uma_ctor ctor;
109        uma_dtor dtor;
110        uma_init uminit;
111        uma_fini fini;
112        uma_keg_t keg;
113        int align;
114        u_int32_t flags;
115};
116
117struct uma_kctor_args {
118        uma_zone_t zone;
119        size_t size;
120        uma_init uminit;
121        uma_fini fini;
122        int align;
123        u_int32_t flags;
124};
125
126struct uma_bucket_zone {
127        uma_zone_t      ubz_zone;
128        char            *ubz_name;
129        int             ubz_entries;
130};
131
132#define BUCKET_MAX      128
133
134struct uma_bucket_zone bucket_zones[] = {
135        { NULL, "16 Bucket", 16 },
136        { NULL, "32 Bucket", 32 },
137        { NULL, "64 Bucket", 64 },
138        { NULL, "128 Bucket", 128 },
139        { NULL, NULL, 0}
140};
141
142#define BUCKET_SHIFT    4
143#define BUCKET_ZONES    ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
144
145/*
146 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
147 * of approximately the right size.
148 */
149static uint8_t bucket_size[BUCKET_ZONES];
150
151/*
152 * Flags and enumerations to be passed to internal functions.
153 */
154enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
155
156#define ZFREE_STATFAIL  0x00000001      /* Update zone failure statistic. */
157#define ZFREE_STATFREE  0x00000002      /* Update zone free statistic. */
158
159/* Prototypes.. */
160
161static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
162static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
163static void page_free(void *, int, u_int8_t);
164static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
165static void cache_drain(uma_zone_t);
166static void bucket_drain(uma_zone_t, uma_bucket_t);
167static void bucket_cache_drain(uma_zone_t zone);
168static int keg_ctor(void *, int, void *, int);
169static void keg_dtor(void *, int, void *);
170static int zone_ctor(void *, int, void *, int);
171static void zone_dtor(void *, int, void *);
172static int zero_init(void *, int, int);
173static void keg_small_init(uma_keg_t keg);
174static void keg_large_init(uma_keg_t keg);
175static void zone_foreach(void (*zfunc)(uma_zone_t));
176static void zone_timeout(uma_zone_t zone);
177static int hash_alloc(struct uma_hash *);
178static int hash_expand(struct uma_hash *, struct uma_hash *);
179static void hash_free(struct uma_hash *hash);
180static void *zone_alloc_item(uma_zone_t, void *, int);
181static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
182    int);
183static void bucket_init(void);
184static uma_bucket_t bucket_alloc(int, int);
185static void bucket_free(uma_bucket_t);
186static void bucket_zone_drain(void);
187static int zone_alloc_bucket(uma_zone_t zone, int flags);
188static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
189static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
190static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
191static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
192    uma_fini fini, int align, u_int32_t flags);
193static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
194static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
195
196void uma_print_zone(uma_zone_t);
197void uma_print_stats(void);
198
199/*
200 * Initialize bucket_zones, the array of zones of buckets of various sizes.
201 *
202 * For each zone, calculate the memory required for each bucket, consisting
203 * of the header and an array of pointers.  Initialize bucket_size[] to point
204 * the range of appropriate bucket sizes at the zone.
205 */
206static void
207bucket_init(void)
208{
209        struct uma_bucket_zone *ubz;
210        int i;
211        int j;
212
213        for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
214                int size;
215
216                ubz = &bucket_zones[j];
217                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
218                size += sizeof(void *) * ubz->ubz_entries;
219                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
220                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
221                    UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
222                for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
223                        bucket_size[i >> BUCKET_SHIFT] = j;
224        }
225}
226
227/*
228 * Given a desired number of entries for a bucket, return the zone from which
229 * to allocate the bucket.
230 */
231static struct uma_bucket_zone *
232bucket_zone_lookup(int entries)
233{
234        int idx;
235
236        idx = howmany(entries, 1 << BUCKET_SHIFT);
237        return (&bucket_zones[bucket_size[idx]]);
238}
239
240static uma_bucket_t
241bucket_alloc(int entries, int bflags)
242{
243        struct uma_bucket_zone *ubz;
244        uma_bucket_t bucket;
245
246        /*
247         * This is to stop us from allocating per cpu buckets while we're
248         * running out of vm.boot_pages.  Otherwise, we would exhaust the
249         * boot pages.  This also prevents us from allocating buckets in
250         * low memory situations.
251         */
252        if (bucketdisable)
253                return (NULL);
254
255        ubz = bucket_zone_lookup(entries);
256        bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
257        if (bucket) {
258#ifdef INVARIANTS
259                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
260#endif
261                bucket->ub_cnt = 0;
262                bucket->ub_entries = ubz->ubz_entries;
263        }
264
265        return (bucket);
266}
267
268static void
269bucket_free(uma_bucket_t bucket)
270{
271        struct uma_bucket_zone *ubz;
272
273        ubz = bucket_zone_lookup(bucket->ub_entries);
274        zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
275            ZFREE_STATFREE);
276}
277
278static void
279bucket_zone_drain(void)
280{
281        struct uma_bucket_zone *ubz;
282
283        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
284                zone_drain(ubz->ubz_zone);
285}
286
287static inline uma_keg_t
288zone_first_keg(uma_zone_t zone)
289{
290
291        return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
292}
293
294static void
295zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
296{
297        uma_klink_t klink;
298
299        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
300                kegfn(klink->kl_keg);
301}
302
303/*
304 * Routine to perform timeout driven calculations.  This expands the
305 * hashes and does per cpu statistics aggregation.
306 *
307 *  Returns nothing.
308 */
309static void
310keg_timeout(uma_keg_t keg)
311{
312
313        KEG_LOCK(keg);
314        /*
315         * Expand the keg hash table.
316         *
317         * This is done if the number of slabs is larger than the hash size.
318         * What I'm trying to do here is completely reduce collisions.  This
319         * may be a little aggressive.  Should I allow for two collisions max?
320         */
321        if (keg->uk_flags & UMA_ZONE_HASH &&
322            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
323                struct uma_hash newhash;
324                struct uma_hash oldhash;
325                int ret;
326
327                /*
328                 * This is so involved because allocating and freeing
329                 * while the keg lock is held will lead to deadlock.
330                 * I have to do everything in stages and check for
331                 * races.
332                 */
333                newhash = keg->uk_hash;
334                KEG_UNLOCK(keg);
335                ret = hash_alloc(&newhash);
336                KEG_LOCK(keg);
337                if (ret) {
338                        if (hash_expand(&keg->uk_hash, &newhash)) {
339                                oldhash = keg->uk_hash;
340                                keg->uk_hash = newhash;
341                        } else
342                                oldhash = newhash;
343
344                        KEG_UNLOCK(keg);
345                        hash_free(&oldhash);
346                        KEG_LOCK(keg);
347                }
348        }
349        KEG_UNLOCK(keg);
350}
351
352static void
353zone_timeout(uma_zone_t zone)
354{
355
356        zone_foreach_keg(zone, &keg_timeout);
357}
358
359/*
360 * Allocate and zero fill the next sized hash table from the appropriate
361 * backing store.
362 *
363 * Arguments:
364 *      hash  A new hash structure with the old hash size in uh_hashsize
365 *
366 * Returns:
367 *      1 on sucess and 0 on failure.
368 */
369static int
370hash_alloc(struct uma_hash *hash)
371{
372        int oldsize;
373        int alloc;
374
375        oldsize = hash->uh_hashsize;
376
377        /* We're just going to go to a power of two greater */
378        if (oldsize)  {
379                hash->uh_hashsize = oldsize * 2;
380                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
381                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
382                    M_UMAHASH, M_NOWAIT);
383        } else {
384                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
385                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
386                    M_WAITOK);
387                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
388        }
389        if (hash->uh_slab_hash) {
390                bzero(hash->uh_slab_hash, alloc);
391                hash->uh_hashmask = hash->uh_hashsize - 1;
392                return (1);
393        }
394
395        return (0);
396}
397
398/*
399 * Expands the hash table for HASH zones.  This is done from zone_timeout
400 * to reduce collisions.  This must not be done in the regular allocation
401 * path, otherwise, we can recurse on the vm while allocating pages.
402 *
403 * Arguments:
404 *      oldhash  The hash you want to expand
405 *      newhash  The hash structure for the new table
406 *
407 * Returns:
408 *      Nothing
409 *
410 * Discussion:
411 */
412static int
413hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
414{
415        uma_slab_t slab;
416        int hval;
417        int i;
418
419        if (!newhash->uh_slab_hash)
420                return (0);
421
422        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
423                return (0);
424
425        /*
426         * I need to investigate hash algorithms for resizing without a
427         * full rehash.
428         */
429
430        for (i = 0; i < oldhash->uh_hashsize; i++)
431                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
432                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
433                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
434                        hval = UMA_HASH(newhash, slab->us_data);
435                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
436                            slab, us_hlink);
437                }
438
439        return (1);
440}
441
442/*
443 * Free the hash bucket to the appropriate backing store.
444 *
445 * Arguments:
446 *      slab_hash  The hash bucket we're freeing
447 *      hashsize   The number of entries in that hash bucket
448 *
449 * Returns:
450 *      Nothing
451 */
452static void
453hash_free(struct uma_hash *hash)
454{
455        if (hash->uh_slab_hash == NULL)
456                return;
457        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
458                zone_free_item(hashzone,
459                    hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
460        else
461                free(hash->uh_slab_hash, M_UMAHASH);
462}
463
464/*
465 * Frees all outstanding items in a bucket
466 *
467 * Arguments:
468 *      zone   The zone to free to, must be unlocked.
469 *      bucket The free/alloc bucket with items, cpu queue must be locked.
470 *
471 * Returns:
472 *      Nothing
473 */
474
475static void
476bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
477{
478        void *item;
479
480        if (bucket == NULL)
481                return;
482
483        while (bucket->ub_cnt > 0)  {
484                bucket->ub_cnt--;
485                item = bucket->ub_bucket[bucket->ub_cnt];
486#ifdef INVARIANTS
487                bucket->ub_bucket[bucket->ub_cnt] = NULL;
488                KASSERT(item != NULL,
489                    ("bucket_drain: botched ptr, item is NULL"));
490#endif
491                zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
492        }
493}
494
495/*
496 * Drains the per cpu caches for a zone.
497 *
498 * NOTE: This may only be called while the zone is being turn down, and not
499 * during normal operation.  This is necessary in order that we do not have
500 * to migrate CPUs to drain the per-CPU caches.
501 *
502 * Arguments:
503 *      zone     The zone to drain, must be unlocked.
504 *
505 * Returns:
506 *      Nothing
507 */
508static void
509cache_drain(uma_zone_t zone)
510{
511        uma_cache_t cache;
512        int cpu;
513
514        /*
515         * XXX: It is safe to not lock the per-CPU caches, because we're
516         * tearing down the zone anyway.  I.e., there will be no further use
517         * of the caches at this point.
518         *
519         * XXX: It would good to be able to assert that the zone is being
520         * torn down to prevent improper use of cache_drain().
521         *
522         * XXX: We lock the zone before passing into bucket_cache_drain() as
523         * it is used elsewhere.  Should the tear-down path be made special
524         * there in some form?
525         */
526        for (cpu = 0; cpu <= mp_maxid; cpu++) {
527                if (CPU_ABSENT(cpu))
528                        continue;
529                cache = &zone->uz_cpu[cpu];
530                bucket_drain(zone, cache->uc_allocbucket);
531                bucket_drain(zone, cache->uc_freebucket);
532                if (cache->uc_allocbucket != NULL)
533                        bucket_free(cache->uc_allocbucket);
534                if (cache->uc_freebucket != NULL)
535                        bucket_free(cache->uc_freebucket);
536                cache->uc_allocbucket = cache->uc_freebucket = NULL;
537        }
538        ZONE_LOCK(zone);
539        bucket_cache_drain(zone);
540        ZONE_UNLOCK(zone);
541}
542
543/*
544 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
545 */
546static void
547bucket_cache_drain(uma_zone_t zone)
548{
549        uma_bucket_t bucket;
550
551        /*
552         * Drain the bucket queues and free the buckets, we just keep two per
553         * cpu (alloc/free).
554         */
555        while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
556                LIST_REMOVE(bucket, ub_link);
557                ZONE_UNLOCK(zone);
558                bucket_drain(zone, bucket);
559                bucket_free(bucket);
560                ZONE_LOCK(zone);
561        }
562
563        /* Now we do the free queue.. */
564        while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
565                LIST_REMOVE(bucket, ub_link);
566                bucket_free(bucket);
567        }
568}
569
570/*
571 * Frees pages from a keg back to the system.  This is done on demand from
572 * the pageout daemon.
573 *
574 * Returns nothing.
575 */
576static void
577keg_drain(uma_keg_t keg)
578{
579        struct slabhead freeslabs = { 0 };
580        uma_slab_t slab;
581        uma_slab_t n;
582        u_int8_t flags;
583        u_int8_t *mem;
584        int i;
585
586        /*
587         * We don't want to take pages from statically allocated kegs at this
588         * time
589         */
590        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
591                return;
592
593#ifdef UMA_DEBUG
594        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
595#endif
596        KEG_LOCK(keg);
597        if (keg->uk_free == 0)
598                goto finished;
599
600        slab = LIST_FIRST(&keg->uk_free_slab);
601        while (slab) {
602                n = LIST_NEXT(slab, us_link);
603
604                /* We have no where to free these to */
605                if (slab->us_flags & UMA_SLAB_BOOT) {
606                        slab = n;
607                        continue;
608                }
609
610                LIST_REMOVE(slab, us_link);
611                keg->uk_pages -= keg->uk_ppera;
612                keg->uk_free -= keg->uk_ipers;
613
614                if (keg->uk_flags & UMA_ZONE_HASH)
615                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
616
617                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
618
619                slab = n;
620        }
621finished:
622        KEG_UNLOCK(keg);
623
624        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
625                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
626                if (keg->uk_fini)
627                        for (i = 0; i < keg->uk_ipers; i++)
628                                keg->uk_fini(
629                                    slab->us_data + (keg->uk_rsize * i),
630                                    keg->uk_size);
631                flags = slab->us_flags;
632                mem = slab->us_data;
633
634                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
635                        zone_free_item(keg->uk_slabzone, slab, NULL,
636                            SKIP_NONE, ZFREE_STATFREE);
637#ifdef UMA_DEBUG
638                printf("%s: Returning %d bytes.\n",
639                    keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
640#endif
641                keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
642        }
643}
644
645static void
646zone_drain_wait(uma_zone_t zone, int waitok)
647{
648
649        /*
650         * Set draining to interlock with zone_dtor() so we can release our
651         * locks as we go.  Only dtor() should do a WAITOK call since it
652         * is the only call that knows the structure will still be available
653         * when it wakes up.
654         */
655        ZONE_LOCK(zone);
656        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
657                if (waitok == M_NOWAIT)
658                        goto out;
659                mtx_unlock(&uma_mtx);
660                msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
661                mtx_lock(&uma_mtx);
662        }
663        zone->uz_flags |= UMA_ZFLAG_DRAINING;
664        bucket_cache_drain(zone);
665        ZONE_UNLOCK(zone);
666        /*
667         * The DRAINING flag protects us from being freed while
668         * we're running.  Normally the uma_mtx would protect us but we
669         * must be able to release and acquire the right lock for each keg.
670         */
671        zone_foreach_keg(zone, &keg_drain);
672        ZONE_LOCK(zone);
673        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
674        wakeup(zone);
675out:
676        ZONE_UNLOCK(zone);
677}
678
679void
680zone_drain(uma_zone_t zone)
681{
682
683        zone_drain_wait(zone, M_NOWAIT);
684}
685
686/*
687 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
688 *
689 * Arguments:
690 *      wait  Shall we wait?
691 *
692 * Returns:
693 *      The slab that was allocated or NULL if there is no memory and the
694 *      caller specified M_NOWAIT.
695 */
696static uma_slab_t
697keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
698{
699        uma_slabrefcnt_t slabref;
700        uma_alloc allocf;
701        uma_slab_t slab;
702        u_int8_t *mem;
703        u_int8_t flags;
704        int i;
705
706        mtx_assert(&keg->uk_lock, MA_OWNED);
707        slab = NULL;
708
709#ifdef UMA_DEBUG
710        printf("slab_zalloc:  Allocating a new slab for %s\n", keg->uk_name);
711#endif
712        allocf = keg->uk_allocf;
713        KEG_UNLOCK(keg);
714
715        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
716                slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
717                if (slab == NULL) {
718                        KEG_LOCK(keg);
719                        return NULL;
720                }
721        }
722
723        /*
724         * This reproduces the old vm_zone behavior of zero filling pages the
725         * first time they are added to a zone.
726         *
727         * Malloced items are zeroed in uma_zalloc.
728         */
729
730        if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
731                wait |= M_ZERO;
732        else
733                wait &= ~M_ZERO;
734
735        /* zone is passed for legacy reasons. */
736        mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
737        if (mem == NULL) {
738                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
739                        zone_free_item(keg->uk_slabzone, slab, NULL,
740                            SKIP_NONE, ZFREE_STATFREE);
741                KEG_LOCK(keg);
742                return (NULL);
743        }
744
745        /* Point the slab into the allocated memory */
746        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
747                slab = (uma_slab_t )(mem + keg->uk_pgoff);
748
749        slab->us_keg = keg;
750        slab->us_data = mem;
751        slab->us_freecount = keg->uk_ipers;
752        slab->us_firstfree = 0;
753        slab->us_flags = flags;
754
755        if (keg->uk_flags & UMA_ZONE_REFCNT) {
756                slabref = (uma_slabrefcnt_t)slab;
757                for (i = 0; i < keg->uk_ipers; i++) {
758                        slabref->us_freelist[i].us_refcnt = 0;
759                        slabref->us_freelist[i].us_item = i+1;
760                }
761        } else {
762                for (i = 0; i < keg->uk_ipers; i++)
763                        slab->us_freelist[i].us_item = i+1;
764        }
765
766        if (keg->uk_init != NULL) {
767                for (i = 0; i < keg->uk_ipers; i++)
768                        if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
769                            keg->uk_size, wait) != 0)
770                                break;
771                if (i != keg->uk_ipers) {
772                        if (keg->uk_fini != NULL) {
773                                for (i--; i > -1; i--)
774                                        keg->uk_fini(slab->us_data +
775                                            (keg->uk_rsize * i),
776                                            keg->uk_size);
777                        }
778                        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
779                                zone_free_item(keg->uk_slabzone, slab,
780                                    NULL, SKIP_NONE, ZFREE_STATFREE);
781                        keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
782                            flags);
783                        KEG_LOCK(keg);
784                        return (NULL);
785                }
786        }
787        KEG_LOCK(keg);
788
789        if (keg->uk_flags & UMA_ZONE_HASH)
790                UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
791
792        keg->uk_pages += keg->uk_ppera;
793        keg->uk_free += keg->uk_ipers;
794
795        return (slab);
796}
797
798/*
799 * This function is intended to be used early on in place of page_alloc() so
800 * that we may use the boot time page cache to satisfy allocations before
801 * the VM is ready.
802 */
803static void *
804startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
805{
806        uma_keg_t keg;
807        uma_slab_t tmps;
808        int pages, check_pages;
809
810        keg = zone_first_keg(zone);
811        pages = howmany(bytes, PAGE_SIZE);
812        check_pages = pages - 1;
813        KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
814
815        /*
816         * Check our small startup cache to see if it has pages remaining.
817         */
818        mtx_lock(&uma_boot_pages_mtx);
819
820        /* First check if we have enough room. */
821        tmps = LIST_FIRST(&uma_boot_pages);
822        while (tmps != NULL && check_pages-- > 0)
823                tmps = LIST_NEXT(tmps, us_link);
824        if (tmps != NULL) {
825                /*
826                 * It's ok to lose tmps references.  The last one will
827                 * have tmps->us_data pointing to the start address of
828                 * "pages" contiguous pages of memory.
829                 */
830                while (pages-- > 0) {
831                        tmps = LIST_FIRST(&uma_boot_pages);
832                        LIST_REMOVE(tmps, us_link);
833                }
834                mtx_unlock(&uma_boot_pages_mtx);
835                *pflag = tmps->us_flags;
836                return (tmps->us_data);
837        }
838        mtx_unlock(&uma_boot_pages_mtx);
839        if (booted == 0)
840                panic("UMA: Increase vm.boot_pages");
841        /*
842         * Now that we've booted reset these users to their real allocator.
843         */
844#ifdef UMA_MD_SMALL_ALLOC
845        keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
846#else
847        keg->uk_allocf = page_alloc;
848#endif
849        return keg->uk_allocf(zone, bytes, pflag, wait);
850}
851
852/*
853 * Allocates a number of pages from the system
854 *
855 * Arguments:
856 *      bytes  The number of bytes requested
857 *      wait  Shall we wait?
858 *
859 * Returns:
860 *      A pointer to the alloced memory or possibly
861 *      NULL if M_NOWAIT is set.
862 */
863static void *
864page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
865{
866        void *p;        /* Returned page */
867
868        *pflag = UMA_SLAB_KMEM;
869  p = (void *) malloc(bytes, M_TEMP, wait);
870
871        return (p);
872}
873
874/*
875 * Frees a number of pages to the system
876 *
877 * Arguments:
878 *      mem   A pointer to the memory to be freed
879 *      size  The size of the memory being freed
880 *      flags The original p->us_flags field
881 *
882 * Returns:
883 *      Nothing
884 */
885static void
886page_free(void *mem, int size, u_int8_t flags)
887{
888  free( mem, M_TEMP );
889}
890
891/*
892 * Zero fill initializer
893 *
894 * Arguments/Returns follow uma_init specifications
895 */
896static int
897zero_init(void *mem, int size, int flags)
898{
899        bzero(mem, size);
900        return (0);
901}
902
903/*
904 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
905 *
906 * Arguments
907 *      keg  The zone we should initialize
908 *
909 * Returns
910 *      Nothing
911 */
912static void
913keg_small_init(uma_keg_t keg)
914{
915        u_int rsize;
916        u_int memused;
917        u_int wastedspace;
918        u_int shsize;
919
920        KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
921        rsize = keg->uk_size;
922
923        if (rsize < UMA_SMALLEST_UNIT)
924                rsize = UMA_SMALLEST_UNIT;
925        if (rsize & keg->uk_align)
926                rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
927
928        keg->uk_rsize = rsize;
929        keg->uk_ppera = 1;
930
931        if (keg->uk_flags & UMA_ZONE_REFCNT) {
932                rsize += UMA_FRITMREF_SZ;       /* linkage & refcnt */
933                shsize = sizeof(struct uma_slab_refcnt);
934        } else {
935                rsize += UMA_FRITM_SZ;  /* Account for linkage */
936                shsize = sizeof(struct uma_slab);
937        }
938
939        keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
940        KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
941        memused = keg->uk_ipers * rsize + shsize;
942        wastedspace = UMA_SLAB_SIZE - memused;
943
944        /*
945         * We can't do OFFPAGE if we're internal or if we've been
946         * asked to not go to the VM for buckets.  If we do this we
947         * may end up going to the VM (kmem_map) for slabs which we
948         * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
949         * result of UMA_ZONE_VM, which clearly forbids it.
950         */
951        if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
952            (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
953                return;
954
955        if ((wastedspace >= UMA_MAX_WASTE) &&
956            (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
957                keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
958                KASSERT(keg->uk_ipers <= 255,
959                    ("keg_small_init: keg->uk_ipers too high!"));
960#ifdef UMA_DEBUG
961                printf("UMA decided we need offpage slab headers for "
962                    "keg: %s, calculated wastedspace = %d, "
963                    "maximum wasted space allowed = %d, "
964                    "calculated ipers = %d, "
965                    "new wasted space = %d\n", keg->uk_name, wastedspace,
966                    UMA_MAX_WASTE, keg->uk_ipers,
967                    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
968#endif
969                keg->uk_flags |= UMA_ZONE_OFFPAGE;
970                if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
971                        keg->uk_flags |= UMA_ZONE_HASH;
972        }
973}
974
975/*
976 * Finish creating a large (> UMA_SLAB_SIZE) uma kegs.  Just give in and do
977 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
978 * more complicated.
979 *
980 * Arguments
981 *      keg  The keg we should initialize
982 *
983 * Returns
984 *      Nothing
985 */
986static void
987keg_large_init(uma_keg_t keg)
988{
989        int pages;
990
991        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
992        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
993            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
994
995        pages = keg->uk_size / UMA_SLAB_SIZE;
996
997        /* Account for remainder */
998        if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
999                pages++;
1000
1001        keg->uk_ppera = pages;
1002        keg->uk_ipers = 1;
1003        keg->uk_rsize = keg->uk_size;
1004
1005        /* We can't do OFFPAGE if we're internal, bail out here. */
1006        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1007                return;
1008
1009        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1010        if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1011                keg->uk_flags |= UMA_ZONE_HASH;
1012}
1013
1014static void
1015keg_cachespread_init(uma_keg_t keg)
1016{
1017        int alignsize;
1018        int trailer;
1019        int pages;
1020        int rsize;
1021
1022        alignsize = keg->uk_align + 1;
1023        rsize = keg->uk_size;
1024        /*
1025         * We want one item to start on every align boundary in a page.  To
1026         * do this we will span pages.  We will also extend the item by the
1027         * size of align if it is an even multiple of align.  Otherwise, it
1028         * would fall on the same boundary every time.
1029         */
1030        if (rsize & keg->uk_align)
1031                rsize = (rsize & ~keg->uk_align) + alignsize;
1032        if ((rsize & alignsize) == 0)
1033                rsize += alignsize;
1034        trailer = rsize - keg->uk_size;
1035        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1036        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1037        keg->uk_rsize = rsize;
1038        keg->uk_ppera = pages;
1039        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1040  //keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1041        KASSERT(keg->uk_ipers <= uma_max_ipers,
1042            ("keg_small_init: keg->uk_ipers too high(%d) increase max_ipers",
1043            keg->uk_ipers));
1044}
1045
1046/*
1047 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1048 * the keg onto the global keg list.
1049 *
1050 * Arguments/Returns follow uma_ctor specifications
1051 *      udata  Actually uma_kctor_args
1052 */
1053static int
1054keg_ctor(void *mem, int size, void *udata, int flags)
1055{
1056        struct uma_kctor_args *arg = udata;
1057        uma_keg_t keg = mem;
1058        uma_zone_t zone;
1059
1060        bzero(keg, size);
1061        keg->uk_size = arg->size;
1062        keg->uk_init = arg->uminit;
1063        keg->uk_fini = arg->fini;
1064        keg->uk_align = arg->align;
1065        keg->uk_free = 0;
1066        keg->uk_pages = 0;
1067        keg->uk_flags = arg->flags;
1068        keg->uk_allocf = page_alloc;
1069        keg->uk_freef = page_free;
1070        keg->uk_recurse = 0;
1071        keg->uk_slabzone = NULL;
1072
1073        /*
1074         * The master zone is passed to us at keg-creation time.
1075         */
1076        zone = arg->zone;
1077        keg->uk_name = zone->uz_name;
1078
1079        if (arg->flags & UMA_ZONE_VM)
1080                keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1081
1082        if (arg->flags & UMA_ZONE_ZINIT)
1083                keg->uk_init = zero_init;
1084
1085  /*if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
1086    keg->uk_flags |= UMA_ZONE_VTOSLAB;*/
1087
1088        /*
1089         * The +UMA_FRITM_SZ added to uk_size is to account for the
1090         * linkage that is added to the size in keg_small_init().  If
1091         * we don't account for this here then we may end up in
1092         * keg_small_init() with a calculated 'ipers' of 0.
1093         */
1094        if (keg->uk_flags & UMA_ZONE_REFCNT) {
1095                if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1096                        keg_cachespread_init(keg);
1097                else if ((keg->uk_size+UMA_FRITMREF_SZ) >
1098                    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1099                        keg_large_init(keg);
1100                else
1101                        keg_small_init(keg);
1102        } else {
1103                if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1104                        keg_cachespread_init(keg);
1105                else if ((keg->uk_size+UMA_FRITM_SZ) >
1106                    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1107                        keg_large_init(keg);
1108                else
1109                        keg_small_init(keg);
1110        }
1111
1112        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1113                if (keg->uk_flags & UMA_ZONE_REFCNT)
1114                        keg->uk_slabzone = slabrefzone;
1115                else
1116                        keg->uk_slabzone = slabzone;
1117        }
1118
1119        /*
1120         * If we haven't booted yet we need allocations to go through the
1121         * startup cache until the vm is ready.
1122         */
1123        if (keg->uk_ppera == 1) {
1124#ifdef UMA_MD_SMALL_ALLOC
1125                keg->uk_allocf = uma_small_alloc;
1126                keg->uk_freef = uma_small_free;
1127#endif
1128                if (booted == 0)
1129                        keg->uk_allocf = startup_alloc;
1130        } else if (booted == 0 && (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1131                keg->uk_allocf = startup_alloc;
1132
1133        /*
1134         * Initialize keg's lock (shared among zones).
1135         */
1136        if (arg->flags & UMA_ZONE_MTXCLASS)
1137                KEG_LOCK_INIT(keg, 1);
1138        else
1139                KEG_LOCK_INIT(keg, 0);
1140
1141        /*
1142         * If we're putting the slab header in the actual page we need to
1143         * figure out where in each page it goes.  This calculates a right
1144         * justified offset into the memory on an ALIGN_PTR boundary.
1145         */
1146        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1147                u_int totsize;
1148
1149                /* Size of the slab struct and free list */
1150                if (keg->uk_flags & UMA_ZONE_REFCNT)
1151                        totsize = sizeof(struct uma_slab_refcnt) +
1152                            keg->uk_ipers * UMA_FRITMREF_SZ;
1153                else
1154                        totsize = sizeof(struct uma_slab) +
1155                            keg->uk_ipers * UMA_FRITM_SZ;
1156
1157                if (totsize & UMA_ALIGN_PTR)
1158                        totsize = (totsize & ~UMA_ALIGN_PTR) +
1159                            (UMA_ALIGN_PTR + 1);
1160                keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
1161
1162                if (keg->uk_flags & UMA_ZONE_REFCNT)
1163                        totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1164                            + keg->uk_ipers * UMA_FRITMREF_SZ;
1165                else
1166                        totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1167                            + keg->uk_ipers * UMA_FRITM_SZ;
1168
1169                /*
1170                 * The only way the following is possible is if with our
1171                 * UMA_ALIGN_PTR adjustments we are now bigger than
1172                 * UMA_SLAB_SIZE.  I haven't checked whether this is
1173                 * mathematically possible for all cases, so we make
1174                 * sure here anyway.
1175                 */
1176                if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
1177                        printf("zone %s ipers %d rsize %d size %d\n",
1178                            zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1179                            keg->uk_size);
1180                        panic("UMA slab won't fit.");
1181                }
1182        }
1183
1184        if (keg->uk_flags & UMA_ZONE_HASH)
1185                hash_alloc(&keg->uk_hash);
1186
1187#ifdef UMA_DEBUG
1188        printf("UMA: %s(%p) size %d(%d) flags %d ipers %d ppera %d out %d free %d\n",
1189            zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1190            keg->uk_ipers, keg->uk_ppera,
1191            (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
1192#endif
1193
1194        LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1195
1196        mtx_lock(&uma_mtx);
1197        LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1198        mtx_unlock(&uma_mtx);
1199        return (0);
1200}
1201
1202/*
1203 * Zone header ctor.  This initializes all fields, locks, etc.
1204 *
1205 * Arguments/Returns follow uma_ctor specifications
1206 *      udata  Actually uma_zctor_args
1207 */
1208static int
1209zone_ctor(void *mem, int size, void *udata, int flags)
1210{
1211        struct uma_zctor_args *arg = udata;
1212        uma_zone_t zone = mem;
1213        uma_zone_t z;
1214        uma_keg_t keg;
1215
1216        bzero(zone, size);
1217        zone->uz_name = arg->name;
1218        zone->uz_ctor = arg->ctor;
1219        zone->uz_dtor = arg->dtor;
1220        zone->uz_slab = zone_fetch_slab;
1221        zone->uz_init = NULL;
1222        zone->uz_fini = NULL;
1223        zone->uz_allocs = 0;
1224        zone->uz_frees = 0;
1225        zone->uz_fails = 0;
1226        zone->uz_fills = zone->uz_count = 0;
1227        zone->uz_flags = 0;
1228        keg = arg->keg;
1229
1230        if (arg->flags & UMA_ZONE_SECONDARY) {
1231                KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1232                zone->uz_init = arg->uminit;
1233                zone->uz_fini = arg->fini;
1234                zone->uz_lock = &keg->uk_lock;
1235                zone->uz_flags |= UMA_ZONE_SECONDARY;
1236                mtx_lock(&uma_mtx);
1237                ZONE_LOCK(zone);
1238                LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1239                        if (LIST_NEXT(z, uz_link) == NULL) {
1240                                LIST_INSERT_AFTER(z, zone, uz_link);
1241                                break;
1242                        }
1243                }
1244                ZONE_UNLOCK(zone);
1245                mtx_unlock(&uma_mtx);
1246        } else if (keg == NULL) {
1247                if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1248                    arg->align, arg->flags)) == NULL)
1249                        return (ENOMEM);
1250        } else {
1251                struct uma_kctor_args karg;
1252                int error;
1253
1254                /* We should only be here from uma_startup() */
1255                karg.size = arg->size;
1256                karg.uminit = arg->uminit;
1257                karg.fini = arg->fini;
1258                karg.align = arg->align;
1259                karg.flags = arg->flags;
1260                karg.zone = zone;
1261                error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1262                    flags);
1263                if (error)
1264                        return (error);
1265        }
1266        /*
1267         * Link in the first keg.
1268         */
1269        zone->uz_klink.kl_keg = keg;
1270        LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1271        zone->uz_lock = &keg->uk_lock;
1272        zone->uz_size = keg->uk_size;
1273        zone->uz_flags |= (keg->uk_flags &
1274            (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1275
1276        /*
1277         * Some internal zones don't have room allocated for the per cpu
1278         * caches.  If we're internal, bail out here.
1279         */
1280        if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1281                KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1282                    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1283                return (0);
1284        }
1285
1286        if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1287                zone->uz_count = BUCKET_MAX;
1288        else if (keg->uk_ipers <= BUCKET_MAX)
1289                zone->uz_count = keg->uk_ipers;
1290        else
1291                zone->uz_count = BUCKET_MAX;
1292        return (0);
1293}
1294
1295/*
1296 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1297 * table and removes the keg from the global list.
1298 *
1299 * Arguments/Returns follow uma_dtor specifications
1300 *      udata  unused
1301 */
1302static void
1303keg_dtor(void *arg, int size, void *udata)
1304{
1305        uma_keg_t keg;
1306
1307        keg = (uma_keg_t)arg;
1308        KEG_LOCK(keg);
1309        if (keg->uk_free != 0) {
1310                printf("Freed UMA keg was not empty (%d items). "
1311                    " Lost %d pages of memory.\n",
1312                    keg->uk_free, keg->uk_pages);
1313        }
1314        KEG_UNLOCK(keg);
1315
1316        hash_free(&keg->uk_hash);
1317
1318        KEG_LOCK_FINI(keg);
1319}
1320
1321/*
1322 * Zone header dtor.
1323 *
1324 * Arguments/Returns follow uma_dtor specifications
1325 *      udata  unused
1326 */
1327static void
1328zone_dtor(void *arg, int size, void *udata)
1329{
1330        uma_klink_t klink;
1331        uma_zone_t zone;
1332        uma_keg_t keg;
1333
1334        zone = (uma_zone_t)arg;
1335        keg = zone_first_keg(zone);
1336
1337        if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1338                cache_drain(zone);
1339
1340        mtx_lock(&uma_mtx);
1341        LIST_REMOVE(zone, uz_link);
1342        mtx_unlock(&uma_mtx);
1343        /*
1344         * XXX there are some races here where
1345         * the zone can be drained but zone lock
1346         * released and then refilled before we
1347         * remove it... we dont care for now
1348         */
1349        zone_drain_wait(zone, M_WAITOK);
1350        /*
1351         * Unlink all of our kegs.
1352         */
1353        while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1354                klink->kl_keg = NULL;
1355                LIST_REMOVE(klink, kl_link);
1356                if (klink == &zone->uz_klink)
1357                        continue;
1358                free(klink, M_TEMP);
1359        }
1360        /*
1361         * We only destroy kegs from non secondary zones.
1362         */
1363        if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1364                mtx_lock(&uma_mtx);
1365                LIST_REMOVE(keg, uk_link);
1366                mtx_unlock(&uma_mtx);
1367                zone_free_item(kegs, keg, NULL, SKIP_NONE,
1368                    ZFREE_STATFREE);
1369        }
1370}
1371
1372/*
1373 * Traverses every zone in the system and calls a callback
1374 *
1375 * Arguments:
1376 *      zfunc  A pointer to a function which accepts a zone
1377 *              as an argument.
1378 *
1379 * Returns:
1380 *      Nothing
1381 */
1382static void
1383zone_foreach(void (*zfunc)(uma_zone_t))
1384{
1385        uma_keg_t keg;
1386        uma_zone_t zone;
1387
1388        mtx_lock(&uma_mtx);
1389        LIST_FOREACH(keg, &uma_kegs, uk_link) {
1390                LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1391                        zfunc(zone);
1392        }
1393        mtx_unlock(&uma_mtx);
1394}
1395
1396/* Public functions */
1397/* See uma.h */
1398void
1399uma_startup(void *bootmem, int boot_pages)
1400{
1401        struct uma_zctor_args args;
1402        uma_slab_t slab;
1403        u_int slabsize;
1404        u_int objsize, totsize, wsize;
1405        int i;
1406
1407#ifdef UMA_DEBUG
1408        printf("Creating uma keg headers zone and keg.\n");
1409#endif
1410        mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1411
1412        /*
1413         * Figure out the maximum number of items-per-slab we'll have if
1414         * we're using the OFFPAGE slab header to track free items, given
1415         * all possible object sizes and the maximum desired wastage
1416         * (UMA_MAX_WASTE).
1417         *
1418         * We iterate until we find an object size for
1419         * which the calculated wastage in keg_small_init() will be
1420         * enough to warrant OFFPAGE.  Since wastedspace versus objsize
1421         * is an overall increasing see-saw function, we find the smallest
1422         * objsize such that the wastage is always acceptable for objects
1423         * with that objsize or smaller.  Since a smaller objsize always
1424         * generates a larger possible uma_max_ipers, we use this computed
1425         * objsize to calculate the largest ipers possible.  Since the
1426         * ipers calculated for OFFPAGE slab headers is always larger than
1427         * the ipers initially calculated in keg_small_init(), we use
1428         * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1429         * obtain the maximum ipers possible for offpage slab headers.
1430         *
1431         * It should be noted that ipers versus objsize is an inversly
1432         * proportional function which drops off rather quickly so as
1433         * long as our UMA_MAX_WASTE is such that the objsize we calculate
1434         * falls into the portion of the inverse relation AFTER the steep
1435         * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1436         *
1437         * Note that we have 8-bits (1 byte) to use as a freelist index
1438         * inside the actual slab header itself and this is enough to
1439         * accomodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
1440         * object with offpage slab header would have ipers =
1441         * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1442         * 1 greater than what our byte-integer freelist index can
1443         * accomodate, but we know that this situation never occurs as
1444         * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1445         * that we need to go to offpage slab headers.  Or, if we do,
1446         * then we trap that condition below and panic in the INVARIANTS case.
1447         */
1448        wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1449        totsize = wsize;
1450        objsize = UMA_SMALLEST_UNIT;
1451        while (totsize >= wsize) {
1452                totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1453                    (objsize + UMA_FRITM_SZ);
1454                totsize *= (UMA_FRITM_SZ + objsize);
1455                objsize++;
1456        }
1457        if (objsize > UMA_SMALLEST_UNIT)
1458                objsize--;
1459        uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
1460
1461        wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1462        totsize = wsize;
1463        objsize = UMA_SMALLEST_UNIT;
1464        while (totsize >= wsize) {
1465                totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1466                    (objsize + UMA_FRITMREF_SZ);
1467                totsize *= (UMA_FRITMREF_SZ + objsize);
1468                objsize++;
1469        }
1470        if (objsize > UMA_SMALLEST_UNIT)
1471                objsize--;
1472        uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);
1473
1474        KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1475            ("uma_startup: calculated uma_max_ipers values too large!"));
1476
1477#ifdef UMA_DEBUG
1478        printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1479        printf("Calculated uma_max_ipers_slab (for OFFPAGE) is %d\n",
1480            uma_max_ipers_ref);
1481#endif
1482
1483        /* "manually" create the initial zone */
1484        args.name = "UMA Kegs";
1485        args.size = sizeof(struct uma_keg);
1486        args.ctor = keg_ctor;
1487        args.dtor = keg_dtor;
1488        args.uminit = zero_init;
1489        args.fini = NULL;
1490        args.keg = &masterkeg;
1491        args.align = 32 - 1;
1492        args.flags = UMA_ZFLAG_INTERNAL;
1493        /* The initial zone has no Per cpu queues so it's smaller */
1494        zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1495
1496#ifdef UMA_DEBUG
1497        printf("Filling boot free list.\n");
1498#endif
1499        for (i = 0; i < boot_pages; i++) {
1500                slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1501                slab->us_data = (u_int8_t *)slab;
1502                slab->us_flags = UMA_SLAB_BOOT;
1503                LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1504        }
1505        mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1506
1507#ifdef UMA_DEBUG
1508        printf("Creating uma zone headers zone and keg.\n");
1509#endif
1510        args.name = "UMA Zones";
1511        args.size = sizeof(struct uma_zone) +
1512            (sizeof(struct uma_cache) * (mp_maxid + 1));
1513        args.ctor = zone_ctor;
1514        args.dtor = zone_dtor;
1515        args.uminit = zero_init;
1516        args.fini = NULL;
1517        args.keg = NULL;
1518        args.align = 32 - 1;
1519        args.flags = UMA_ZFLAG_INTERNAL;
1520        /* The initial zone has no Per cpu queues so it's smaller */
1521        zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1522
1523#ifdef UMA_DEBUG
1524        printf("Initializing pcpu cache locks.\n");
1525#endif
1526#ifdef UMA_DEBUG
1527        printf("Creating slab and hash zones.\n");
1528#endif
1529
1530        /*
1531         * This is the max number of free list items we'll have with
1532         * offpage slabs.
1533         */
1534        slabsize = uma_max_ipers * UMA_FRITM_SZ;
1535        slabsize += sizeof(struct uma_slab);
1536
1537        /* Now make a zone for slab headers */
1538        slabzone = uma_zcreate("UMA Slabs",
1539                                slabsize,
1540                                NULL, NULL, NULL, NULL,
1541                                UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1542
1543        /*
1544         * We also create a zone for the bigger slabs with reference
1545         * counts in them, to accomodate UMA_ZONE_REFCNT zones.
1546         */
1547        slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1548        slabsize += sizeof(struct uma_slab_refcnt);
1549        slabrefzone = uma_zcreate("UMA RCntSlabs",
1550                                  slabsize,
1551                                  NULL, NULL, NULL, NULL,
1552                                  UMA_ALIGN_PTR,
1553                                  UMA_ZFLAG_INTERNAL);
1554
1555        hashzone = uma_zcreate("UMA Hash",
1556            sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1557            NULL, NULL, NULL, NULL,
1558            UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1559
1560        bucket_init();
1561
1562#if defined(UMA_MD_SMALL_ALLOC) && !defined(UMA_MD_SMALL_ALLOC_NEEDS_VM)
1563        booted = 1;
1564#endif
1565
1566#ifdef UMA_DEBUG
1567        printf("UMA startup complete.\n");
1568#endif
1569}
1570
1571static uma_keg_t
1572uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1573                int align, u_int32_t flags)
1574{
1575        struct uma_kctor_args args;
1576
1577        args.size = size;
1578        args.uminit = uminit;
1579        args.fini = fini;
1580        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1581        args.flags = flags;
1582        args.zone = zone;
1583        return (zone_alloc_item(kegs, &args, M_WAITOK));
1584}
1585
1586/* See uma.h */
1587void
1588uma_set_align(int align)
1589{
1590
1591        if (align != UMA_ALIGN_CACHE)
1592                uma_align_cache = align;
1593}
1594
1595/* See uma.h */
1596uma_zone_t
1597uma_zcreate(char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1598                uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1599
1600{
1601        struct uma_zctor_args args;
1602
1603        /* This stuff is essential for the zone ctor */
1604        args.name = name;
1605        args.size = size;
1606        args.ctor = ctor;
1607        args.dtor = dtor;
1608        args.uminit = uminit;
1609        args.fini = fini;
1610        args.align = align;
1611        args.flags = flags;
1612        args.keg = NULL;
1613
1614        return (zone_alloc_item(zones, &args, M_WAITOK));
1615}
1616
1617/* See uma.h */
1618uma_zone_t
1619uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1620                    uma_init zinit, uma_fini zfini, uma_zone_t master)
1621{
1622        struct uma_zctor_args args;
1623        uma_keg_t keg;
1624
1625        keg = zone_first_keg(master);
1626        args.name = name;
1627        args.size = keg->uk_size;
1628        args.ctor = ctor;
1629        args.dtor = dtor;
1630        args.uminit = zinit;
1631        args.fini = zfini;
1632        args.align = keg->uk_align;
1633        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1634        args.keg = keg;
1635
1636        /* XXX Attaches only one keg of potentially many. */
1637        return (zone_alloc_item(zones, &args, M_WAITOK));
1638}
1639
1640static void
1641zone_lock_pair(uma_zone_t a, uma_zone_t b)
1642{
1643        if (a < b) {
1644                ZONE_LOCK(a);
1645                mtx_lock_flags(b->uz_lock, MTX_DUPOK);
1646        } else {
1647                ZONE_LOCK(b);
1648                mtx_lock_flags(a->uz_lock, MTX_DUPOK);
1649        }
1650}
1651
1652static void
1653zone_unlock_pair(uma_zone_t a, uma_zone_t b)
1654{
1655
1656        ZONE_UNLOCK(a);
1657        ZONE_UNLOCK(b);
1658}
1659
1660
1661/* See uma.h */
1662void
1663uma_zdestroy(uma_zone_t zone)
1664{
1665
1666        zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
1667}
1668
1669/* See uma.h */
1670void *
1671uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
1672{
1673        void *item;
1674        uma_cache_t cache;
1675        uma_bucket_t bucket;
1676        int cpu;
1677
1678        /* This is the fast path allocation */
1679#ifdef UMA_DEBUG_ALLOC_1
1680        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
1681#endif
1682        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
1683            zone->uz_name, flags);
1684
1685        if (flags & M_WAITOK) {
1686                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
1687                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
1688        }
1689
1690        /*
1691         * If possible, allocate from the per-CPU cache.  There are two
1692         * requirements for safe access to the per-CPU cache: (1) the thread
1693         * accessing the cache must not be preempted or yield during access,
1694         * and (2) the thread must not migrate CPUs without switching which
1695         * cache it accesses.  We rely on a critical section to prevent
1696         * preemption and migration.  We release the critical section in
1697         * order to acquire the zone mutex if we are unable to allocate from
1698         * the current cache; when we re-acquire the critical section, we
1699         * must detect and handle migration if it has occurred.
1700         */
1701zalloc_restart:
1702        critical_enter();
1703        cpu = curcpu;
1704        cache = &zone->uz_cpu[cpu];
1705
1706zalloc_start:
1707        bucket = cache->uc_allocbucket;
1708
1709        if (bucket) {
1710                if (bucket->ub_cnt > 0) {
1711                        bucket->ub_cnt--;
1712                        item = bucket->ub_bucket[bucket->ub_cnt];
1713#ifdef INVARIANTS
1714                        bucket->ub_bucket[bucket->ub_cnt] = NULL;
1715#endif
1716                        KASSERT(item != NULL,
1717                            ("uma_zalloc: Bucket pointer mangled."));
1718                        cache->uc_allocs++;
1719                        critical_exit();
1720#ifdef INVARIANTS
1721                        ZONE_LOCK(zone);
1722                        uma_dbg_alloc(zone, NULL, item);
1723                        ZONE_UNLOCK(zone);
1724#endif
1725                        if (zone->uz_ctor != NULL) {
1726                                if (zone->uz_ctor(item, zone->uz_size,
1727                                    udata, flags) != 0) {
1728                                        zone_free_item(zone, item, udata,
1729                                            SKIP_DTOR, ZFREE_STATFAIL |
1730                                            ZFREE_STATFREE);
1731                                        return (NULL);
1732                                }
1733                        }
1734                        if (flags & M_ZERO)
1735                                bzero(item, zone->uz_size);
1736                        return (item);
1737                } else if (cache->uc_freebucket) {
1738                        /*
1739                         * We have run out of items in our allocbucket.
1740                         * See if we can switch with our free bucket.
1741                         */
1742                        if (cache->uc_freebucket->ub_cnt > 0) {
1743#ifdef UMA_DEBUG_ALLOC
1744                                printf("uma_zalloc: Swapping empty with"
1745                                    " alloc.\n");
1746#endif
1747                                bucket = cache->uc_freebucket;
1748                                cache->uc_freebucket = cache->uc_allocbucket;
1749                                cache->uc_allocbucket = bucket;
1750
1751                                goto zalloc_start;
1752                        }
1753                }
1754        }
1755        /*
1756         * Attempt to retrieve the item from the per-CPU cache has failed, so
1757         * we must go back to the zone.  This requires the zone lock, so we
1758         * must drop the critical section, then re-acquire it when we go back
1759         * to the cache.  Since the critical section is released, we may be
1760         * preempted or migrate.  As such, make sure not to maintain any
1761         * thread-local state specific to the cache from prior to releasing
1762         * the critical section.
1763         */
1764        critical_exit();
1765        ZONE_LOCK(zone);
1766        critical_enter();
1767        cpu = curcpu;
1768        cache = &zone->uz_cpu[cpu];
1769        bucket = cache->uc_allocbucket;
1770        if (bucket != NULL) {
1771                if (bucket->ub_cnt > 0) {
1772                        ZONE_UNLOCK(zone);
1773                        goto zalloc_start;
1774                }
1775                bucket = cache->uc_freebucket;
1776                if (bucket != NULL && bucket->ub_cnt > 0) {
1777                        ZONE_UNLOCK(zone);
1778                        goto zalloc_start;
1779                }
1780        }
1781
1782        /* Since we have locked the zone we may as well send back our stats */
1783        zone->uz_allocs += cache->uc_allocs;
1784        cache->uc_allocs = 0;
1785        zone->uz_frees += cache->uc_frees;
1786        cache->uc_frees = 0;
1787
1788        /* Our old one is now a free bucket */
1789        if (cache->uc_allocbucket) {
1790                KASSERT(cache->uc_allocbucket->ub_cnt == 0,
1791                    ("uma_zalloc_arg: Freeing a non free bucket."));
1792                LIST_INSERT_HEAD(&zone->uz_free_bucket,
1793                    cache->uc_allocbucket, ub_link);
1794                cache->uc_allocbucket = NULL;
1795        }
1796
1797        /* Check the free list for a new alloc bucket */
1798        if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
1799                KASSERT(bucket->ub_cnt != 0,
1800                    ("uma_zalloc_arg: Returning an empty bucket."));
1801
1802                LIST_REMOVE(bucket, ub_link);
1803                cache->uc_allocbucket = bucket;
1804                ZONE_UNLOCK(zone);
1805                goto zalloc_start;
1806        }
1807        /* We are no longer associated with this CPU. */
1808        critical_exit();
1809
1810        /* Bump up our uz_count so we get here less */
1811        if (zone->uz_count < BUCKET_MAX)
1812                zone->uz_count++;
1813
1814        /*
1815         * Now lets just fill a bucket and put it on the free list.  If that
1816         * works we'll restart the allocation from the begining.
1817         */
1818        if (zone_alloc_bucket(zone, flags)) {
1819                ZONE_UNLOCK(zone);
1820                goto zalloc_restart;
1821        }
1822        ZONE_UNLOCK(zone);
1823        /*
1824         * We may not be able to get a bucket so return an actual item.
1825         */
1826#ifdef UMA_DEBUG
1827        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
1828#endif
1829
1830        item = zone_alloc_item(zone, udata, flags);
1831        return (item);
1832}
1833
1834static uma_slab_t
1835keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
1836{
1837        uma_slab_t slab;
1838
1839        mtx_assert(&keg->uk_lock, MA_OWNED);
1840        slab = NULL;
1841
1842        for (;;) {
1843                /*
1844                 * Find a slab with some space.  Prefer slabs that are partially
1845                 * used over those that are totally full.  This helps to reduce
1846                 * fragmentation.
1847                 */
1848                if (keg->uk_free != 0) {
1849                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
1850                                slab = LIST_FIRST(&keg->uk_part_slab);
1851                        } else {
1852                                slab = LIST_FIRST(&keg->uk_free_slab);
1853                                LIST_REMOVE(slab, us_link);
1854                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
1855                                    us_link);
1856                        }
1857                        MPASS(slab->us_keg == keg);
1858                        return (slab);
1859                }
1860
1861                /*
1862                 * M_NOVM means don't ask at all!
1863                 */
1864                if (flags & M_NOVM)
1865                        break;
1866
1867                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
1868                        keg->uk_flags |= UMA_ZFLAG_FULL;
1869                        /*
1870                         * If this is not a multi-zone, set the FULL bit.
1871                         * Otherwise slab_multi() takes care of it.
1872                         */
1873                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0)
1874                                zone->uz_flags |= UMA_ZFLAG_FULL;
1875                        if (flags & M_NOWAIT)
1876                                break;
1877                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
1878                        continue;
1879                }
1880                keg->uk_recurse++;
1881                slab = keg_alloc_slab(keg, zone, flags);
1882                keg->uk_recurse--;
1883                /*
1884                 * If we got a slab here it's safe to mark it partially used
1885                 * and return.  We assume that the caller is going to remove
1886                 * at least one item.
1887                 */
1888                if (slab) {
1889                        MPASS(slab->us_keg == keg);
1890                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
1891                        return (slab);
1892                }
1893                /*
1894                 * We might not have been able to get a slab but another cpu
1895                 * could have while we were unlocked.  Check again before we
1896                 * fail.
1897                 */
1898                flags |= M_NOVM;
1899        }
1900        return (slab);
1901}
1902
1903static inline void
1904zone_relock(uma_zone_t zone, uma_keg_t keg)
1905{
1906        if (zone->uz_lock != &keg->uk_lock) {
1907                KEG_UNLOCK(keg);
1908                ZONE_LOCK(zone);
1909        }
1910}
1911
1912static inline void
1913keg_relock(uma_keg_t keg, uma_zone_t zone)
1914{
1915        if (zone->uz_lock != &keg->uk_lock) {
1916                ZONE_UNLOCK(zone);
1917                KEG_LOCK(keg);
1918        }
1919}
1920
1921static uma_slab_t
1922zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
1923{
1924        uma_slab_t slab;
1925
1926        if (keg == NULL)
1927                keg = zone_first_keg(zone);
1928        /*
1929         * This is to prevent us from recursively trying to allocate
1930         * buckets.  The problem is that if an allocation forces us to
1931         * grab a new bucket we will call page_alloc, which will go off
1932         * and cause the vm to allocate vm_map_entries.  If we need new
1933         * buckets there too we will recurse in kmem_alloc and bad
1934         * things happen.  So instead we return a NULL bucket, and make
1935         * the code that allocates buckets smart enough to deal with it
1936         */
1937        if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0)
1938                return (NULL);
1939
1940        for (;;) {
1941                slab = keg_fetch_slab(keg, zone, flags);
1942                if (slab)
1943                        return (slab);
1944                if (flags & (M_NOWAIT | M_NOVM))
1945                        break;
1946        }
1947        return (NULL);
1948}
1949
1950/*
1951 * uma_zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
1952 * with the keg locked.  Caller must call zone_relock() afterwards if the
1953 * zone lock is required.  On NULL the zone lock is held.
1954 *
1955 * The last pointer is used to seed the search.  It is not required.
1956 */
1957static uma_slab_t
1958zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
1959{
1960        uma_klink_t klink;
1961        uma_slab_t slab;
1962        uma_keg_t keg;
1963        int flags;
1964        int empty;
1965        int full;
1966
1967        /*
1968         * Don't wait on the first pass.  This will skip limit tests
1969         * as well.  We don't want to block if we can find a provider
1970         * without blocking.
1971         */
1972        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
1973        /*
1974         * Use the last slab allocated as a hint for where to start
1975         * the search.
1976         */
1977        if (last) {
1978                slab = keg_fetch_slab(last, zone, flags);
1979                if (slab)
1980                        return (slab);
1981                zone_relock(zone, last);
1982                last = NULL;
1983        }
1984        /*
1985         * Loop until we have a slab incase of transient failures
1986         * while M_WAITOK is specified.  I'm not sure this is 100%
1987         * required but we've done it for so long now.
1988         */
1989        for (;;) {
1990                empty = 0;
1991                full = 0;
1992                /*
1993                 * Search the available kegs for slabs.  Be careful to hold the
1994                 * correct lock while calling into the keg layer.
1995                 */
1996                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
1997                        keg = klink->kl_keg;
1998                        keg_relock(keg, zone);
1999                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2000                                slab = keg_fetch_slab(keg, zone, flags);
2001                                if (slab)
2002                                        return (slab);
2003                        }
2004                        if (keg->uk_flags & UMA_ZFLAG_FULL)
2005                                full++;
2006                        else
2007                                empty++;
2008                        zone_relock(zone, keg);
2009                }
2010                if (rflags & (M_NOWAIT | M_NOVM))
2011                        break;
2012                flags = rflags;
2013                /*
2014                 * All kegs are full.  XXX We can't atomically check all kegs
2015                 * and sleep so just sleep for a short period and retry.
2016                 */
2017                if (full && !empty) {
2018                        zone->uz_flags |= UMA_ZFLAG_FULL;
2019                        msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
2020                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
2021                        continue;
2022                }
2023        }
2024        return (NULL);
2025}
2026
2027static void *
2028slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
2029{
2030        uma_keg_t keg;
2031        uma_slabrefcnt_t slabref;
2032        void *item;
2033        u_int8_t freei;
2034
2035        keg = slab->us_keg;
2036        mtx_assert(&keg->uk_lock, MA_OWNED);
2037
2038        freei = slab->us_firstfree;
2039        if (keg->uk_flags & UMA_ZONE_REFCNT) {
2040                slabref = (uma_slabrefcnt_t)slab;
2041                slab->us_firstfree = slabref->us_freelist[freei].us_item;
2042        } else {
2043                slab->us_firstfree = slab->us_freelist[freei].us_item;
2044        }
2045        item = slab->us_data + (keg->uk_rsize * freei);
2046
2047        slab->us_freecount--;
2048        keg->uk_free--;
2049#ifdef INVARIANTS
2050        uma_dbg_alloc(zone, slab, item);
2051#endif
2052        /* Move this slab to the full list */
2053        if (slab->us_freecount == 0) {
2054                LIST_REMOVE(slab, us_link);
2055                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2056        }
2057
2058        return (item);
2059}
2060
2061static int
2062zone_alloc_bucket(uma_zone_t zone, int flags)
2063{
2064        uma_bucket_t bucket;
2065        uma_slab_t slab;
2066        uma_keg_t keg;
2067        int16_t saved;
2068        int max, origflags = flags;
2069
2070        /*
2071         * Try this zone's free list first so we don't allocate extra buckets.
2072         */
2073        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2074                KASSERT(bucket->ub_cnt == 0,
2075                    ("zone_alloc_bucket: Bucket on free list is not empty."));
2076                LIST_REMOVE(bucket, ub_link);
2077        } else {
2078                int bflags;
2079
2080                bflags = (flags & ~M_ZERO);
2081                if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2082                        bflags |= M_NOVM;
2083
2084                ZONE_UNLOCK(zone);
2085                bucket = bucket_alloc(zone->uz_count, bflags);
2086                ZONE_LOCK(zone);
2087        }
2088
2089        if (bucket == NULL) {
2090                return (0);
2091        }
2092
2093#ifdef SMP
2094        /*
2095         * This code is here to limit the number of simultaneous bucket fills
2096         * for any given zone to the number of per cpu caches in this zone. This
2097         * is done so that we don't allocate more memory than we really need.
2098         */
2099        if (zone->uz_fills >= mp_ncpus)
2100                goto done;
2101
2102#endif
2103        zone->uz_fills++;
2104
2105        max = MIN(bucket->ub_entries, zone->uz_count);
2106        /* Try to keep the buckets totally full */
2107        saved = bucket->ub_cnt;
2108        slab = NULL;
2109        keg = NULL;
2110        while (bucket->ub_cnt < max &&
2111            (slab = zone->uz_slab(zone, keg, flags)) != NULL) {
2112                keg = slab->us_keg;
2113                while (slab->us_freecount && bucket->ub_cnt < max) {
2114                        bucket->ub_bucket[bucket->ub_cnt++] =
2115                            slab_alloc_item(zone, slab);
2116                }
2117
2118                /* Don't block on the next fill */
2119                flags |= M_NOWAIT;
2120        }
2121        if (slab)
2122                zone_relock(zone, keg);
2123
2124        /*
2125         * We unlock here because we need to call the zone's init.
2126         * It should be safe to unlock because the slab dealt with
2127         * above is already on the appropriate list within the keg
2128         * and the bucket we filled is not yet on any list, so we
2129         * own it.
2130         */
2131        if (zone->uz_init != NULL) {
2132                int i;
2133
2134                ZONE_UNLOCK(zone);
2135                for (i = saved; i < bucket->ub_cnt; i++)
2136                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2137                            origflags) != 0)
2138                                break;
2139                /*
2140                 * If we couldn't initialize the whole bucket, put the
2141                 * rest back onto the freelist.
2142                 */
2143                if (i != bucket->ub_cnt) {
2144                        int j;
2145
2146                        for (j = i; j < bucket->ub_cnt; j++) {
2147                                zone_free_item(zone, bucket->ub_bucket[j],
2148                                    NULL, SKIP_FINI, 0);
2149#ifdef INVARIANTS
2150                                bucket->ub_bucket[j] = NULL;
2151#endif
2152                        }
2153                        bucket->ub_cnt = i;
2154                }
2155                ZONE_LOCK(zone);
2156        }
2157
2158        zone->uz_fills--;
2159        if (bucket->ub_cnt != 0) {
2160                LIST_INSERT_HEAD(&zone->uz_full_bucket,
2161                    bucket, ub_link);
2162                return (1);
2163        }
2164#ifdef SMP
2165done:
2166#endif
2167        bucket_free(bucket);
2168
2169        return (0);
2170}
2171/*
2172 * Allocates an item for an internal zone
2173 *
2174 * Arguments
2175 *      zone   The zone to alloc for.
2176 *      udata  The data to be passed to the constructor.
2177 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2178 *
2179 * Returns
2180 *      NULL if there is no memory and M_NOWAIT is set
2181 *      An item if successful
2182 */
2183
2184static void *
2185zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2186{
2187        uma_slab_t slab;
2188        void *item;
2189
2190        item = NULL;
2191
2192#ifdef UMA_DEBUG_ALLOC
2193        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2194#endif
2195        ZONE_LOCK(zone);
2196
2197        slab = zone->uz_slab(zone, NULL, flags);
2198        if (slab == NULL) {
2199                zone->uz_fails++;
2200                ZONE_UNLOCK(zone);
2201                return (NULL);
2202        }
2203
2204        item = slab_alloc_item(zone, slab);
2205
2206        zone_relock(zone, slab->us_keg);
2207        zone->uz_allocs++;
2208        ZONE_UNLOCK(zone);
2209
2210        /*
2211         * We have to call both the zone's init (not the keg's init)
2212         * and the zone's ctor.  This is because the item is going from
2213         * a keg slab directly to the user, and the user is expecting it
2214         * to be both zone-init'd as well as zone-ctor'd.
2215         */
2216        if (zone->uz_init != NULL) {
2217                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2218                        zone_free_item(zone, item, udata, SKIP_FINI,
2219                            ZFREE_STATFAIL | ZFREE_STATFREE);
2220                        return (NULL);
2221                }
2222        }
2223        if (zone->uz_ctor != NULL) {
2224                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2225                        zone_free_item(zone, item, udata, SKIP_DTOR,
2226                            ZFREE_STATFAIL | ZFREE_STATFREE);
2227                        return (NULL);
2228                }
2229        }
2230        if (flags & M_ZERO)
2231                bzero(item, zone->uz_size);
2232
2233        return (item);
2234}
2235
2236/* See uma.h */
2237void
2238uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2239{
2240        uma_cache_t cache;
2241        uma_bucket_t bucket;
2242        int bflags;
2243        int cpu;
2244
2245#ifdef UMA_DEBUG_ALLOC_1
2246        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2247#endif
2248        CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2249            zone->uz_name);
2250
2251        /* uma_zfree(..., NULL) does nothing, to match free(9). */
2252        if (item == NULL)
2253                return;
2254
2255        if (zone->uz_dtor)
2256                zone->uz_dtor(item, zone->uz_size, udata);
2257
2258#ifdef INVARIANTS
2259        ZONE_LOCK(zone);
2260        if (zone->uz_flags & UMA_ZONE_MALLOC)
2261                uma_dbg_free(zone, udata, item);
2262        else
2263                uma_dbg_free(zone, NULL, item);
2264        ZONE_UNLOCK(zone);
2265#endif
2266        /*
2267         * The race here is acceptable.  If we miss it we'll just have to wait
2268         * a little longer for the limits to be reset.
2269         */
2270        if (zone->uz_flags & UMA_ZFLAG_FULL)
2271                goto zfree_internal;
2272
2273        /*
2274         * If possible, free to the per-CPU cache.  There are two
2275         * requirements for safe access to the per-CPU cache: (1) the thread
2276         * accessing the cache must not be preempted or yield during access,
2277         * and (2) the thread must not migrate CPUs without switching which
2278         * cache it accesses.  We rely on a critical section to prevent
2279         * preemption and migration.  We release the critical section in
2280         * order to acquire the zone mutex if we are unable to free to the
2281         * current cache; when we re-acquire the critical section, we must
2282         * detect and handle migration if it has occurred.
2283         */
2284zfree_restart:
2285        critical_enter();
2286        cpu = curcpu;
2287        cache = &zone->uz_cpu[cpu];
2288
2289zfree_start:
2290        bucket = cache->uc_freebucket;
2291
2292        if (bucket) {
2293                /*
2294                 * Do we have room in our bucket? It is OK for this uz count
2295                 * check to be slightly out of sync.
2296                 */
2297
2298                if (bucket->ub_cnt < bucket->ub_entries) {
2299                        KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2300                            ("uma_zfree: Freeing to non free bucket index."));
2301                        bucket->ub_bucket[bucket->ub_cnt] = item;
2302                        bucket->ub_cnt++;
2303                        cache->uc_frees++;
2304                        critical_exit();
2305                        return;
2306                } else if (cache->uc_allocbucket) {
2307#ifdef UMA_DEBUG_ALLOC
2308                        printf("uma_zfree: Swapping buckets.\n");
2309#endif
2310                        /*
2311                         * We have run out of space in our freebucket.
2312                         * See if we can switch with our alloc bucket.
2313                         */
2314                        if (cache->uc_allocbucket->ub_cnt <
2315                            cache->uc_freebucket->ub_cnt) {
2316                                bucket = cache->uc_freebucket;
2317                                cache->uc_freebucket = cache->uc_allocbucket;
2318                                cache->uc_allocbucket = bucket;
2319                                goto zfree_start;
2320                        }
2321                }
2322        }
2323        /*
2324         * We can get here for two reasons:
2325         *
2326         * 1) The buckets are NULL
2327         * 2) The alloc and free buckets are both somewhat full.
2328         *
2329         * We must go back the zone, which requires acquiring the zone lock,
2330         * which in turn means we must release and re-acquire the critical
2331         * section.  Since the critical section is released, we may be
2332         * preempted or migrate.  As such, make sure not to maintain any
2333         * thread-local state specific to the cache from prior to releasing
2334         * the critical section.
2335         */
2336        critical_exit();
2337        ZONE_LOCK(zone);
2338        critical_enter();
2339        cpu = curcpu;
2340        cache = &zone->uz_cpu[cpu];
2341        if (cache->uc_freebucket != NULL) {
2342                if (cache->uc_freebucket->ub_cnt <
2343                    cache->uc_freebucket->ub_entries) {
2344                        ZONE_UNLOCK(zone);
2345                        goto zfree_start;
2346                }
2347                if (cache->uc_allocbucket != NULL &&
2348                    (cache->uc_allocbucket->ub_cnt <
2349                    cache->uc_freebucket->ub_cnt)) {
2350                        ZONE_UNLOCK(zone);
2351                        goto zfree_start;
2352                }
2353        }
2354
2355        /* Since we have locked the zone we may as well send back our stats */
2356        zone->uz_allocs += cache->uc_allocs;
2357        cache->uc_allocs = 0;
2358        zone->uz_frees += cache->uc_frees;
2359        cache->uc_frees = 0;
2360
2361        bucket = cache->uc_freebucket;
2362        cache->uc_freebucket = NULL;
2363
2364        /* Can we throw this on the zone full list? */
2365        if (bucket != NULL) {
2366#ifdef UMA_DEBUG_ALLOC
2367                printf("uma_zfree: Putting old bucket on the free list.\n");
2368#endif
2369                /* ub_cnt is pointing to the last free item */
2370                KASSERT(bucket->ub_cnt != 0,
2371                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2372                LIST_INSERT_HEAD(&zone->uz_full_bucket,
2373                    bucket, ub_link);
2374        }
2375        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2376                LIST_REMOVE(bucket, ub_link);
2377                ZONE_UNLOCK(zone);
2378                cache->uc_freebucket = bucket;
2379                goto zfree_start;
2380        }
2381        /* We are no longer associated with this CPU. */
2382        critical_exit();
2383
2384        /* And the zone.. */
2385        ZONE_UNLOCK(zone);
2386
2387#ifdef UMA_DEBUG_ALLOC
2388        printf("uma_zfree: Allocating new free bucket.\n");
2389#endif
2390        bflags = M_NOWAIT;
2391
2392        if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2393                bflags |= M_NOVM;
2394        bucket = bucket_alloc(zone->uz_count, bflags);
2395        if (bucket) {
2396                ZONE_LOCK(zone);
2397                LIST_INSERT_HEAD(&zone->uz_free_bucket,
2398                    bucket, ub_link);
2399                ZONE_UNLOCK(zone);
2400                goto zfree_restart;
2401        }
2402
2403        /*
2404         * If nothing else caught this, we'll just do an internal free.
2405         */
2406zfree_internal:
2407        zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
2408
2409        return;
2410}
2411
2412/*
2413 * Frees an item to an INTERNAL zone or allocates a free bucket
2414 *
2415 * Arguments:
2416 *      zone   The zone to free to
2417 *      item   The item we're freeing
2418 *      udata  User supplied data for the dtor
2419 *      skip   Skip dtors and finis
2420 */
2421static void
2422zone_free_item(uma_zone_t zone, void *item, void *udata,
2423    enum zfreeskip skip, int flags)
2424{
2425        uma_slab_t slab;
2426        uma_slabrefcnt_t slabref;
2427        uma_keg_t keg;
2428        u_int8_t *mem;
2429        u_int8_t freei;
2430        int clearfull;
2431
2432        if (skip < SKIP_DTOR && zone->uz_dtor)
2433                zone->uz_dtor(item, zone->uz_size, udata);
2434
2435        if (skip < SKIP_FINI && zone->uz_fini)
2436                zone->uz_fini(item, zone->uz_size);
2437
2438        ZONE_LOCK(zone);
2439
2440        if (flags & ZFREE_STATFAIL)
2441                zone->uz_fails++;
2442        if (flags & ZFREE_STATFREE)
2443                zone->uz_frees++;
2444
2445  if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2446                mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2447                keg = zone_first_keg(zone); /* Must only be one. */
2448                if (zone->uz_flags & UMA_ZONE_HASH) {
2449                        slab = hash_sfind(&keg->uk_hash, mem);
2450                } else {
2451                        mem += keg->uk_pgoff;
2452                        slab = (uma_slab_t)mem;
2453                }
2454        } else {
2455    panic("uma virtual memory not supported!" );
2456        }
2457        MPASS(keg == slab->us_keg);
2458
2459        /* Do we need to remove from any lists? */
2460        if (slab->us_freecount+1 == keg->uk_ipers) {
2461                LIST_REMOVE(slab, us_link);
2462                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2463        } else if (slab->us_freecount == 0) {
2464                LIST_REMOVE(slab, us_link);
2465                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2466        }
2467
2468        /* Slab management stuff */
2469        freei = ((unsigned long)item - (unsigned long)slab->us_data)
2470                / keg->uk_rsize;
2471
2472#ifdef INVARIANTS
2473        if (!skip)
2474                uma_dbg_free(zone, slab, item);
2475#endif
2476
2477        if (keg->uk_flags & UMA_ZONE_REFCNT) {
2478                slabref = (uma_slabrefcnt_t)slab;
2479                slabref->us_freelist[freei].us_item = slab->us_firstfree;
2480        } else {
2481                slab->us_freelist[freei].us_item = slab->us_firstfree;
2482        }
2483        slab->us_firstfree = freei;
2484        slab->us_freecount++;
2485
2486        /* Zone statistics */
2487        keg->uk_free++;
2488
2489        clearfull = 0;
2490        if (keg->uk_flags & UMA_ZFLAG_FULL) {
2491                if (keg->uk_pages < keg->uk_maxpages) {
2492                        keg->uk_flags &= ~UMA_ZFLAG_FULL;
2493                        clearfull = 1;
2494                }
2495
2496                /*
2497                 * We can handle one more allocation. Since we're clearing ZFLAG_FULL,
2498                 * wake up all procs blocked on pages. This should be uncommon, so
2499                 * keeping this simple for now (rather than adding count of blocked
2500                 * threads etc).
2501                 */
2502                wakeup(keg);
2503        }
2504        if (clearfull) {
2505                zone_relock(zone, keg);
2506                zone->uz_flags &= ~UMA_ZFLAG_FULL;
2507                wakeup(zone);
2508                ZONE_UNLOCK(zone);
2509        } else
2510                KEG_UNLOCK(keg);
2511}
2512
2513/* See uma.h */
2514void
2515uma_zone_set_max(uma_zone_t zone, int nitems)
2516{
2517        uma_keg_t keg;
2518
2519        ZONE_LOCK(zone);
2520        keg = zone_first_keg(zone);
2521        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2522        if (keg->uk_maxpages * keg->uk_ipers < nitems)
2523                keg->uk_maxpages += keg->uk_ppera;
2524
2525        ZONE_UNLOCK(zone);
2526}
2527
2528/* See uma.h */
2529int
2530uma_zone_get_max(uma_zone_t zone)
2531{
2532        int nitems;
2533        uma_keg_t keg;
2534
2535        ZONE_LOCK(zone);
2536        keg = zone_first_keg(zone);
2537        nitems = keg->uk_maxpages * keg->uk_ipers;
2538        ZONE_UNLOCK(zone);
2539
2540        return (nitems);
2541}
2542
2543/* See uma.h */
2544int
2545uma_zone_get_cur(uma_zone_t zone)
2546{
2547        int64_t nitems;
2548        u_int i;
2549
2550        ZONE_LOCK(zone);
2551        nitems = zone->uz_allocs - zone->uz_frees;
2552        CPU_FOREACH(i) {
2553                /*
2554                 * See the comment in sysctl_vm_zone_stats() regarding the
2555                 * safety of accessing the per-cpu caches. With the zone lock
2556                 * held, it is safe, but can potentially result in stale data.
2557                 */
2558                nitems += zone->uz_cpu[i].uc_allocs -
2559                    zone->uz_cpu[i].uc_frees;
2560        }
2561        ZONE_UNLOCK(zone);
2562
2563        return (nitems < 0 ? 0 : nitems);
2564}
2565
2566/* See uma.h */
2567void
2568uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2569{
2570        uma_keg_t keg;
2571
2572        ZONE_LOCK(zone);
2573        keg = zone_first_keg(zone);
2574        KASSERT(keg->uk_pages == 0,
2575            ("uma_zone_set_init on non-empty keg"));
2576        keg->uk_init = uminit;
2577        ZONE_UNLOCK(zone);
2578}
2579
2580/* See uma.h */
2581void
2582uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2583{
2584        uma_keg_t keg;
2585
2586        ZONE_LOCK(zone);
2587        keg = zone_first_keg(zone);
2588        KASSERT(keg->uk_pages == 0,
2589            ("uma_zone_set_fini on non-empty keg"));
2590        keg->uk_fini = fini;
2591        ZONE_UNLOCK(zone);
2592}
2593
2594/* See uma.h */
2595void
2596uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2597{
2598        ZONE_LOCK(zone);
2599        KASSERT(zone_first_keg(zone)->uk_pages == 0,
2600            ("uma_zone_set_zinit on non-empty keg"));
2601        zone->uz_init = zinit;
2602        ZONE_UNLOCK(zone);
2603}
2604
2605/* See uma.h */
2606void
2607uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2608{
2609        ZONE_LOCK(zone);
2610        KASSERT(zone_first_keg(zone)->uk_pages == 0,
2611            ("uma_zone_set_zfini on non-empty keg"));
2612        zone->uz_fini = zfini;
2613        ZONE_UNLOCK(zone);
2614}
2615
2616/* See uma.h */
2617/* XXX uk_freef is not actually used with the zone locked */
2618void
2619uma_zone_set_freef(uma_zone_t zone, uma_free freef)
2620{
2621
2622        ZONE_LOCK(zone);
2623        zone_first_keg(zone)->uk_freef = freef;
2624        ZONE_UNLOCK(zone);
2625}
2626
2627/* See uma.h */
2628/* XXX uk_allocf is not actually used with the zone locked */
2629void
2630uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
2631{
2632        uma_keg_t keg;
2633
2634        ZONE_LOCK(zone);
2635        keg = zone_first_keg(zone);
2636        keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
2637        keg->uk_allocf = allocf;
2638        ZONE_UNLOCK(zone);
2639}
2640
2641/* See uma.h */
2642void
2643uma_prealloc(uma_zone_t zone, int items)
2644{
2645        int slabs;
2646        uma_slab_t slab;
2647        uma_keg_t keg;
2648
2649        keg = zone_first_keg(zone);
2650        ZONE_LOCK(zone);
2651        slabs = items / keg->uk_ipers;
2652        if (slabs * keg->uk_ipers < items)
2653                slabs++;
2654        while (slabs > 0) {
2655                slab = keg_alloc_slab(keg, zone, M_WAITOK);
2656                if (slab == NULL)
2657                        break;
2658                MPASS(slab->us_keg == keg);
2659                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2660                slabs--;
2661        }
2662        ZONE_UNLOCK(zone);
2663}
2664
2665/* See uma.h */
2666void
2667uma_reclaim(void)
2668{
2669#ifdef UMA_DEBUG
2670        printf("UMA: vm asked us to release pages!\n");
2671#endif
2672        zone_foreach(zone_drain);
2673        /*
2674         * Some slabs may have been freed but this zone will be visited early
2675         * we visit again so that we can free pages that are empty once other
2676         * zones are drained.  We have to do the same for buckets.
2677         */
2678        zone_drain(slabzone);
2679        zone_drain(slabrefzone);
2680        bucket_zone_drain();
2681}
2682
2683/* See uma.h */
2684int
2685uma_zone_exhausted(uma_zone_t zone)
2686{
2687        int full;
2688
2689        ZONE_LOCK(zone);
2690        full = (zone->uz_flags & UMA_ZFLAG_FULL);
2691        ZONE_UNLOCK(zone);
2692        return (full);
2693}
2694
2695int
2696uma_zone_exhausted_nolock(uma_zone_t zone)
2697{
2698        return (zone->uz_flags & UMA_ZFLAG_FULL);
2699}
2700
2701void *
2702uma_large_malloc(int size, int wait)
2703{
2704        void *mem;
2705        uma_slab_t slab;
2706        u_int8_t flags;
2707
2708        slab = zone_alloc_item(slabzone, NULL, wait);
2709        if (slab == NULL)
2710                return (NULL);
2711        mem = page_alloc(NULL, size, &flags, wait);
2712        if (mem) {
2713                slab->us_data = mem;
2714                slab->us_flags = flags | UMA_SLAB_MALLOC;
2715                slab->us_size = size;
2716        } else {
2717                zone_free_item(slabzone, slab, NULL, SKIP_NONE,
2718                    ZFREE_STATFAIL | ZFREE_STATFREE);
2719        }
2720
2721        return (mem);
2722}
2723
2724void
2725uma_large_free(uma_slab_t slab)
2726{
2727        page_free(slab->us_data, slab->us_size, slab->us_flags);
2728        zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
2729}
2730
2731void
2732uma_print_stats(void)
2733{
2734        zone_foreach(uma_print_zone);
2735}
2736
2737static void
2738slab_print(uma_slab_t slab)
2739{
2740        printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
2741                slab->us_keg, slab->us_data, slab->us_freecount,
2742                slab->us_firstfree);
2743}
2744
2745static void
2746cache_print(uma_cache_t cache)
2747{
2748        printf("alloc: %p(%d), free: %p(%d)\n",
2749                cache->uc_allocbucket,
2750                cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
2751                cache->uc_freebucket,
2752                cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
2753}
2754
2755static void
2756uma_print_keg(uma_keg_t keg)
2757{
2758        uma_slab_t slab;
2759
2760        printf("keg: %s(%p) size %d(%d) flags %d ipers %d ppera %d "
2761            "out %d free %d limit %d\n",
2762            keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
2763            keg->uk_ipers, keg->uk_ppera,
2764            (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
2765            (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
2766        printf("Part slabs:\n");
2767        LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
2768                slab_print(slab);
2769        printf("Free slabs:\n");
2770        LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
2771                slab_print(slab);
2772        printf("Full slabs:\n");
2773        LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
2774                slab_print(slab);
2775}
2776
2777void
2778uma_print_zone(uma_zone_t zone)
2779{
2780        uma_cache_t cache;
2781        uma_klink_t kl;
2782        int i;
2783
2784        printf("zone: %s(%p) size %d flags %d\n",
2785            zone->uz_name, zone, zone->uz_size, zone->uz_flags);
2786        LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
2787                uma_print_keg(kl->kl_keg);
2788        for (i = 0; i <= mp_maxid; i++) {
2789                if (CPU_ABSENT(i))
2790                        continue;
2791                cache = &zone->uz_cpu[i];
2792                printf("CPU %d Cache:\n", i);
2793                cache_print(cache);
2794        }
2795}
2796
Note: See TracBrowser for help on using the repository browser.