source: rtems-libbsd/freebsd/sys/vm/uma_core.c @ e5db084

Last change on this file since e5db084 was e5db084, checked in by Sebastian Huber <sebastian.huber@…>, on 03/06/15 at 12:58:45

ZONE(9): Enable per-processor cache for SMP

This prevents a potential deadlock via the Giant lock and is a
performance benefit.

1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * uma_core.c  Implementation of the Universal Memory allocator
33 *
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36 * efficient.  A primary design goal is to return unused memory to the rest of
37 * the system.  This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
40 *
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
42 * are well known.
43 *
44 */
45
46/*
47 * TODO:
48 *      - Improve memory usage for large allocations
49 *      - Investigate cache size adjustments
50 */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD$");
54
55/* I should really use ktr.. */
56/*
57#define UMA_DEBUG 1
58#define UMA_DEBUG_ALLOC 1
59#define UMA_DEBUG_ALLOC_1 1
60*/
61
62#include <rtems/bsd/local/opt_ddb.h>
63#include <rtems/bsd/local/opt_param.h>
64
65#include <rtems/bsd/sys/param.h>
66#include <sys/systm.h>
67#include <sys/kernel.h>
68#include <rtems/bsd/sys/types.h>
69#include <sys/queue.h>
70#include <sys/malloc.h>
71#include <sys/ktr.h>
72#include <rtems/bsd/sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/mutex.h>
75#include <sys/proc.h>
76#include <sys/sbuf.h>
77#include <sys/smp.h>
78#include <sys/vmmeter.h>
79
80#include <vm/vm.h>
81#include <vm/vm_object.h>
82#include <vm/vm_page.h>
83#include <vm/vm_param.h>
84#include <vm/vm_map.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_extern.h>
87#include <vm/uma.h>
88#include <vm/uma_int.h>
89#include <vm/uma_dbg.h>
90
91#include <ddb/ddb.h>
92#ifdef __rtems__
93  #ifdef RTEMS_SMP
94    /*
95     * It is essential that we have a per-processor cache, otherwise the
96     * critical_enter()/critical_exit() protection would be insufficient.
97     */
98    #undef curcpu
99    #define curcpu rtems_get_current_processor()
100    #undef mp_maxid
101    #define mp_maxid rtems_get_processor_count()
102  #endif
103#endif /* __rtems__ */
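/*
 * A simplified sketch of why the remapping above matters: the allocation
 * fast path selects a cache purely by processor index and relies only on
 * critical_enter()/critical_exit() for protection, so on SMP each
 * processor really must see its own cache (compare uma_zalloc_arg()
 * further down in this file).
 */
#if 0
        critical_enter();               /* pin to the current processor */
        cpu = curcpu;                   /* rtems_get_current_processor() on SMP */
        cache = &zone->uz_cpu[cpu];     /* this processor's private buckets */
        bucket = cache->uc_allocbucket;
        if (bucket != NULL && bucket->ub_cnt > 0)
                item = bucket->ub_bucket[--bucket->ub_cnt];
        critical_exit();
#endif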
104
105/*
106 * This is the zone and keg from which all zones are spawned.  The idea is that
107 * even the zone & keg heads are allocated from the allocator, so we use the
108 * bss section to bootstrap us.
109 */
110static struct uma_keg masterkeg;
111static struct uma_zone masterzone_k;
112static struct uma_zone masterzone_z;
113static uma_zone_t kegs = &masterzone_k;
114static uma_zone_t zones = &masterzone_z;
115
116/* This is the zone from which all of uma_slab_t's are allocated. */
117static uma_zone_t slabzone;
118static uma_zone_t slabrefzone;  /* With refcounters (for UMA_ZONE_REFCNT) */
119
120/*
121 * The initial hash tables come out of this zone so they can be allocated
122 * prior to malloc coming up.
123 */
124static uma_zone_t hashzone;
125
126/* The boot-time adjusted value for cache line alignment. */
127int uma_align_cache = 64 - 1;
128
129static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
130
131#ifndef __rtems__
132/*
133 * Are we allowed to allocate buckets?
134 */
135static int bucketdisable = 1;
136#endif /* __rtems__ */
137
138/* Linked list of all kegs in the system */
139static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
140
141/* This mutex protects the keg list */
142static struct mtx uma_mtx;
143
144#ifndef __rtems__
145/* Linked list of boot time pages */
146static LIST_HEAD(,uma_slab) uma_boot_pages =
147    LIST_HEAD_INITIALIZER(uma_boot_pages);
148
149/* This mutex protects the boot time pages list */
150static struct mtx uma_boot_pages_mtx;
151
152/* Is the VM done starting up? */
153static int booted = 0;
154#define UMA_STARTUP     1
155#define UMA_STARTUP2    2
156#endif /* __rtems__ */
157
158/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
159static u_int uma_max_ipers;
160static u_int uma_max_ipers_ref;
161
162/*
163 * This is the handle used to schedule events that need to happen
164 * outside of the allocation fast path.
165 */
166static struct callout uma_callout;
167#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
168
169/*
170 * This structure is passed as the zone ctor arg so that I don't have to create
171 * a special allocation function just for zones.
172 */
173struct uma_zctor_args {
174        const char *name;
175        size_t size;
176        uma_ctor ctor;
177        uma_dtor dtor;
178        uma_init uminit;
179        uma_fini fini;
180        uma_keg_t keg;
181        int align;
182        u_int32_t flags;
183};
184
185struct uma_kctor_args {
186        uma_zone_t zone;
187        size_t size;
188        uma_init uminit;
189        uma_fini fini;
190        int align;
191        u_int32_t flags;
192};
193
194struct uma_bucket_zone {
195        uma_zone_t      ubz_zone;
196        char            *ubz_name;
197        int             ubz_entries;
198};
199
200#define BUCKET_MAX      128
201
202struct uma_bucket_zone bucket_zones[] = {
203        { NULL, "16 Bucket", 16 },
204        { NULL, "32 Bucket", 32 },
205        { NULL, "64 Bucket", 64 },
206        { NULL, "128 Bucket", 128 },
207        { NULL, NULL, 0}
208};
209
210#define BUCKET_SHIFT    4
211#define BUCKET_ZONES    ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
212
213/*
214 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
215 * of approximately the right size.
216 */
217static uint8_t bucket_size[BUCKET_ZONES];
218
219/*
220 * Flags and enumerations to be passed to internal functions.
221 */
222enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
223
224#define ZFREE_STATFAIL  0x00000001      /* Update zone failure statistic. */
225#define ZFREE_STATFREE  0x00000002      /* Update zone free statistic. */
226
227/* Prototypes.. */
228
229#ifndef __rtems__
230static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
231#endif /* __rtems__ */
232static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
233#ifndef __rtems__
234static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
235#endif /* __rtems__ */
236static void page_free(void *, int, u_int8_t);
237static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
238static void cache_drain(uma_zone_t);
239static void bucket_drain(uma_zone_t, uma_bucket_t);
240static void bucket_cache_drain(uma_zone_t zone);
241static int keg_ctor(void *, int, void *, int);
242static void keg_dtor(void *, int, void *);
243static int zone_ctor(void *, int, void *, int);
244static void zone_dtor(void *, int, void *);
245static int zero_init(void *, int, int);
246static void keg_small_init(uma_keg_t keg);
247static void keg_large_init(uma_keg_t keg);
248static void zone_foreach(void (*zfunc)(uma_zone_t));
249static void zone_timeout(uma_zone_t zone);
250static int hash_alloc(struct uma_hash *);
251static int hash_expand(struct uma_hash *, struct uma_hash *);
252static void hash_free(struct uma_hash *hash);
253static void uma_timeout(void *);
254static void uma_startup3(void);
255static void *zone_alloc_item(uma_zone_t, void *, int);
256static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
257    int);
258static void bucket_enable(void);
259static void bucket_init(void);
260static uma_bucket_t bucket_alloc(int, int);
261static void bucket_free(uma_bucket_t);
262static void bucket_zone_drain(void);
263static int zone_alloc_bucket(uma_zone_t zone, int flags);
264static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
265#ifndef __rtems__
266static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
267#endif /* __rtems__ */
268static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
269static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
270    uma_fini fini, int align, u_int32_t flags);
271static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
272static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
273
274void uma_print_zone(uma_zone_t);
275void uma_print_stats(void);
276static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
277static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
278
279SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
280
281SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
282    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
283
284SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
285    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
286
287/*
288 * This routine checks to see whether or not it's safe to enable buckets.
289 */
290
291static void
292bucket_enable(void)
293{
294#ifndef __rtems__
295        bucketdisable = vm_page_count_min();
296#endif /* __rtems__ */
297}
298
299/*
300 * Initialize bucket_zones, the array of zones of buckets of various sizes.
301 *
302 * For each zone, calculate the memory required for each bucket, consisting
303 * of the header and an array of pointers.  Initialize bucket_size[] to point
304 * the range of appropriate bucket sizes at the zone.
305 */
306static void
307bucket_init(void)
308{
309        struct uma_bucket_zone *ubz;
310        int i;
311        int j;
312
313        for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
314                int size;
315
316                ubz = &bucket_zones[j];
317                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
318                size += sizeof(void *) * ubz->ubz_entries;
319                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
320                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
321                    UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
322                for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
323                        bucket_size[i >> BUCKET_SHIFT] = j;
324        }
325}
326
327/*
328 * Given a desired number of entries for a bucket, return the zone from which
329 * to allocate the bucket.
330 */
331static struct uma_bucket_zone *
332bucket_zone_lookup(int entries)
333{
334        int idx;
335
336        idx = howmany(entries, 1 << BUCKET_SHIFT);
337        return (&bucket_zones[bucket_size[idx]]);
338}
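/*
 * Worked example of the table built by bucket_init() above (BUCKET_SHIFT
 * is 4, so each slot covers 16 entries): bucket_size[0..1] -> "16 Bucket",
 * bucket_size[2] -> "32 Bucket", bucket_size[3..4] -> "64 Bucket" and
 * bucket_size[5..8] -> "128 Bucket".  A request for, say, 20 entries gives
 * howmany(20, 16) == 2, so bucket_zone_lookup() returns the "32 Bucket"
 * zone, the smallest bucket zone that can hold the request.
 */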
339
340static uma_bucket_t
341bucket_alloc(int entries, int bflags)
342{
343        struct uma_bucket_zone *ubz;
344        uma_bucket_t bucket;
345
346#ifndef __rtems__
347        /*
348         * This is to stop us from allocating per cpu buckets while we're
349         * running out of vm.boot_pages.  Otherwise, we would exhaust the
350         * boot pages.  This also prevents us from allocating buckets in
351         * low memory situations.
352         */
353        if (bucketdisable)
354                return (NULL);
355#endif /* __rtems__ */
356
357        ubz = bucket_zone_lookup(entries);
358        bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
359        if (bucket) {
360#ifdef INVARIANTS
361                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
362#endif
363                bucket->ub_cnt = 0;
364                bucket->ub_entries = ubz->ubz_entries;
365        }
366
367        return (bucket);
368}
369
370static void
371bucket_free(uma_bucket_t bucket)
372{
373        struct uma_bucket_zone *ubz;
374
375        ubz = bucket_zone_lookup(bucket->ub_entries);
376        zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
377            ZFREE_STATFREE);
378}
379
380static void
381bucket_zone_drain(void)
382{
383        struct uma_bucket_zone *ubz;
384
385        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
386                zone_drain(ubz->ubz_zone);
387}
388
389static inline uma_keg_t
390zone_first_keg(uma_zone_t zone)
391{
392
393        return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
394}
395
396static void
397zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
398{
399        uma_klink_t klink;
400
401        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
402                kegfn(klink->kl_keg);
403}
404
405/*
406 * Routine called by timeout which is used to fire off some time interval
407 * based calculations.  (stats, hash size, etc.)
408 *
409 * Arguments:
410 *      arg   Unused
411 *
412 * Returns:
413 *      Nothing
414 */
415static void
416uma_timeout(void *unused)
417{
418        bucket_enable();
419        zone_foreach(zone_timeout);
420
421        /* Reschedule this event */
422        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
423}
424
425/*
426 * Routine to perform timeout driven calculations.  This expands the
427 * hashes and does per cpu statistics aggregation.
428 *
429 *  Returns nothing.
430 */
431static void
432keg_timeout(uma_keg_t keg)
433{
434
435        KEG_LOCK(keg);
436        /*
437         * Expand the keg hash table.
438         *
439         * This is done if the number of slabs is larger than the hash size.
440         * What I'm trying to do here is completely reduce collisions.  This
441         * may be a little aggressive.  Should I allow for two collisions max?
442         */
443        if (keg->uk_flags & UMA_ZONE_HASH &&
444            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
445                struct uma_hash newhash;
446                struct uma_hash oldhash;
447                int ret;
448
449                /*
450                 * This is so involved because allocating and freeing
451                 * while the keg lock is held will lead to deadlock.
452                 * I have to do everything in stages and check for
453                 * races.
454                 */
455                newhash = keg->uk_hash;
456                KEG_UNLOCK(keg);
457                ret = hash_alloc(&newhash);
458                KEG_LOCK(keg);
459                if (ret) {
460                        if (hash_expand(&keg->uk_hash, &newhash)) {
461                                oldhash = keg->uk_hash;
462                                keg->uk_hash = newhash;
463                        } else
464                                oldhash = newhash;
465
466                        KEG_UNLOCK(keg);
467                        hash_free(&oldhash);
468                        KEG_LOCK(keg);
469                }
470        }
471        KEG_UNLOCK(keg);
472}
473
474static void
475zone_timeout(uma_zone_t zone)
476{
477
478        zone_foreach_keg(zone, &keg_timeout);
479}
480
481/*
482 * Allocate and zero fill the next sized hash table from the appropriate
483 * backing store.
484 *
485 * Arguments:
486 *      hash  A new hash structure with the old hash size in uh_hashsize
487 *
488 * Returns:
489 *      1 on success and 0 on failure.
490 */
491static int
492hash_alloc(struct uma_hash *hash)
493{
494        int oldsize;
495        int alloc;
496
497        oldsize = hash->uh_hashsize;
498
499        /* We're just going to go to a power of two greater */
500        if (oldsize)  {
501                hash->uh_hashsize = oldsize * 2;
502                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
503                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
504                    M_UMAHASH, M_NOWAIT);
505        } else {
506                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
507                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
508                    M_WAITOK);
509                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
510        }
511        if (hash->uh_slab_hash) {
512                bzero(hash->uh_slab_hash, alloc);
513                hash->uh_hashmask = hash->uh_hashsize - 1;
514                return (1);
515        }
516
517        return (0);
518}
519
520/*
521 * Expands the hash table for HASH zones.  This is done from zone_timeout
522 * to reduce collisions.  This must not be done in the regular allocation
523 * path, otherwise, we can recurse on the vm while allocating pages.
524 *
525 * Arguments:
526 *      oldhash  The hash you want to expand
527 *      newhash  The hash structure for the new table
528 *
529 * Returns:
530 *      Nothing
531 *
532 * Discussion:
533 */
534static int
535hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
536{
537        uma_slab_t slab;
538        int hval;
539        int i;
540
541        if (!newhash->uh_slab_hash)
542                return (0);
543
544        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
545                return (0);
546
547        /*
548         * I need to investigate hash algorithms for resizing without a
549         * full rehash.
550         */
551
552        for (i = 0; i < oldhash->uh_hashsize; i++)
553                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
554                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
555                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
556                        hval = UMA_HASH(newhash, slab->us_data);
557                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
558                            slab, us_hlink);
559                }
560
561        return (1);
562}
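/*
 * Example of why a full rehash is needed: hash_alloc() always doubles the
 * table, so growing from 32 to 64 buckets changes uh_hashmask from 0x1f to
 * 0x3f.  A slab whose hash value has the new bit set lands in a different
 * bucket under the new mask, so every chain of the old table has to be
 * walked and each slab reinserted, which is what the loop above does.
 */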
563
564/*
565 * Free the hash bucket to the appropriate backing store.
566 *
567 * Arguments:
568 *      slab_hash  The hash bucket we're freeing
569 *      hashsize   The number of entries in that hash bucket
570 *
571 * Returns:
572 *      Nothing
573 */
574static void
575hash_free(struct uma_hash *hash)
576{
577        if (hash->uh_slab_hash == NULL)
578                return;
579        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
580                zone_free_item(hashzone,
581                    hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
582        else
583                free(hash->uh_slab_hash, M_UMAHASH);
584}
585
586/*
587 * Frees all outstanding items in a bucket
588 *
589 * Arguments:
590 *      zone   The zone to free to, must be unlocked.
591 *      bucket The free/alloc bucket with items, cpu queue must be locked.
592 *
593 * Returns:
594 *      Nothing
595 */
596
597static void
598bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
599{
600        void *item;
601
602        if (bucket == NULL)
603                return;
604
605        while (bucket->ub_cnt > 0)  {
606                bucket->ub_cnt--;
607                item = bucket->ub_bucket[bucket->ub_cnt];
608#ifdef INVARIANTS
609                bucket->ub_bucket[bucket->ub_cnt] = NULL;
610                KASSERT(item != NULL,
611                    ("bucket_drain: botched ptr, item is NULL"));
612#endif
613                zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
614        }
615}
616
617/*
618 * Drains the per cpu caches for a zone.
619 *
620 * NOTE: This may only be called while the zone is being torn down, and not
621 * during normal operation.  This is necessary in order that we do not have
622 * to migrate CPUs to drain the per-CPU caches.
623 *
624 * Arguments:
625 *      zone     The zone to drain, must be unlocked.
626 *
627 * Returns:
628 *      Nothing
629 */
630static void
631cache_drain(uma_zone_t zone)
632{
633        uma_cache_t cache;
634        int cpu;
635
636        /*
637         * XXX: It is safe to not lock the per-CPU caches, because we're
638         * tearing down the zone anyway.  I.e., there will be no further use
639         * of the caches at this point.
640         *
641 * XXX: It would be good to be able to assert that the zone is being
642         * torn down to prevent improper use of cache_drain().
643         *
644         * XXX: We lock the zone before passing into bucket_cache_drain() as
645         * it is used elsewhere.  Should the tear-down path be made special
646         * there in some form?
647         */
648        CPU_FOREACH(cpu) {
649                cache = &zone->uz_cpu[cpu];
650                bucket_drain(zone, cache->uc_allocbucket);
651                bucket_drain(zone, cache->uc_freebucket);
652                if (cache->uc_allocbucket != NULL)
653                        bucket_free(cache->uc_allocbucket);
654                if (cache->uc_freebucket != NULL)
655                        bucket_free(cache->uc_freebucket);
656                cache->uc_allocbucket = cache->uc_freebucket = NULL;
657        }
658        ZONE_LOCK(zone);
659        bucket_cache_drain(zone);
660        ZONE_UNLOCK(zone);
661}
662
663/*
664 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
665 */
666static void
667bucket_cache_drain(uma_zone_t zone)
668{
669        uma_bucket_t bucket;
670
671        /*
672         * Drain the bucket queues and free the buckets; we just keep two per
673         * cpu (alloc/free).
674         */
675        while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
676                LIST_REMOVE(bucket, ub_link);
677                ZONE_UNLOCK(zone);
678                bucket_drain(zone, bucket);
679                bucket_free(bucket);
680                ZONE_LOCK(zone);
681        }
682
683        /* Now we do the free queue.. */
684        while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
685                LIST_REMOVE(bucket, ub_link);
686                bucket_free(bucket);
687        }
688}
689
690/*
691 * Frees pages from a keg back to the system.  This is done on demand from
692 * the pageout daemon.
693 *
694 * Returns nothing.
695 */
696static void
697keg_drain(uma_keg_t keg)
698{
699        struct slabhead freeslabs = { 0 };
700        uma_slab_t slab;
701        uma_slab_t n;
702        u_int8_t flags;
703        u_int8_t *mem;
704        int i;
705
706        /*
707         * We don't want to take pages from statically allocated kegs at this
708         * time
709         */
710        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
711                return;
712
713#ifdef UMA_DEBUG
714        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
715#endif
716        KEG_LOCK(keg);
717        if (keg->uk_free == 0)
718                goto finished;
719
720        slab = LIST_FIRST(&keg->uk_free_slab);
721        while (slab) {
722                n = LIST_NEXT(slab, us_link);
723
724                /* We have nowhere to free these to */
725                if (slab->us_flags & UMA_SLAB_BOOT) {
726                        slab = n;
727                        continue;
728                }
729
730                LIST_REMOVE(slab, us_link);
731                keg->uk_pages -= keg->uk_ppera;
732                keg->uk_free -= keg->uk_ipers;
733
734                if (keg->uk_flags & UMA_ZONE_HASH)
735                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
736
737                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
738
739                slab = n;
740        }
741finished:
742        KEG_UNLOCK(keg);
743
744        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
745                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
746                if (keg->uk_fini)
747                        for (i = 0; i < keg->uk_ipers; i++)
748                                keg->uk_fini(
749                                    slab->us_data + (keg->uk_rsize * i),
750                                    keg->uk_size);
751                flags = slab->us_flags;
752                mem = slab->us_data;
753
754#ifndef __rtems__
755                if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
756                        vm_object_t obj;
757
758                        if (flags & UMA_SLAB_KMEM)
759                                obj = kmem_object;
760                        else if (flags & UMA_SLAB_KERNEL)
761                                obj = kernel_object;
762                        else
763                                obj = NULL;
764                        for (i = 0; i < keg->uk_ppera; i++)
765                                vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
766                                    obj);
767                }
768#endif /* __rtems__ */
769                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
770                        zone_free_item(keg->uk_slabzone, slab, NULL,
771                            SKIP_NONE, ZFREE_STATFREE);
772#ifdef UMA_DEBUG
773                printf("%s: Returning %d bytes.\n",
774                    keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
775#endif
776                keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
777        }
778}
779
780static void
781zone_drain_wait(uma_zone_t zone, int waitok)
782{
783
784        /*
785         * Set draining to interlock with zone_dtor() so we can release our
786         * locks as we go.  Only dtor() should do a WAITOK call since it
787         * is the only call that knows the structure will still be available
788         * when it wakes up.
789         */
790        ZONE_LOCK(zone);
791        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
792                if (waitok == M_NOWAIT)
793                        goto out;
794                mtx_unlock(&uma_mtx);
795                msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
796                mtx_lock(&uma_mtx);
797        }
798        zone->uz_flags |= UMA_ZFLAG_DRAINING;
799        bucket_cache_drain(zone);
800        ZONE_UNLOCK(zone);
801        /*
802         * The DRAINING flag protects us from being freed while
803         * we're running.  Normally the uma_mtx would protect us but we
804         * must be able to release and acquire the right lock for each keg.
805         */
806        zone_foreach_keg(zone, &keg_drain);
807        ZONE_LOCK(zone);
808        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
809        wakeup(zone);
810out:
811        ZONE_UNLOCK(zone);
812}
813
814void
815zone_drain(uma_zone_t zone)
816{
817
818        zone_drain_wait(zone, M_NOWAIT);
819}
820
821/*
822 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
823 *
824 * Arguments:
825 *      wait  Shall we wait?
826 *
827 * Returns:
828 *      The slab that was allocated or NULL if there is no memory and the
829 *      caller specified M_NOWAIT.
830 */
831static uma_slab_t
832keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
833{
834        uma_slabrefcnt_t slabref;
835        uma_alloc allocf;
836        uma_slab_t slab;
837        u_int8_t *mem;
838        u_int8_t flags;
839        int i;
840
841        mtx_assert(&keg->uk_lock, MA_OWNED);
842        slab = NULL;
843
844#ifdef UMA_DEBUG
845        printf("slab_zalloc:  Allocating a new slab for %s\n", keg->uk_name);
846#endif
847        allocf = keg->uk_allocf;
848        KEG_UNLOCK(keg);
849
850        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
851                slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
852                if (slab == NULL) {
853                        KEG_LOCK(keg);
854                        return NULL;
855                }
856        }
857
858        /*
859         * This reproduces the old vm_zone behavior of zero filling pages the
860         * first time they are added to a zone.
861         *
862         * Malloced items are zeroed in uma_zalloc.
863         */
864
865        if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
866                wait |= M_ZERO;
867        else
868                wait &= ~M_ZERO;
869
870        if (keg->uk_flags & UMA_ZONE_NODUMP)
871                wait |= M_NODUMP;
872
873        /* zone is passed for legacy reasons. */
874        mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
875        if (mem == NULL) {
876                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
877                        zone_free_item(keg->uk_slabzone, slab, NULL,
878                            SKIP_NONE, ZFREE_STATFREE);
879                KEG_LOCK(keg);
880                return (NULL);
881        }
882
883        /* Point the slab into the allocated memory */
884        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
885                slab = (uma_slab_t )(mem + keg->uk_pgoff);
886
887        if (keg->uk_flags & UMA_ZONE_VTOSLAB)
888                for (i = 0; i < keg->uk_ppera; i++)
889                        vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
890
891        slab->us_keg = keg;
892        slab->us_data = mem;
893        slab->us_freecount = keg->uk_ipers;
894        slab->us_firstfree = 0;
895        slab->us_flags = flags;
896
897        if (keg->uk_flags & UMA_ZONE_REFCNT) {
898                slabref = (uma_slabrefcnt_t)slab;
899                for (i = 0; i < keg->uk_ipers; i++) {
900                        slabref->us_freelist[i].us_refcnt = 0;
901                        slabref->us_freelist[i].us_item = i+1;
902                }
903        } else {
904                for (i = 0; i < keg->uk_ipers; i++)
905                        slab->us_freelist[i].us_item = i+1;
906        }
907
908        if (keg->uk_init != NULL) {
909                for (i = 0; i < keg->uk_ipers; i++)
910                        if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
911                            keg->uk_size, wait) != 0)
912                                break;
913                if (i != keg->uk_ipers) {
914                        if (keg->uk_fini != NULL) {
915                                for (i--; i > -1; i--)
916                                        keg->uk_fini(slab->us_data +
917                                            (keg->uk_rsize * i),
918                                            keg->uk_size);
919                        }
920#ifndef __rtems__
921                        if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
922                                vm_object_t obj;
923
924                                if (flags & UMA_SLAB_KMEM)
925                                        obj = kmem_object;
926                                else if (flags & UMA_SLAB_KERNEL)
927                                        obj = kernel_object;
928                                else
929                                        obj = NULL;
930                                for (i = 0; i < keg->uk_ppera; i++)
931                                        vsetobj((vm_offset_t)mem +
932                                            (i * PAGE_SIZE), obj);
933                        }
934#endif /* __rtems__ */
935                        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
936                                zone_free_item(keg->uk_slabzone, slab,
937                                    NULL, SKIP_NONE, ZFREE_STATFREE);
938                        keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
939                            flags);
940                        KEG_LOCK(keg);
941                        return (NULL);
942                }
943        }
944        KEG_LOCK(keg);
945
946        if (keg->uk_flags & UMA_ZONE_HASH)
947                UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
948
949        keg->uk_pages += keg->uk_ppera;
950        keg->uk_free += keg->uk_ipers;
951
952        return (slab);
953}
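/*
 * The us_freelist initialization above builds an index-linked free list
 * inside the slab: us_firstfree names the first free item and each entry's
 * us_item names the next one.  A rough sketch of how an item is popped from
 * that list (essentially what slab_alloc_item() does later in this file):
 */
#if 0
        freei = slab->us_firstfree;
        slab->us_firstfree = slab->us_freelist[freei].us_item;
        item = slab->us_data + (keg->uk_rsize * freei);
#endif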
954
955#ifndef __rtems__
956/*
957 * This function is intended to be used early on in place of page_alloc() so
958 * that we may use the boot time page cache to satisfy allocations before
959 * the VM is ready.
960 */
961static void *
962startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
963{
964        uma_keg_t keg;
965        uma_slab_t tmps;
966        int pages, check_pages;
967
968        keg = zone_first_keg(zone);
969        pages = howmany(bytes, PAGE_SIZE);
970        check_pages = pages - 1;
971        KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
972
973        /*
974         * Check our small startup cache to see if it has pages remaining.
975         */
976        mtx_lock(&uma_boot_pages_mtx);
977
978        /* First check if we have enough room. */
979        tmps = LIST_FIRST(&uma_boot_pages);
980        while (tmps != NULL && check_pages-- > 0)
981                tmps = LIST_NEXT(tmps, us_link);
982        if (tmps != NULL) {
983                /*
984                 * It's ok to lose tmps references.  The last one will
985                 * have tmps->us_data pointing to the start address of
986                 * "pages" contiguous pages of memory.
987                 */
988                while (pages-- > 0) {
989                        tmps = LIST_FIRST(&uma_boot_pages);
990                        LIST_REMOVE(tmps, us_link);
991                }
992                mtx_unlock(&uma_boot_pages_mtx);
993                *pflag = tmps->us_flags;
994                return (tmps->us_data);
995        }
996        mtx_unlock(&uma_boot_pages_mtx);
997        if (booted < UMA_STARTUP2)
998                panic("UMA: Increase vm.boot_pages");
999        /*
1000         * Now that we've booted reset these users to their real allocator.
1001         */
1002#ifdef UMA_MD_SMALL_ALLOC
1003        keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1004#else
1005        keg->uk_allocf = page_alloc;
1006#endif
1007        return keg->uk_allocf(zone, bytes, pflag, wait);
1008}
1009#endif /* __rtems__ */
1010
1011/*
1012 * Allocates a number of pages from the system
1013 *
1014 * Arguments:
1015 *      bytes  The number of bytes requested
1016 *      wait  Shall we wait?
1017 *
1018 * Returns:
1019 *      A pointer to the alloced memory or possibly
1020 *      NULL if M_NOWAIT is set.
1021 */
1022static void *
1023page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
1024{
1025        void *p;        /* Returned page */
1026
1027        *pflag = UMA_SLAB_KMEM;
1028#ifndef __rtems__
1029        p = (void *) kmem_malloc(kmem_map, bytes, wait);
1030#else /* __rtems__ */
1031        p = rtems_bsd_page_alloc(bytes, wait);
1032#endif /* __rtems__ */
1033
1034        return (p);
1035}
1036
1037#ifndef __rtems__
1038/*
1039 * Allocates a number of pages from within an object
1040 *
1041 * Arguments:
1042 *      bytes  The number of bytes requested
1043 *      wait   Shall we wait?
1044 *
1045 * Returns:
1046 *      A pointer to the alloced memory or possibly
1047 *      NULL if M_NOWAIT is set.
1048 */
1049static void *
1050obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
1051{
1052        vm_object_t object;
1053        vm_offset_t retkva, zkva;
1054        vm_page_t p;
1055        int pages, startpages;
1056        uma_keg_t keg;
1057
1058        keg = zone_first_keg(zone);
1059        object = keg->uk_obj;
1060        retkva = 0;
1061
1062        /*
1063         * This looks a little weird since we're getting one page at a time.
1064         */
1065        VM_OBJECT_LOCK(object);
1066        p = TAILQ_LAST(&object->memq, pglist);
1067        pages = p != NULL ? p->pindex + 1 : 0;
1068        startpages = pages;
1069        zkva = keg->uk_kva + pages * PAGE_SIZE;
1070        for (; bytes > 0; bytes -= PAGE_SIZE) {
1071                p = vm_page_alloc(object, pages,
1072                    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
1073                if (p == NULL) {
1074                        if (pages != startpages)
1075                                pmap_qremove(retkva, pages - startpages);
1076                        while (pages != startpages) {
1077                                pages--;
1078                                p = TAILQ_LAST(&object->memq, pglist);
1079                                vm_page_unwire(p, 0);
1080                                vm_page_free(p);
1081                        }
1082                        retkva = 0;
1083                        goto done;
1084                }
1085                pmap_qenter(zkva, &p, 1);
1086                if (retkva == 0)
1087                        retkva = zkva;
1088                zkva += PAGE_SIZE;
1089                pages += 1;
1090        }
1091done:
1092        VM_OBJECT_UNLOCK(object);
1093        *flags = UMA_SLAB_PRIV;
1094
1095        return ((void *)retkva);
1096}
1097#endif /* __rtems__ */
1098
1099/*
1100 * Frees a number of pages to the system
1101 *
1102 * Arguments:
1103 *      mem   A pointer to the memory to be freed
1104 *      size  The size of the memory being freed
1105 *      flags The original p->us_flags field
1106 *
1107 * Returns:
1108 *      Nothing
1109 */
1110static void
1111page_free(void *mem, int size, u_int8_t flags)
1112{
1113#ifndef __rtems__
1114        vm_map_t map;
1115
1116        if (flags & UMA_SLAB_KMEM)
1117                map = kmem_map;
1118        else if (flags & UMA_SLAB_KERNEL)
1119                map = kernel_map;
1120        else
1121                panic("UMA: page_free used with invalid flags %d", flags);
1122
1123        kmem_free(map, (vm_offset_t)mem, size);
1124#else /* __rtems__ */
1125        rtems_bsd_page_free(mem);
1126#endif /* __rtems__ */
1127}
1128
1129/*
1130 * Zero fill initializer
1131 *
1132 * Arguments/Returns follow uma_init specifications
1133 */
1134static int
1135zero_init(void *mem, int size, int flags)
1136{
1137        bzero(mem, size);
1138        return (0);
1139}
1140
1141/*
1142 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
1143 *
1144 * Arguments
1145 *      keg  The keg we should initialize
1146 *
1147 * Returns
1148 *      Nothing
1149 */
1150static void
1151keg_small_init(uma_keg_t keg)
1152{
1153        u_int rsize;
1154        u_int memused;
1155        u_int wastedspace;
1156        u_int shsize;
1157
1158        KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
1159        rsize = keg->uk_size;
1160
1161        if (rsize < UMA_SMALLEST_UNIT)
1162                rsize = UMA_SMALLEST_UNIT;
1163        if (rsize & keg->uk_align)
1164                rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1165
1166        keg->uk_rsize = rsize;
1167        keg->uk_ppera = 1;
1168
1169        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1170                shsize = 0;
1171        } else if (keg->uk_flags & UMA_ZONE_REFCNT) {
1172                rsize += UMA_FRITMREF_SZ;       /* linkage & refcnt */
1173                shsize = sizeof(struct uma_slab_refcnt);
1174        } else {
1175                rsize += UMA_FRITM_SZ;  /* Account for linkage */
1176                shsize = sizeof(struct uma_slab);
1177        }
1178
1179        keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1180        KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
1181        memused = keg->uk_ipers * rsize + shsize;
1182        wastedspace = UMA_SLAB_SIZE - memused;
1183
1184        /*
1185         * We can't do OFFPAGE if we're internal or if we've been
1186         * asked to not go to the VM for buckets.  If we do this we
1187         * may end up going to the VM (kmem_map) for slabs which we
1188         * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1189         * result of UMA_ZONE_VM, which clearly forbids it.
1190         */
1191        if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1192            (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1193                return;
1194
1195        if ((wastedspace >= UMA_MAX_WASTE) &&
1196            (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1197                keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1198                KASSERT(keg->uk_ipers <= 255,
1199                    ("keg_small_init: keg->uk_ipers too high!"));
1200#ifdef UMA_DEBUG
1201                printf("UMA decided we need offpage slab headers for "
1202                    "keg: %s, calculated wastedspace = %d, "
1203                    "maximum wasted space allowed = %d, "
1204                    "calculated ipers = %d, "
1205                    "new wasted space = %d\n", keg->uk_name, wastedspace,
1206                    UMA_MAX_WASTE, keg->uk_ipers,
1207                    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1208#endif
1209                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1210                if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1211                        keg->uk_flags |= UMA_ZONE_HASH;
1212        }
1213}
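/*
 * Worked example (assuming 4 KB slabs, an in-page slab header of roughly
 * 100 bytes and UMA_MAX_WASTE of about a tenth of a slab): a keg whose
 * items round up to rsize = 512 gets ipers = (4096 - 100) / 512 = 7, which
 * uses 7 * 512 + 100 = 3684 bytes and wastes 412.  That is over the waste
 * limit, while 4096 / 512 = 8 items would fit without the header, so the
 * code above switches to UMA_ZONE_OFFPAGE, keeps the header elsewhere and
 * packs 8 items per slab with no waste.
 */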
1214
1215/*
1216 * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
1217 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1218 * more complicated.
1219 *
1220 * Arguments
1221 *      keg  The keg we should initialize
1222 *
1223 * Returns
1224 *      Nothing
1225 */
1226static void
1227keg_large_init(uma_keg_t keg)
1228{
1229        int pages;
1230
1231        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1232        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1233            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1234
1235        pages = keg->uk_size / UMA_SLAB_SIZE;
1236
1237        /* Account for remainder */
1238        if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1239                pages++;
1240
1241        keg->uk_ppera = pages;
1242        keg->uk_ipers = 1;
1243        keg->uk_rsize = keg->uk_size;
1244
1245        /* We can't do OFFPAGE if we're internal, bail out here. */
1246        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1247                return;
1248
1249        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1250        if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1251                keg->uk_flags |= UMA_ZONE_HASH;
1252}
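/*
 * Example: with 4 KB slabs a 9000 byte item gives pages = 2 plus one more
 * for the remainder, so the keg uses a 3 page slab holding a single item
 * (ipers = 1, rsize = 9000), with the slab header kept OFFPAGE.
 */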
1253
1254static void
1255keg_cachespread_init(uma_keg_t keg)
1256{
1257        int alignsize;
1258        int trailer;
1259        int pages;
1260        int rsize;
1261
1262        alignsize = keg->uk_align + 1;
1263        rsize = keg->uk_size;
1264        /*
1265         * We want one item to start on every align boundary in a page.  To
1266         * do this we will span pages.  We will also extend the item by the
1267         * size of align if it is an even multiple of align.  Otherwise, it
1268         * would fall on the same boundary every time.
1269         */
1270        if (rsize & keg->uk_align)
1271                rsize = (rsize & ~keg->uk_align) + alignsize;
1272        if ((rsize & alignsize) == 0)
1273                rsize += alignsize;
1274        trailer = rsize - keg->uk_size;
1275        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1276        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1277        keg->uk_rsize = rsize;
1278        keg->uk_ppera = pages;
1279        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1280        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1281        KASSERT(keg->uk_ipers <= uma_max_ipers,
1282            ("%s: keg->uk_ipers too high(%d) increase max_ipers", __func__,
1283            keg->uk_ipers));
1284}
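/*
 * Worked example (assuming 4 KB pages and a 64 byte alignment): a 128 byte
 * item is padded to rsize = 192 so that rsize is an odd multiple of the
 * alignment.  The slab then spans pages = (192 * 64) / 4096 = 3 pages and
 * holds ipers = (3 * 4096 + 64) / 192 = 64 items; because gcd(192, 4096)
 * is 64, successive items start on every 64 byte boundary of a page
 * exactly once before the pattern repeats, which is the cache-spreading
 * effect this keg type is after.
 */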
1285
1286/*
1287 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1288 * the keg onto the global keg list.
1289 *
1290 * Arguments/Returns follow uma_ctor specifications
1291 *      udata  Actually uma_kctor_args
1292 */
1293static int
1294keg_ctor(void *mem, int size, void *udata, int flags)
1295{
1296        struct uma_kctor_args *arg = udata;
1297        uma_keg_t keg = mem;
1298        uma_zone_t zone;
1299
1300        bzero(keg, size);
1301        keg->uk_size = arg->size;
1302        keg->uk_init = arg->uminit;
1303        keg->uk_fini = arg->fini;
1304        keg->uk_align = arg->align;
1305        keg->uk_free = 0;
1306        keg->uk_pages = 0;
1307        keg->uk_flags = arg->flags;
1308        keg->uk_allocf = page_alloc;
1309        keg->uk_freef = page_free;
1310        keg->uk_recurse = 0;
1311        keg->uk_slabzone = NULL;
1312
1313        /*
1314         * The master zone is passed to us at keg-creation time.
1315         */
1316        zone = arg->zone;
1317        keg->uk_name = zone->uz_name;
1318
1319        if (arg->flags & UMA_ZONE_VM)
1320                keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1321
1322        if (arg->flags & UMA_ZONE_ZINIT)
1323                keg->uk_init = zero_init;
1324
1325        if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
1326                keg->uk_flags |= UMA_ZONE_VTOSLAB;
1327
1328        /*
1329         * The +UMA_FRITM_SZ added to uk_size is to account for the
1330         * linkage that is added to the size in keg_small_init().  If
1331         * we don't account for this here then we may end up in
1332         * keg_small_init() with a calculated 'ipers' of 0.
1333         */
1334        if (keg->uk_flags & UMA_ZONE_REFCNT) {
1335                if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1336                        keg_cachespread_init(keg);
1337                else if ((keg->uk_size+UMA_FRITMREF_SZ) >
1338                    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1339                        keg_large_init(keg);
1340                else
1341                        keg_small_init(keg);
1342        } else {
1343                if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1344                        keg_cachespread_init(keg);
1345                else if ((keg->uk_size+UMA_FRITM_SZ) >
1346                    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1347                        keg_large_init(keg);
1348                else
1349                        keg_small_init(keg);
1350        }
1351
1352        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1353                if (keg->uk_flags & UMA_ZONE_REFCNT)
1354                        keg->uk_slabzone = slabrefzone;
1355                else
1356                        keg->uk_slabzone = slabzone;
1357        }
1358
1359        /*
1360         * If we haven't booted yet we need allocations to go through the
1361         * startup cache until the vm is ready.
1362         */
1363        if (keg->uk_ppera == 1) {
1364#ifdef UMA_MD_SMALL_ALLOC
1365                keg->uk_allocf = uma_small_alloc;
1366                keg->uk_freef = uma_small_free;
1367
1368#ifndef __rtems__
1369                if (booted < UMA_STARTUP)
1370                        keg->uk_allocf = startup_alloc;
1371#endif /* __rtems__ */
1372#else
1373#ifndef __rtems__
1374                if (booted < UMA_STARTUP2)
1375                        keg->uk_allocf = startup_alloc;
1376#endif /* __rtems__ */
1377#endif
1378#ifndef __rtems__
1379        } else if (booted < UMA_STARTUP2 &&
1380            (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1381                keg->uk_allocf = startup_alloc;
1382#else /* __rtems__ */
1383        }
1384#endif /* __rtems__ */
1385
1386        /*
1387         * Initialize keg's lock (shared among zones).
1388         */
1389        if (arg->flags & UMA_ZONE_MTXCLASS)
1390                KEG_LOCK_INIT(keg, 1);
1391        else
1392                KEG_LOCK_INIT(keg, 0);
1393
1394        /*
1395         * If we're putting the slab header in the actual page we need to
1396         * figure out where in each page it goes.  This calculates a right
1397         * justified offset into the memory on an ALIGN_PTR boundary.
1398         */
1399        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1400                u_int totsize;
1401
1402                /* Size of the slab struct and free list */
1403                if (keg->uk_flags & UMA_ZONE_REFCNT)
1404                        totsize = sizeof(struct uma_slab_refcnt) +
1405                            keg->uk_ipers * UMA_FRITMREF_SZ;
1406                else
1407                        totsize = sizeof(struct uma_slab) +
1408                            keg->uk_ipers * UMA_FRITM_SZ;
1409
1410                if (totsize & UMA_ALIGN_PTR)
1411                        totsize = (totsize & ~UMA_ALIGN_PTR) +
1412                            (UMA_ALIGN_PTR + 1);
1413                keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
1414
1415                if (keg->uk_flags & UMA_ZONE_REFCNT)
1416                        totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1417                            + keg->uk_ipers * UMA_FRITMREF_SZ;
1418                else
1419                        totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1420                            + keg->uk_ipers * UMA_FRITM_SZ;
1421
1422                /*
1423                 * The only way the following is possible is if with our
1424                 * UMA_ALIGN_PTR adjustments we are now bigger than
1425                 * UMA_SLAB_SIZE.  I haven't checked whether this is
1426                 * mathematically possible for all cases, so we make
1427                 * sure here anyway.
1428                 */
1429                if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
1430                        printf("zone %s ipers %d rsize %d size %d\n",
1431                            zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1432                            keg->uk_size);
1433                        panic("UMA slab won't fit.");
1434                }
1435        }
1436
1437        if (keg->uk_flags & UMA_ZONE_HASH)
1438                hash_alloc(&keg->uk_hash);
1439
1440#ifdef UMA_DEBUG
1441        printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1442            zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1443            keg->uk_ipers, keg->uk_ppera,
1444            (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
1445#endif
1446
1447        LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1448
1449        mtx_lock(&uma_mtx);
1450        LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1451        mtx_unlock(&uma_mtx);
1452        return (0);
1453}
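/*
 * Example of the uk_pgoff calculation in keg_ctor() above (assuming 4 KB
 * slabs and, say, a slab header plus free list totalling 200 bytes): the
 * total is first rounded up to the next UMA_ALIGN_PTR boundary and then
 * subtracted from the slab size, giving uk_pgoff = 4096 - 200 = 3896, so
 * the header sits flush against the end of the slab while the items occupy
 * the front of it.
 */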
1454
1455/*
1456 * Zone header ctor.  This initializes all fields, locks, etc.
1457 *
1458 * Arguments/Returns follow uma_ctor specifications
1459 *      udata  Actually uma_zctor_args
1460 */
1461static int
1462zone_ctor(void *mem, int size, void *udata, int flags)
1463{
1464        struct uma_zctor_args *arg = udata;
1465        uma_zone_t zone = mem;
1466        uma_zone_t z;
1467        uma_keg_t keg;
1468
1469        bzero(zone, size);
1470        zone->uz_name = arg->name;
1471        zone->uz_ctor = arg->ctor;
1472        zone->uz_dtor = arg->dtor;
1473        zone->uz_slab = zone_fetch_slab;
1474        zone->uz_init = NULL;
1475        zone->uz_fini = NULL;
1476        zone->uz_allocs = 0;
1477        zone->uz_frees = 0;
1478        zone->uz_fails = 0;
1479        zone->uz_sleeps = 0;
1480        zone->uz_fills = zone->uz_count = 0;
1481        zone->uz_flags = 0;
1482        keg = arg->keg;
1483
1484        if (arg->flags & UMA_ZONE_SECONDARY) {
1485                KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1486                zone->uz_init = arg->uminit;
1487                zone->uz_fini = arg->fini;
1488                zone->uz_lock = &keg->uk_lock;
1489                zone->uz_flags |= UMA_ZONE_SECONDARY;
1490                mtx_lock(&uma_mtx);
1491                ZONE_LOCK(zone);
1492                LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1493                        if (LIST_NEXT(z, uz_link) == NULL) {
1494                                LIST_INSERT_AFTER(z, zone, uz_link);
1495                                break;
1496                        }
1497                }
1498                ZONE_UNLOCK(zone);
1499                mtx_unlock(&uma_mtx);
1500        } else if (keg == NULL) {
1501                if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1502                    arg->align, arg->flags)) == NULL)
1503                        return (ENOMEM);
1504        } else {
1505                struct uma_kctor_args karg;
1506                int error;
1507
1508                /* We should only be here from uma_startup() */
1509                karg.size = arg->size;
1510                karg.uminit = arg->uminit;
1511                karg.fini = arg->fini;
1512                karg.align = arg->align;
1513                karg.flags = arg->flags;
1514                karg.zone = zone;
1515                error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1516                    flags);
1517                if (error)
1518                        return (error);
1519        }
1520        /*
1521         * Link in the first keg.
1522         */
1523        zone->uz_klink.kl_keg = keg;
1524        LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1525        zone->uz_lock = &keg->uk_lock;
1526        zone->uz_size = keg->uk_size;
1527        zone->uz_flags |= (keg->uk_flags &
1528            (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1529
1530        /*
1531         * Some internal zones don't have room allocated for the per cpu
1532         * caches.  If we're internal, bail out here.
1533         */
1534        if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1535                KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1536                    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1537                return (0);
1538        }
1539
1540        if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1541                zone->uz_count = BUCKET_MAX;
1542        else if (keg->uk_ipers <= BUCKET_MAX)
1543                zone->uz_count = keg->uk_ipers;
1544        else
1545                zone->uz_count = BUCKET_MAX;
1546        return (0);
1547}
1548
1549/*
1550 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1551 * table and removes the keg from the global list.
1552 *
1553 * Arguments/Returns follow uma_dtor specifications
1554 *      udata  unused
1555 */
1556static void
1557keg_dtor(void *arg, int size, void *udata)
1558{
1559        uma_keg_t keg;
1560
1561        keg = (uma_keg_t)arg;
1562        KEG_LOCK(keg);
1563        if (keg->uk_free != 0) {
1564                printf("Freed UMA keg (%s) was not empty (%d items). "
1565                    " Lost %d pages of memory.\n",
1566                    keg->uk_name ? keg->uk_name : "",
1567                    keg->uk_free, keg->uk_pages);
1568        }
1569        KEG_UNLOCK(keg);
1570
1571        hash_free(&keg->uk_hash);
1572
1573        KEG_LOCK_FINI(keg);
1574}
1575
1576/*
1577 * Zone header dtor.
1578 *
1579 * Arguments/Returns follow uma_dtor specifications
1580 *      udata  unused
1581 */
1582static void
1583zone_dtor(void *arg, int size, void *udata)
1584{
1585        uma_klink_t klink;
1586        uma_zone_t zone;
1587        uma_keg_t keg;
1588
1589        zone = (uma_zone_t)arg;
1590        keg = zone_first_keg(zone);
1591
1592        if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1593                cache_drain(zone);
1594
1595        mtx_lock(&uma_mtx);
1596        LIST_REMOVE(zone, uz_link);
1597        mtx_unlock(&uma_mtx);
1598        /*
1599         * XXX there are some races here where
1600         * the zone can be drained but zone lock
1601         * released and then refilled before we
1602         * remove it... we dont care for now
1603         * remove it... we don't care for now
1604        zone_drain_wait(zone, M_WAITOK);
1605        /*
1606         * Unlink all of our kegs.
1607         */
1608        while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1609                klink->kl_keg = NULL;
1610                LIST_REMOVE(klink, kl_link);
1611                if (klink == &zone->uz_klink)
1612                        continue;
1613                free(klink, M_TEMP);
1614        }
1615        /*
1616         * We only destroy kegs from non secondary zones.
1617         */
1618        if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1619                mtx_lock(&uma_mtx);
1620                LIST_REMOVE(keg, uk_link);
1621                mtx_unlock(&uma_mtx);
1622                zone_free_item(kegs, keg, NULL, SKIP_NONE,
1623                    ZFREE_STATFREE);
1624        }
1625}
1626
1627/*
1628 * Traverses every zone in the system and calls a callback
1629 *
1630 * Arguments:
1631 *      zfunc  A pointer to a function which accepts a zone
1632 *              as an argument.
1633 *
1634 * Returns:
1635 *      Nothing
1636 */
1637static void
1638zone_foreach(void (*zfunc)(uma_zone_t))
1639{
1640        uma_keg_t keg;
1641        uma_zone_t zone;
1642
1643        mtx_lock(&uma_mtx);
1644        LIST_FOREACH(keg, &uma_kegs, uk_link) {
1645                LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1646                        zfunc(zone);
1647        }
1648        mtx_unlock(&uma_mtx);
1649}
1650
1651/* Public functions */
1652/* See uma.h */
1653void
1654uma_startup(void *bootmem, int boot_pages)
1655{
1656        struct uma_zctor_args args;
1657#ifndef __rtems__
1658        uma_slab_t slab;
1659#endif /* __rtems__ */
1660        u_int slabsize;
1661        u_int objsize, totsize, wsize;
1662#ifndef __rtems__
1663        int i;
1664#endif /* __rtems__ */
1665
1666#ifdef UMA_DEBUG
1667        printf("Creating uma keg headers zone and keg.\n");
1668#endif
1669        mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1670
1671        /*
1672         * Figure out the maximum number of items-per-slab we'll have if
1673         * we're using the OFFPAGE slab header to track free items, given
1674         * all possible object sizes and the maximum desired wastage
1675         * (UMA_MAX_WASTE).
1676         *
1677         * We iterate until we find an object size for
1678         * which the calculated wastage in keg_small_init() will be
1679         * enough to warrant OFFPAGE.  Since wastedspace versus objsize
1680         * is an overall increasing see-saw function, we find the smallest
1681         * objsize such that the wastage is always acceptable for objects
1682         * with that objsize or smaller.  Since a smaller objsize always
1683         * generates a larger possible uma_max_ipers, we use this computed
1684         * objsize to calculate the largest ipers possible.  Since the
1685         * ipers calculated for OFFPAGE slab headers is always larger than
1686         * the ipers initially calculated in keg_small_init(), we use
1687         * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1688         * obtain the maximum ipers possible for offpage slab headers.
1689         *
1690         * It should be noted that ipers versus objsize is an inversely
1691         * proportional function which drops off rather quickly so as
1692         * long as our UMA_MAX_WASTE is such that the objsize we calculate
1693         * falls into the portion of the inverse relation AFTER the steep
1694         * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1695         *
1696         * Note that we have 8-bits (1 byte) to use as a freelist index
1697         * inside the actual slab header itself and this is enough to
1698         * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
1699         * object with offpage slab header would have ipers =
1700         * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1701         * 1 greater than what our byte-integer freelist index can
1702         * accommodate, but we know that this situation never occurs as
1703         * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1704         * that we need to go to offpage slab headers.  Or, if we do,
1705         * then we trap that condition below and panic in the INVARIANTS case.
1706         */
1707        wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1708        totsize = wsize;
1709        objsize = UMA_SMALLEST_UNIT;
1710        while (totsize >= wsize) {
1711                totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1712                    (objsize + UMA_FRITM_SZ);
1713                totsize *= (UMA_FRITM_SZ + objsize);
1714                objsize++;
1715        }
1716        if (objsize > UMA_SMALLEST_UNIT)
1717                objsize--;
1718        uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
1719
1720        wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1721        totsize = wsize;
1722        objsize = UMA_SMALLEST_UNIT;
1723        while (totsize >= wsize) {
1724                totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1725                    (objsize + UMA_FRITMREF_SZ);
1726                totsize *= (UMA_FRITMREF_SZ + objsize);
1727                objsize++;
1728        }
1729        if (objsize > UMA_SMALLEST_UNIT)
1730                objsize--;
1731        uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);
1732
1733        KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1734            ("uma_startup: calculated uma_max_ipers values too large!"));
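
        /*
         * A worked sketch of the computation above, using assumed example
         * numbers rather than the real constants: with a 4096-byte slab,
         * a 32-byte struct uma_slab and UMA_MAX_WASTE at roughly a tenth
         * of the slab (~409 bytes), wsize would be 4096 - 32 - 409 = 3655.
         * Each pass of the loop packs the slab for the current objsize:
         *
         *      ipers   = (4096 - 32) / (objsize + UMA_FRITM_SZ)
         *      totsize = ipers * (objsize + UMA_FRITM_SZ)
         *
         * and stops at the smallest objsize for which the space left over
         * after packing exceeds UMA_MAX_WASTE (totsize < wsize).  That
         * objsize then bounds uma_max_ipers via UMA_SLAB_SIZE / objsize,
         * clamped to no less than 64; the second loop repeats the exercise
         * with the larger refcnt slab header for uma_max_ipers_ref.
         */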
1735
1736#ifdef UMA_DEBUG
1737        printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1738        printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
1739            uma_max_ipers_ref);
1740#endif
1741
1742        /* "manually" create the initial zone */
1743        args.name = "UMA Kegs";
1744        args.size = sizeof(struct uma_keg);
1745        args.ctor = keg_ctor;
1746        args.dtor = keg_dtor;
1747        args.uminit = zero_init;
1748        args.fini = NULL;
1749        args.keg = &masterkeg;
1750        args.align = 32 - 1;
1751        args.flags = UMA_ZFLAG_INTERNAL;
1752        /* The initial zone has no per-CPU queues so it's smaller */
1753        zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1754
1755#ifndef __rtems__
1756#ifdef UMA_DEBUG
1757        printf("Filling boot free list.\n");
1758#endif
1759        for (i = 0; i < boot_pages; i++) {
1760                slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1761                slab->us_data = (u_int8_t *)slab;
1762                slab->us_flags = UMA_SLAB_BOOT;
1763                LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1764        }
1765        mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1766#endif /* __rtems__ */
1767
1768#ifdef UMA_DEBUG
1769        printf("Creating uma zone headers zone and keg.\n");
1770#endif
1771        args.name = "UMA Zones";
1772        args.size = sizeof(struct uma_zone) +
1773            (sizeof(struct uma_cache) * (mp_maxid + 1));
1774        args.ctor = zone_ctor;
1775        args.dtor = zone_dtor;
1776        args.uminit = zero_init;
1777        args.fini = NULL;
1778        args.keg = NULL;
1779        args.align = 32 - 1;
1780        args.flags = UMA_ZFLAG_INTERNAL;
1781        /* The initial zone has no per-CPU queues so it's smaller */
1782        zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1783
1784#ifdef UMA_DEBUG
1785        printf("Initializing pcpu cache locks.\n");
1786#endif
1787#ifdef UMA_DEBUG
1788        printf("Creating slab and hash zones.\n");
1789#endif
1790
1791        /*
1792         * This is the max number of free list items we'll have with
1793         * offpage slabs.
1794         */
1795        slabsize = uma_max_ipers * UMA_FRITM_SZ;
1796        slabsize += sizeof(struct uma_slab);
1797
1798        /* Now make a zone for slab headers */
1799        slabzone = uma_zcreate("UMA Slabs",
1800                                slabsize,
1801                                NULL, NULL, NULL, NULL,
1802                                UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1803
1804        /*
1805         * We also create a zone for the bigger slabs with reference
1806         * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1807         */
1808        slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1809        slabsize += sizeof(struct uma_slab_refcnt);
1810        slabrefzone = uma_zcreate("UMA RCntSlabs",
1811                                  slabsize,
1812                                  NULL, NULL, NULL, NULL,
1813                                  UMA_ALIGN_PTR,
1814                                  UMA_ZFLAG_INTERNAL);
1815
1816        hashzone = uma_zcreate("UMA Hash",
1817            sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1818            NULL, NULL, NULL, NULL,
1819            UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1820
1821        bucket_init();
1822
1823#ifndef __rtems__
1824        booted = UMA_STARTUP;
1825#endif /* __rtems__ */
1826
1827#ifdef UMA_DEBUG
1828        printf("UMA startup complete.\n");
1829#endif
1830}
1831#ifdef __rtems__
1832static void
1833rtems_bsd_uma_startup(void *unused)
1834{
1835        (void) unused;
1836
1837        uma_startup(NULL, 0);
1838}
1839
1840SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_SECOND,
1841    rtems_bsd_uma_startup, NULL);
1842#endif /* __rtems__ */
1843
1844#ifndef __rtems__
1845/* see uma.h */
1846void
1847uma_startup2(void)
1848{
1849        booted = UMA_STARTUP2;
1850        bucket_enable();
1851#ifdef UMA_DEBUG
1852        printf("UMA startup2 complete.\n");
1853#endif
1854}
1855#endif /* __rtems__ */
1856
1857/*
1858 * Initialize our callout handle.
1859 */
1860
1861
1862static void
1863uma_startup3(void)
1864{
1865#ifdef UMA_DEBUG
1866        printf("Starting callout.\n");
1867#endif
1868        callout_init(&uma_callout, CALLOUT_MPSAFE);
1869        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1870#ifdef UMA_DEBUG
1871        printf("UMA startup3 complete.\n");
1872#endif
1873}
1874
1875static uma_keg_t
1876uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1877                int align, u_int32_t flags)
1878{
1879        struct uma_kctor_args args;
1880
1881        args.size = size;
1882        args.uminit = uminit;
1883        args.fini = fini;
1884        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1885        args.flags = flags;
1886        args.zone = zone;
1887        return (zone_alloc_item(kegs, &args, M_WAITOK));
1888}
1889
1890/* See uma.h */
1891void
1892uma_set_align(int align)
1893{
1894
1895        if (align != UMA_ALIGN_CACHE)
1896                uma_align_cache = align;
1897}
1898
1899/* See uma.h */
1900uma_zone_t
1901uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1902                uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1903
1904{
1905        struct uma_zctor_args args;
1906
1907        /* This stuff is essential for the zone ctor */
1908        args.name = name;
1909        args.size = size;
1910        args.ctor = ctor;
1911        args.dtor = dtor;
1912        args.uminit = uminit;
1913        args.fini = fini;
1914        args.align = align;
1915        args.flags = flags;
1916        args.keg = NULL;
1917
1918        return (zone_alloc_item(zones, &args, M_WAITOK));
1919}
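
/*
 * Illustrative usage sketch (not part of this file; "struct foo" and
 * "foo_zone" are hypothetical names): consumers normally pair uma_zcreate()
 * with the uma_zalloc()/uma_zfree() wrappers from uma.h:
 *
 *	static uma_zone_t foo_zone;
 *
 *	foo_zone = uma_zcreate("foo", sizeof(struct foo),
 *	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *
 *	struct foo *fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
 *	...
 *	uma_zfree(foo_zone, fp);
 *
 * All items should be freed back before uma_zdestroy(foo_zone); otherwise
 * keg_dtor() above reports the keg as not empty and its pages are lost.
 */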
1920
1921/* See uma.h */
1922uma_zone_t
1923uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1924                    uma_init zinit, uma_fini zfini, uma_zone_t master)
1925{
1926        struct uma_zctor_args args;
1927        uma_keg_t keg;
1928
1929        keg = zone_first_keg(master);
1930        args.name = name;
1931        args.size = keg->uk_size;
1932        args.ctor = ctor;
1933        args.dtor = dtor;
1934        args.uminit = zinit;
1935        args.fini = zfini;
1936        args.align = keg->uk_align;
1937        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1938        args.keg = keg;
1939
1940        /* XXX Attaches only one keg of potentially many. */
1941        return (zone_alloc_item(zones, &args, M_WAITOK));
1942}
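
/*
 * Illustrative sketch (hypothetical names): a secondary zone layers its own
 * ctor/dtor on top of the master zone's keg, so both zones share the same
 * backing slabs:
 *
 *	foo_cache_zone = uma_zsecond_create("foo cache",
 *	    foo_cache_ctor, foo_cache_dtor, NULL, NULL, foo_zone);
 *
 * The stock FreeBSD tree uses this arrangement for the mbuf packet zone,
 * which sits on top of the plain mbuf zone.
 */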
1943
1944#ifndef __rtems__
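/*
 * Lock two zones in a consistent (address-based) order so that two threads
 * locking the same pair from opposite directions cannot deadlock; MTX_DUPOK
 * tells WITNESS that holding two locks of this type at once is intentional.
 */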
1945static void
1946zone_lock_pair(uma_zone_t a, uma_zone_t b)
1947{
1948        if (a < b) {
1949                ZONE_LOCK(a);
1950                mtx_lock_flags(b->uz_lock, MTX_DUPOK);
1951        } else {
1952                ZONE_LOCK(b);
1953                mtx_lock_flags(a->uz_lock, MTX_DUPOK);
1954        }
1955}
1956
1957static void
1958zone_unlock_pair(uma_zone_t a, uma_zone_t b)
1959{
1960
1961        ZONE_UNLOCK(a);
1962        ZONE_UNLOCK(b);
1963}
1964
1965int
1966uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
1967{
1968        uma_klink_t klink;
1969        uma_klink_t kl;
1970        int error;
1971
1972        error = 0;
1973        klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
1974
1975        zone_lock_pair(zone, master);
1976        /*
1977         * zone must use vtoslab() to resolve objects and must already be
1978         * a secondary.
1979         */
1980        if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
1981            != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
1982                error = EINVAL;
1983                goto out;
1984        }
1985        /*
1986         * The new master must also use vtoslab().
1987         */
1988        if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
1989                error = EINVAL;
1990                goto out;
1991        }
1992        /*
1993         * Both must either be refcnt, or not be refcnt.
1994         */
1995        if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
1996            (master->uz_flags & UMA_ZONE_REFCNT)) {
1997                error = EINVAL;
1998                goto out;
1999        }
2000        /*
2001         * The underlying object must be the same size.  rsize
2002         * may be different.
2003         */
2004        if (master->uz_size != zone->uz_size) {
2005                error = E2BIG;
2006                goto out;
2007        }
2008        /*
2009         * Put it at the end of the list.
2010         */
2011        klink->kl_keg = zone_first_keg(master);
2012        LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2013                if (LIST_NEXT(kl, kl_link) == NULL) {
2014                        LIST_INSERT_AFTER(kl, klink, kl_link);
2015                        break;
2016                }
2017        }
2018        klink = NULL;
2019        zone->uz_flags |= UMA_ZFLAG_MULTI;
2020        zone->uz_slab = zone_fetch_slab_multi;
2021
2022out:
2023        zone_unlock_pair(zone, master);
2024        if (klink != NULL)
2025                free(klink, M_TEMP);
2026
2027        return (error);
2028}
2029#endif /* __rtems__ */
2030
2031
2032/* See uma.h */
2033void
2034uma_zdestroy(uma_zone_t zone)
2035{
2036
2037        zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
2038}
2039
2040/* See uma.h */
2041void *
2042uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2043{
2044        void *item;
2045        uma_cache_t cache;
2046        uma_bucket_t bucket;
2047        int cpu;
2048
2049        /* This is the fast path allocation */
2050#ifdef UMA_DEBUG_ALLOC_1
2051        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
2052#endif
2053        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2054            zone->uz_name, flags);
2055
2056        if (flags & M_WAITOK) {
2057                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2058                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2059        }
2060
2061        /*
2062         * If possible, allocate from the per-CPU cache.  There are two
2063         * requirements for safe access to the per-CPU cache: (1) the thread
2064         * accessing the cache must not be preempted or yield during access,
2065         * and (2) the thread must not migrate CPUs without switching which
2066         * cache it accesses.  We rely on a critical section to prevent
2067         * preemption and migration.  We release the critical section in
2068         * order to acquire the zone mutex if we are unable to allocate from
2069         * the current cache; when we re-acquire the critical section, we
2070         * must detect and handle migration if it has occurred.
2071         */
2072zalloc_restart:
2073        critical_enter();
2074        cpu = curcpu;
2075        cache = &zone->uz_cpu[cpu];
2076
2077zalloc_start:
2078        bucket = cache->uc_allocbucket;
2079
2080        if (bucket) {
2081                if (bucket->ub_cnt > 0) {
2082                        bucket->ub_cnt--;
2083                        item = bucket->ub_bucket[bucket->ub_cnt];
2084#ifdef INVARIANTS
2085                        bucket->ub_bucket[bucket->ub_cnt] = NULL;
2086#endif
2087                        KASSERT(item != NULL,
2088                            ("uma_zalloc: Bucket pointer mangled."));
2089                        cache->uc_allocs++;
2090                        critical_exit();
2091#ifdef INVARIANTS
2092                        ZONE_LOCK(zone);
2093                        uma_dbg_alloc(zone, NULL, item);
2094                        ZONE_UNLOCK(zone);
2095#endif
2096                        if (zone->uz_ctor != NULL) {
2097                                if (zone->uz_ctor(item, zone->uz_size,
2098                                    udata, flags) != 0) {
2099                                        zone_free_item(zone, item, udata,
2100                                            SKIP_DTOR, ZFREE_STATFAIL |
2101                                            ZFREE_STATFREE);
2102                                        return (NULL);
2103                                }
2104                        }
2105                        if (flags & M_ZERO)
2106                                bzero(item, zone->uz_size);
2107                        return (item);
2108                } else if (cache->uc_freebucket) {
2109                        /*
2110                         * We have run out of items in our allocbucket.
2111                         * See if we can switch with our free bucket.
2112                         */
2113                        if (cache->uc_freebucket->ub_cnt > 0) {
2114#ifdef UMA_DEBUG_ALLOC
2115                                printf("uma_zalloc: Swapping empty with"
2116                                    " alloc.\n");
2117#endif
2118                                bucket = cache->uc_freebucket;
2119                                cache->uc_freebucket = cache->uc_allocbucket;
2120                                cache->uc_allocbucket = bucket;
2121
2122                                goto zalloc_start;
2123                        }
2124                }
2125        }
2126        /*
2127         * The attempt to retrieve the item from the per-CPU cache has failed, so
2128         * we must go back to the zone.  This requires the zone lock, so we
2129         * must drop the critical section, then re-acquire it when we go back
2130         * to the cache.  Since the critical section is released, we may be
2131         * preempted or migrate.  As such, make sure not to maintain any
2132         * thread-local state specific to the cache from prior to releasing
2133         * the critical section.
2134         */
2135        critical_exit();
2136        ZONE_LOCK(zone);
2137        critical_enter();
2138        cpu = curcpu;
2139        cache = &zone->uz_cpu[cpu];
2140        bucket = cache->uc_allocbucket;
2141        if (bucket != NULL) {
2142                if (bucket->ub_cnt > 0) {
2143                        ZONE_UNLOCK(zone);
2144                        goto zalloc_start;
2145                }
2146                bucket = cache->uc_freebucket;
2147                if (bucket != NULL && bucket->ub_cnt > 0) {
2148                        ZONE_UNLOCK(zone);
2149                        goto zalloc_start;
2150                }
2151        }
2152
2153        /* Since we have locked the zone we may as well send back our stats */
2154        zone->uz_allocs += cache->uc_allocs;
2155        cache->uc_allocs = 0;
2156        zone->uz_frees += cache->uc_frees;
2157        cache->uc_frees = 0;
2158
2159        /* Our old one is now a free bucket */
2160        if (cache->uc_allocbucket) {
2161                KASSERT(cache->uc_allocbucket->ub_cnt == 0,
2162                    ("uma_zalloc_arg: Freeing a non free bucket."));
2163                LIST_INSERT_HEAD(&zone->uz_free_bucket,
2164                    cache->uc_allocbucket, ub_link);
2165                cache->uc_allocbucket = NULL;
2166        }
2167
2168        /* Check the free list for a new alloc bucket */
2169        if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
2170                KASSERT(bucket->ub_cnt != 0,
2171                    ("uma_zalloc_arg: Returning an empty bucket."));
2172
2173                LIST_REMOVE(bucket, ub_link);
2174                cache->uc_allocbucket = bucket;
2175                ZONE_UNLOCK(zone);
2176                goto zalloc_start;
2177        }
2178        /* We are no longer associated with this CPU. */
2179        critical_exit();
2180
2181        /* Bump up our uz_count so we get here less */
2182        if (zone->uz_count < BUCKET_MAX)
2183                zone->uz_count++;
2184
2185        /*
2186         * Now let's just fill a bucket and put it on the free list.  If that
2187         * works we'll restart the allocation from the beginning.
2188         */
2189        if (zone_alloc_bucket(zone, flags)) {
2190                ZONE_UNLOCK(zone);
2191                goto zalloc_restart;
2192        }
2193        ZONE_UNLOCK(zone);
2194        /*
2195         * We may not be able to get a bucket so return an actual item.
2196         */
2197#ifdef UMA_DEBUG
2198        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2199#endif
2200
2201        item = zone_alloc_item(zone, udata, flags);
2202        return (item);
2203}
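
/*
 * Illustrative sketch (hypothetical names): callers that cannot sleep pass
 * M_NOWAIT and must handle a NULL return; M_WAITOK callers may sleep here,
 * which is what the WITNESS_WARN() check above flags:
 *
 *	fp = uma_zalloc(foo_zone, M_NOWAIT);
 *	if (fp == NULL)
 *		return (ENOMEM);
 */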
2204
2205static uma_slab_t
2206keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2207{
2208        uma_slab_t slab;
2209
2210        mtx_assert(&keg->uk_lock, MA_OWNED);
2211        slab = NULL;
2212
2213        for (;;) {
2214                /*
2215                 * Find a slab with some space.  Prefer slabs that are partially
2216                 * used over those that are totally full.  This helps to reduce
2217                 * fragmentation.
2218                 */
2219                if (keg->uk_free != 0) {
2220                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
2221                                slab = LIST_FIRST(&keg->uk_part_slab);
2222                        } else {
2223                                slab = LIST_FIRST(&keg->uk_free_slab);
2224                                LIST_REMOVE(slab, us_link);
2225                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2226                                    us_link);
2227                        }
2228                        MPASS(slab->us_keg == keg);
2229                        return (slab);
2230                }
2231
2232                /*
2233                 * M_NOVM means don't ask at all!
2234                 */
2235                if (flags & M_NOVM)
2236                        break;
2237
2238                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2239                        keg->uk_flags |= UMA_ZFLAG_FULL;
2240                        /*
2241                         * If this is not a multi-zone, set the FULL bit.
2242                         * Otherwise slab_multi() takes care of it.
2243                         */
2244                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0)
2245                                zone->uz_flags |= UMA_ZFLAG_FULL;
2246                        if (flags & M_NOWAIT)
2247                                break;
2248                        zone->uz_sleeps++;
2249                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2250                        continue;
2251                }
2252                keg->uk_recurse++;
2253                slab = keg_alloc_slab(keg, zone, flags);
2254                keg->uk_recurse--;
2255                /*
2256                 * If we got a slab here it's safe to mark it partially used
2257                 * and return.  We assume that the caller is going to remove
2258                 * at least one item.
2259                 */
2260                if (slab) {
2261                        MPASS(slab->us_keg == keg);
2262                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2263                        return (slab);
2264                }
2265                /*
2266                 * We might not have been able to get a slab but another cpu
2267                 * could have while we were unlocked.  Check again before we
2268                 * fail.
2269                 */
2270                flags |= M_NOVM;
2271        }
2272        return (slab);
2273}
2274
2275static inline void
2276zone_relock(uma_zone_t zone, uma_keg_t keg)
2277{
2278        if (zone->uz_lock != &keg->uk_lock) {
2279                KEG_UNLOCK(keg);
2280                ZONE_LOCK(zone);
2281        }
2282}
2283
2284static inline void
2285keg_relock(uma_keg_t keg, uma_zone_t zone)
2286{
2287        if (zone->uz_lock != &keg->uk_lock) {
2288                ZONE_UNLOCK(zone);
2289                KEG_LOCK(keg);
2290        }
2291}
2292
2293static uma_slab_t
2294zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2295{
2296        uma_slab_t slab;
2297
2298        if (keg == NULL)
2299                keg = zone_first_keg(zone);
2300        /*
2301         * This is to prevent us from recursively trying to allocate
2302         * buckets.  The problem is that if an allocation forces us to
2303         * grab a new bucket we will call page_alloc, which will go off
2304         * and cause the vm to allocate vm_map_entries.  If we need new
2305         * buckets there too we will recurse in kmem_alloc and bad
2306         * things happen.  So instead we return a NULL bucket, and make
2307         * the code that allocates buckets smart enough to deal with it
2308         */
2309        if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0)
2310                return (NULL);
2311
2312        for (;;) {
2313                slab = keg_fetch_slab(keg, zone, flags);
2314                if (slab)
2315                        return (slab);
2316                if (flags & (M_NOWAIT | M_NOVM))
2317                        break;
2318        }
2319        return (NULL);
2320}
2321
2322#ifndef __rtems__
2323/*
2324 * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2325 * with the keg locked.  Caller must call zone_relock() afterwards if the
2326 * zone lock is required.  If NULL is returned, the zone lock is held instead.
2327 *
2328 * The last pointer is used to seed the search.  It is not required.
2329 */
2330static uma_slab_t
2331zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2332{
2333        uma_klink_t klink;
2334        uma_slab_t slab;
2335        uma_keg_t keg;
2336        int flags;
2337        int empty;
2338        int full;
2339
2340        /*
2341         * Don't wait on the first pass.  This will skip limit tests
2342         * as well.  We don't want to block if we can find a provider
2343         * without blocking.
2344         */
2345        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2346        /*
2347         * Use the last slab allocated as a hint for where to start
2348         * the search.
2349         */
2350        if (last) {
2351                slab = keg_fetch_slab(last, zone, flags);
2352                if (slab)
2353                        return (slab);
2354                zone_relock(zone, last);
2355                last = NULL;
2356        }
2357        /*
2358         * Loop until we have a slab in case of transient failures
2359         * while M_WAITOK is specified.  I'm not sure this is 100%
2360         * required but we've done it for so long now.
2361         */
2362        for (;;) {
2363                empty = 0;
2364                full = 0;
2365                /*
2366                 * Search the available kegs for slabs.  Be careful to hold the
2367                 * correct lock while calling into the keg layer.
2368                 */
2369                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2370                        keg = klink->kl_keg;
2371                        keg_relock(keg, zone);
2372                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2373                                slab = keg_fetch_slab(keg, zone, flags);
2374                                if (slab)
2375                                        return (slab);
2376                        }
2377                        if (keg->uk_flags & UMA_ZFLAG_FULL)
2378                                full++;
2379                        else
2380                                empty++;
2381                        zone_relock(zone, keg);
2382                }
2383                if (rflags & (M_NOWAIT | M_NOVM))
2384                        break;
2385                flags = rflags;
2386                /*
2387                 * All kegs are full.  XXX We can't atomically check all kegs
2388                 * and sleep so just sleep for a short period and retry.
2389                 */
2390                if (full && !empty) {
2391                        zone->uz_flags |= UMA_ZFLAG_FULL;
2392                        zone->uz_sleeps++;
2393                        msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
2394                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
2395                        continue;
2396                }
2397        }
2398        return (NULL);
2399}
2400#endif /* __rtems__ */
2401
2402static void *
2403slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
2404{
2405        uma_keg_t keg;
2406        uma_slabrefcnt_t slabref;
2407        void *item;
2408        u_int8_t freei;
2409
2410        keg = slab->us_keg;
2411        mtx_assert(&keg->uk_lock, MA_OWNED);
2412
2413        freei = slab->us_firstfree;
2414        if (keg->uk_flags & UMA_ZONE_REFCNT) {
2415                slabref = (uma_slabrefcnt_t)slab;
2416                slab->us_firstfree = slabref->us_freelist[freei].us_item;
2417        } else {
2418                slab->us_firstfree = slab->us_freelist[freei].us_item;
2419        }
2420        item = slab->us_data + (keg->uk_rsize * freei);
2421
2422        slab->us_freecount--;
2423        keg->uk_free--;
2424#ifdef INVARIANTS
2425        uma_dbg_alloc(zone, slab, item);
2426#endif
2427        /* Move this slab to the full list */
2428        if (slab->us_freecount == 0) {
2429                LIST_REMOVE(slab, us_link);
2430                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2431        }
2432
2433        return (item);
2434}
2435
2436static int
2437zone_alloc_bucket(uma_zone_t zone, int flags)
2438{
2439        uma_bucket_t bucket;
2440        uma_slab_t slab;
2441        uma_keg_t keg;
2442        int16_t saved;
2443        int max, origflags = flags;
2444
2445        /*
2446         * Try this zone's free list first so we don't allocate extra buckets.
2447         */
2448        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2449                KASSERT(bucket->ub_cnt == 0,
2450                    ("zone_alloc_bucket: Bucket on free list is not empty."));
2451                LIST_REMOVE(bucket, ub_link);
2452        } else {
2453                int bflags;
2454
2455                bflags = (flags & ~M_ZERO);
2456                if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2457                        bflags |= M_NOVM;
2458
2459                ZONE_UNLOCK(zone);
2460                bucket = bucket_alloc(zone->uz_count, bflags);
2461                ZONE_LOCK(zone);
2462        }
2463
2464        if (bucket == NULL) {
2465                return (0);
2466        }
2467
2468#ifdef SMP
2469        /*
2470         * This code is here to limit the number of simultaneous bucket fills
2471         * for any given zone to the number of per cpu caches in this zone. This
2472         * is done so that we don't allocate more memory than we really need.
2473         */
2474        if (zone->uz_fills >= mp_ncpus)
2475                goto done;
2476
2477#endif
2478        zone->uz_fills++;
2479
2480        max = MIN(bucket->ub_entries, zone->uz_count);
2481        /* Try to keep the buckets totally full */
2482        saved = bucket->ub_cnt;
2483        slab = NULL;
2484        keg = NULL;
2485        while (bucket->ub_cnt < max &&
2486            (slab = zone->uz_slab(zone, keg, flags)) != NULL) {
2487                keg = slab->us_keg;
2488                while (slab->us_freecount && bucket->ub_cnt < max) {
2489                        bucket->ub_bucket[bucket->ub_cnt++] =
2490                            slab_alloc_item(zone, slab);
2491                }
2492
2493                /* Don't block on the next fill */
2494                flags |= M_NOWAIT;
2495        }
2496        if (slab)
2497                zone_relock(zone, keg);
2498
2499        /*
2500         * We unlock here because we need to call the zone's init.
2501         * It should be safe to unlock because the slab dealt with
2502         * above is already on the appropriate list within the keg
2503         * and the bucket we filled is not yet on any list, so we
2504         * own it.
2505         */
2506        if (zone->uz_init != NULL) {
2507                int i;
2508
2509                ZONE_UNLOCK(zone);
2510                for (i = saved; i < bucket->ub_cnt; i++)
2511                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2512                            origflags) != 0)
2513                                break;
2514                /*
2515                 * If we couldn't initialize the whole bucket, put the
2516                 * rest back onto the freelist.
2517                 */
2518                if (i != bucket->ub_cnt) {
2519                        int j;
2520
2521                        for (j = i; j < bucket->ub_cnt; j++) {
2522                                zone_free_item(zone, bucket->ub_bucket[j],
2523                                    NULL, SKIP_FINI, 0);
2524#ifdef INVARIANTS
2525                                bucket->ub_bucket[j] = NULL;
2526#endif
2527                        }
2528                        bucket->ub_cnt = i;
2529                }
2530                ZONE_LOCK(zone);
2531        }
2532
2533        zone->uz_fills--;
2534        if (bucket->ub_cnt != 0) {
2535                LIST_INSERT_HEAD(&zone->uz_full_bucket,
2536                    bucket, ub_link);
2537                return (1);
2538        }
2539#ifdef SMP
2540done:
2541#endif
2542        bucket_free(bucket);
2543
2544        return (0);
2545}
2546/*
2547 * Allocates an item for an internal zone
2548 *
2549 * Arguments
2550 *      zone   The zone to alloc for.
2551 *      udata  The data to be passed to the constructor.
2552 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2553 *
2554 * Returns
2555 *      NULL if there is no memory and M_NOWAIT is set
2556 *      An item if successful
2557 */
2558
2559static void *
2560zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2561{
2562        uma_slab_t slab;
2563        void *item;
2564
2565        item = NULL;
2566
2567#ifdef UMA_DEBUG_ALLOC
2568        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2569#endif
2570        ZONE_LOCK(zone);
2571
2572        slab = zone->uz_slab(zone, NULL, flags);
2573        if (slab == NULL) {
2574                zone->uz_fails++;
2575                ZONE_UNLOCK(zone);
2576                return (NULL);
2577        }
2578
2579        item = slab_alloc_item(zone, slab);
2580
2581        zone_relock(zone, slab->us_keg);
2582        zone->uz_allocs++;
2583        ZONE_UNLOCK(zone);
2584
2585        /*
2586         * We have to call both the zone's init (not the keg's init)
2587         * and the zone's ctor.  This is because the item is going from
2588         * a keg slab directly to the user, and the user is expecting it
2589         * to be both zone-init'd as well as zone-ctor'd.
2590         */
2591        if (zone->uz_init != NULL) {
2592                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2593                        zone_free_item(zone, item, udata, SKIP_FINI,
2594                            ZFREE_STATFAIL | ZFREE_STATFREE);
2595                        return (NULL);
2596                }
2597        }
2598        if (zone->uz_ctor != NULL) {
2599                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2600                        zone_free_item(zone, item, udata, SKIP_DTOR,
2601                            ZFREE_STATFAIL | ZFREE_STATFREE);
2602                        return (NULL);
2603                }
2604        }
2605        if (flags & M_ZERO)
2606                bzero(item, zone->uz_size);
2607
2608        return (item);
2609}
2610
2611/* See uma.h */
2612void
2613uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2614{
2615        uma_cache_t cache;
2616        uma_bucket_t bucket;
2617        int bflags;
2618        int cpu;
2619
2620#ifdef UMA_DEBUG_ALLOC_1
2621        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2622#endif
2623        CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2624            zone->uz_name);
2625
2626        /* uma_zfree(..., NULL) does nothing, to match free(9). */
2627        if (item == NULL)
2628                return;
2629
2630        if (zone->uz_dtor)
2631                zone->uz_dtor(item, zone->uz_size, udata);
2632
2633#ifdef INVARIANTS
2634        ZONE_LOCK(zone);
2635        if (zone->uz_flags & UMA_ZONE_MALLOC)
2636                uma_dbg_free(zone, udata, item);
2637        else
2638                uma_dbg_free(zone, NULL, item);
2639        ZONE_UNLOCK(zone);
2640#endif
2641        /*
2642         * The race here is acceptable.  If we miss it we'll just have to wait
2643         * a little longer for the limits to be reset.
2644         */
2645        if (zone->uz_flags & UMA_ZFLAG_FULL)
2646                goto zfree_internal;
2647
2648        /*
2649         * If possible, free to the per-CPU cache.  There are two
2650         * requirements for safe access to the per-CPU cache: (1) the thread
2651         * accessing the cache must not be preempted or yield during access,
2652         * and (2) the thread must not migrate CPUs without switching which
2653         * cache it accesses.  We rely on a critical section to prevent
2654         * preemption and migration.  We release the critical section in
2655         * order to acquire the zone mutex if we are unable to free to the
2656         * current cache; when we re-acquire the critical section, we must
2657         * detect and handle migration if it has occurred.
2658         */
2659zfree_restart:
2660        critical_enter();
2661        cpu = curcpu;
2662        cache = &zone->uz_cpu[cpu];
2663
2664zfree_start:
2665        bucket = cache->uc_freebucket;
2666
2667        if (bucket) {
2668                /*
2669                 * Do we have room in our bucket? It is OK for this uz count
2670                 * check to be slightly out of sync.
2671                 */
2672
2673                if (bucket->ub_cnt < bucket->ub_entries) {
2674                        KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2675                            ("uma_zfree: Freeing to non free bucket index."));
2676                        bucket->ub_bucket[bucket->ub_cnt] = item;
2677                        bucket->ub_cnt++;
2678                        cache->uc_frees++;
2679                        critical_exit();
2680                        return;
2681                } else if (cache->uc_allocbucket) {
2682#ifdef UMA_DEBUG_ALLOC
2683                        printf("uma_zfree: Swapping buckets.\n");
2684#endif
2685                        /*
2686                         * We have run out of space in our freebucket.
2687                         * See if we can switch with our alloc bucket.
2688                         */
2689                        if (cache->uc_allocbucket->ub_cnt <
2690                            cache->uc_freebucket->ub_cnt) {
2691                                bucket = cache->uc_freebucket;
2692                                cache->uc_freebucket = cache->uc_allocbucket;
2693                                cache->uc_allocbucket = bucket;
2694                                goto zfree_start;
2695                        }
2696                }
2697        }
2698        /*
2699         * We can get here for two reasons:
2700         *
2701         * 1) The buckets are NULL
2702         * 2) The alloc and free buckets are both somewhat full.
2703         *
2704         * We must go back to the zone, which requires acquiring the zone lock,
2705         * which in turn means we must release and re-acquire the critical
2706         * section.  Since the critical section is released, we may be
2707         * preempted or migrate.  As such, make sure not to maintain any
2708         * thread-local state specific to the cache from prior to releasing
2709         * the critical section.
2710         */
2711        critical_exit();
2712        ZONE_LOCK(zone);
2713        critical_enter();
2714        cpu = curcpu;
2715        cache = &zone->uz_cpu[cpu];
2716        if (cache->uc_freebucket != NULL) {
2717                if (cache->uc_freebucket->ub_cnt <
2718                    cache->uc_freebucket->ub_entries) {
2719                        ZONE_UNLOCK(zone);
2720                        goto zfree_start;
2721                }
2722                if (cache->uc_allocbucket != NULL &&
2723                    (cache->uc_allocbucket->ub_cnt <
2724                    cache->uc_freebucket->ub_cnt)) {
2725                        ZONE_UNLOCK(zone);
2726                        goto zfree_start;
2727                }
2728        }
2729
2730        /* Since we have locked the zone we may as well send back our stats */
2731        zone->uz_allocs += cache->uc_allocs;
2732        cache->uc_allocs = 0;
2733        zone->uz_frees += cache->uc_frees;
2734        cache->uc_frees = 0;
2735
2736        bucket = cache->uc_freebucket;
2737        cache->uc_freebucket = NULL;
2738
2739        /* Can we throw this on the zone full list? */
2740        if (bucket != NULL) {
2741#ifdef UMA_DEBUG_ALLOC
2742                printf("uma_zfree: Putting old bucket on the free list.\n");
2743#endif
2744                /* ub_cnt is pointing to the last free item */
2745                KASSERT(bucket->ub_cnt != 0,
2746                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2747                LIST_INSERT_HEAD(&zone->uz_full_bucket,
2748                    bucket, ub_link);
2749        }
2750        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2751                LIST_REMOVE(bucket, ub_link);
2752                ZONE_UNLOCK(zone);
2753                cache->uc_freebucket = bucket;
2754                goto zfree_start;
2755        }
2756        /* We are no longer associated with this CPU. */
2757        critical_exit();
2758
2759        /* And the zone.. */
2760        ZONE_UNLOCK(zone);
2761
2762#ifdef UMA_DEBUG_ALLOC
2763        printf("uma_zfree: Allocating new free bucket.\n");
2764#endif
2765        bflags = M_NOWAIT;
2766
2767        if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2768                bflags |= M_NOVM;
2769        bucket = bucket_alloc(zone->uz_count, bflags);
2770        if (bucket) {
2771                ZONE_LOCK(zone);
2772                LIST_INSERT_HEAD(&zone->uz_free_bucket,
2773                    bucket, ub_link);
2774                ZONE_UNLOCK(zone);
2775                goto zfree_restart;
2776        }
2777
2778        /*
2779         * If nothing else caught this, we'll just do an internal free.
2780         */
2781zfree_internal:
2782        zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
2783
2784        return;
2785}
2786
2787/*
2788 * Frees an item to an INTERNAL zone or allocates a free bucket
2789 *
2790 * Arguments:
2791 *      zone   The zone to free to
2792 *      item   The item we're freeing
2793 *      udata  User supplied data for the dtor
2794 *      skip   Skip dtors and finis
2795 */
2796static void
2797zone_free_item(uma_zone_t zone, void *item, void *udata,
2798    enum zfreeskip skip, int flags)
2799{
2800        uma_slab_t slab;
2801        uma_slabrefcnt_t slabref;
2802        uma_keg_t keg;
2803        u_int8_t *mem;
2804        u_int8_t freei;
2805        int clearfull;
2806
2807        if (skip < SKIP_DTOR && zone->uz_dtor)
2808                zone->uz_dtor(item, zone->uz_size, udata);
2809
2810        if (skip < SKIP_FINI && zone->uz_fini)
2811                zone->uz_fini(item, zone->uz_size);
2812
2813        ZONE_LOCK(zone);
2814
2815        if (flags & ZFREE_STATFAIL)
2816                zone->uz_fails++;
2817        if (flags & ZFREE_STATFREE)
2818                zone->uz_frees++;
2819
2820        if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2821                mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2822                keg = zone_first_keg(zone); /* Must only be one. */
2823                if (zone->uz_flags & UMA_ZONE_HASH) {
2824                        slab = hash_sfind(&keg->uk_hash, mem);
2825                } else {
2826                        mem += keg->uk_pgoff;
2827                        slab = (uma_slab_t)mem;
2828                }
2829        } else {
2830                /* This prevents redundant lookups via free(). */
2831                if ((zone->uz_flags & UMA_ZONE_MALLOC) && udata != NULL)
2832                        slab = (uma_slab_t)udata;
2833                else
2834                        slab = vtoslab((vm_offset_t)item);
2835                keg = slab->us_keg;
2836                keg_relock(keg, zone);
2837        }
2838        MPASS(keg == slab->us_keg);
2839
2840        /* Do we need to remove from any lists? */
2841        if (slab->us_freecount+1 == keg->uk_ipers) {
2842                LIST_REMOVE(slab, us_link);
2843                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2844        } else if (slab->us_freecount == 0) {
2845                LIST_REMOVE(slab, us_link);
2846                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2847        }
2848
2849        /* Slab management stuff */
2850        freei = ((unsigned long)item - (unsigned long)slab->us_data)
2851                / keg->uk_rsize;
2852
2853#ifdef INVARIANTS
2854        if (!skip)
2855                uma_dbg_free(zone, slab, item);
2856#endif
2857
2858        if (keg->uk_flags & UMA_ZONE_REFCNT) {
2859                slabref = (uma_slabrefcnt_t)slab;
2860                slabref->us_freelist[freei].us_item = slab->us_firstfree;
2861        } else {
2862                slab->us_freelist[freei].us_item = slab->us_firstfree;
2863        }
2864        slab->us_firstfree = freei;
2865        slab->us_freecount++;
2866
2867        /* Zone statistics */
2868        keg->uk_free++;
2869
2870        clearfull = 0;
2871        if (keg->uk_flags & UMA_ZFLAG_FULL) {
2872                if (keg->uk_pages < keg->uk_maxpages) {
2873                        keg->uk_flags &= ~UMA_ZFLAG_FULL;
2874                        clearfull = 1;
2875                }
2876
2877                /*
2878                 * We can handle one more allocation. Since we're clearing ZFLAG_FULL,
2879                 * wake up all procs blocked on pages. This should be uncommon, so
2880                 * keeping this simple for now (rather than adding a count of blocked
2881                 * threads etc).
2882                 */
2883                wakeup(keg);
2884        }
2885        if (clearfull) {
2886                zone_relock(zone, keg);
2887                zone->uz_flags &= ~UMA_ZFLAG_FULL;
2888                wakeup(zone);
2889                ZONE_UNLOCK(zone);
2890        } else
2891                KEG_UNLOCK(keg);
2892}
2893
2894/* See uma.h */
2895int
2896uma_zone_set_max(uma_zone_t zone, int nitems)
2897{
2898        uma_keg_t keg;
2899
2900        ZONE_LOCK(zone);
2901        keg = zone_first_keg(zone);
2902        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2903        if (keg->uk_maxpages * keg->uk_ipers < nitems)
2904                keg->uk_maxpages += keg->uk_ppera;
2905        nitems = keg->uk_maxpages * keg->uk_ipers;
2906        ZONE_UNLOCK(zone);
2907
2908        return (nitems);
2909}
2910
2911/* See uma.h */
2912int
2913uma_zone_get_max(uma_zone_t zone)
2914{
2915        int nitems;
2916        uma_keg_t keg;
2917
2918        ZONE_LOCK(zone);
2919        keg = zone_first_keg(zone);
2920        nitems = keg->uk_maxpages * keg->uk_ipers;
2921        ZONE_UNLOCK(zone);
2922
2923        return (nitems);
2924}
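
/*
 * Illustrative sketch (hypothetical zone): the cap is kept internally in
 * whole slabs, so the value actually applied may differ from the request;
 * uma_zone_set_max() returns it and uma_zone_get_max() reads it back:
 *
 *	applied = uma_zone_set_max(foo_zone, 1000);
 *	limit = uma_zone_get_max(foo_zone);
 *
 * where limit will equal the (possibly rounded) applied value.
 */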
2925
2926/* See uma.h */
2927int
2928uma_zone_get_cur(uma_zone_t zone)
2929{
2930        int64_t nitems;
2931        u_int i;
2932
2933        ZONE_LOCK(zone);
2934        nitems = zone->uz_allocs - zone->uz_frees;
2935        CPU_FOREACH(i) {
2936                /*
2937                 * See the comment in sysctl_vm_zone_stats() regarding the
2938                 * safety of accessing the per-cpu caches. With the zone lock
2939                 * held, it is safe, but can potentially result in stale data.
2940                 */
2941                nitems += zone->uz_cpu[i].uc_allocs -
2942                    zone->uz_cpu[i].uc_frees;
2943        }
2944        ZONE_UNLOCK(zone);
2945
2946        return (nitems < 0 ? 0 : nitems);
2947}
2948
2949/* See uma.h */
2950void
2951uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2952{
2953        uma_keg_t keg;
2954
2955        ZONE_LOCK(zone);
2956        keg = zone_first_keg(zone);
2957        KASSERT(keg->uk_pages == 0,
2958            ("uma_zone_set_init on non-empty keg"));
2959        keg->uk_init = uminit;
2960        ZONE_UNLOCK(zone);
2961}
2962
2963/* See uma.h */
2964void
2965uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2966{
2967        uma_keg_t keg;
2968
2969        ZONE_LOCK(zone);
2970        keg = zone_first_keg(zone);
2971        KASSERT(keg->uk_pages == 0,
2972            ("uma_zone_set_fini on non-empty keg"));
2973        keg->uk_fini = fini;
2974        ZONE_UNLOCK(zone);
2975}
2976
2977/* See uma.h */
2978void
2979uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2980{
2981        ZONE_LOCK(zone);
2982        KASSERT(zone_first_keg(zone)->uk_pages == 0,
2983            ("uma_zone_set_zinit on non-empty keg"));
2984        zone->uz_init = zinit;
2985        ZONE_UNLOCK(zone);
2986}
2987
2988/* See uma.h */
2989void
2990uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
2991{
2992        ZONE_LOCK(zone);
2993        KASSERT(zone_first_keg(zone)->uk_pages == 0,
2994            ("uma_zone_set_zfini on non-empty keg"));
2995        zone->uz_fini = zfini;
2996        ZONE_UNLOCK(zone);
2997}
2998
2999/* See uma.h */
3000/* XXX uk_freef is not actually used with the zone locked */
3001void
3002uma_zone_set_freef(uma_zone_t zone, uma_free freef)
3003{
3004
3005        ZONE_LOCK(zone);
3006        zone_first_keg(zone)->uk_freef = freef;
3007        ZONE_UNLOCK(zone);
3008}
3009
3010/* See uma.h */
3011/* XXX uk_allocf is not actually used with the zone locked */
3012void
3013uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3014{
3015        uma_keg_t keg;
3016
3017        ZONE_LOCK(zone);
3018        keg = zone_first_keg(zone);
3019        keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
3020        keg->uk_allocf = allocf;
3021        ZONE_UNLOCK(zone);
3022}
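
/*
 * Illustrative sketch (hypothetical foo_* names): a custom backend has the
 * same signature as page_alloc(), which uma_large_malloc() below calls:
 *
 *	static void *
 *	foo_pages_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
 *	{
 *		*pflag = UMA_SLAB_PRIV;
 *		return (foo_pool_get(bytes, wait));
 *	}
 *
 *	uma_zone_set_allocf(foo_zone, foo_pages_alloc);
 */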
3023
3024#ifndef __rtems__
3025/* See uma.h */
3026int
3027uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
3028{
3029        uma_keg_t keg;
3030        vm_offset_t kva;
3031        int pages;
3032
3033        keg = zone_first_keg(zone);
3034        pages = count / keg->uk_ipers;
3035
3036        if (pages * keg->uk_ipers < count)
3037                pages++;
3038
3039        kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
3040
3041        if (kva == 0)
3042                return (0);
3043        if (obj == NULL)
3044                obj = vm_object_allocate(OBJT_PHYS, pages);
3045        else {
3046                VM_OBJECT_LOCK_INIT(obj, "uma object");
3047                _vm_object_allocate(OBJT_PHYS, pages, obj);
3048        }
3049        ZONE_LOCK(zone);
3050        keg->uk_kva = kva;
3051        keg->uk_obj = obj;
3052        keg->uk_maxpages = pages;
3053        keg->uk_allocf = obj_alloc;
3054        keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
3055        ZONE_UNLOCK(zone);
3056        return (1);
3057}
3058#endif /* __rtems__ */
3059
3060/* See uma.h */
3061void
3062uma_prealloc(uma_zone_t zone, int items)
3063{
3064        int slabs;
3065        uma_slab_t slab;
3066        uma_keg_t keg;
3067
3068        keg = zone_first_keg(zone);
3069        ZONE_LOCK(zone);
3070        slabs = items / keg->uk_ipers;
3071        if (slabs * keg->uk_ipers < items)
3072                slabs++;
3073        while (slabs > 0) {
3074                slab = keg_alloc_slab(keg, zone, M_WAITOK);
3075                if (slab == NULL)
3076                        break;
3077                MPASS(slab->us_keg == keg);
3078                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3079                slabs--;
3080        }
3081        ZONE_UNLOCK(zone);
3082}
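
/*
 * Illustrative sketch (hypothetical zone): callers that expect to allocate
 * without sleeping later can pre-fault slabs up front so that subsequent
 * M_NOWAIT allocations are more likely to succeed:
 *
 *	uma_zone_set_max(foo_zone, 256);
 *	uma_prealloc(foo_zone, 256);
 */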
3083
3084/* See uma.h */
3085u_int32_t *
3086uma_find_refcnt(uma_zone_t zone, void *item)
3087{
3088        uma_slabrefcnt_t slabref;
3089        uma_keg_t keg;
3090        u_int32_t *refcnt;
3091        int idx;
3092
3093        slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
3094            (~UMA_SLAB_MASK));
3095        KASSERT(slabref != NULL && slabref->us_keg->uk_flags & UMA_ZONE_REFCNT,
3096            ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
3097        keg = slabref->us_keg;
3098        idx = ((unsigned long)item - (unsigned long)slabref->us_data)
3099            / keg->uk_rsize;
3100        refcnt = &slabref->us_freelist[idx].us_refcnt;
3101        return (refcnt);
3102}
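
/*
 * Illustrative sketch (hypothetical names): UMA_ZONE_REFCNT zones keep a
 * per-item reference count in the slab, which callers look up like this:
 *
 *	zone = uma_zcreate("foo bufs", size, NULL, NULL, NULL, NULL,
 *	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
 *	buf = uma_zalloc(zone, M_WAITOK);
 *	refcnt = uma_find_refcnt(zone, buf);
 *	*refcnt = 1;
 */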
3103
3104/* See uma.h */
3105void
3106uma_reclaim(void)
3107{
3108#ifdef UMA_DEBUG
3109        printf("UMA: vm asked us to release pages!\n");
3110#endif
3111        bucket_enable();
3112        zone_foreach(zone_drain);
3113        /*
3114         * Some slabs may have been freed, but this zone was visited early in
3115         * the list; visit it again so that we can free pages that became empty
3116         * once the other zones were drained.  We have to do the same for buckets.
3117         */
3118        zone_drain(slabzone);
3119        zone_drain(slabrefzone);
3120        bucket_zone_drain();
3121}
3122
3123/* See uma.h */
3124int
3125uma_zone_exhausted(uma_zone_t zone)
3126{
3127        int full;
3128
3129        ZONE_LOCK(zone);
3130        full = (zone->uz_flags & UMA_ZFLAG_FULL);
3131        ZONE_UNLOCK(zone);
3132        return (full); 
3133}
3134
3135int
3136uma_zone_exhausted_nolock(uma_zone_t zone)
3137{
3138        return (zone->uz_flags & UMA_ZFLAG_FULL);
3139}
3140
3141#ifndef __rtems__
3142void *
3143uma_large_malloc(int size, int wait)
3144{
3145        void *mem;
3146        uma_slab_t slab;
3147        u_int8_t flags;
3148
3149        slab = zone_alloc_item(slabzone, NULL, wait);
3150        if (slab == NULL)
3151                return (NULL);
3152        mem = page_alloc(NULL, size, &flags, wait);
3153        if (mem) {
3154                vsetslab((vm_offset_t)mem, slab);
3155                slab->us_data = mem;
3156                slab->us_flags = flags | UMA_SLAB_MALLOC;
3157                slab->us_size = size;
3158        } else {
3159                zone_free_item(slabzone, slab, NULL, SKIP_NONE,
3160                    ZFREE_STATFAIL | ZFREE_STATFREE);
3161        }
3162
3163        return (mem);
3164}
3165
3166void
3167uma_large_free(uma_slab_t slab)
3168{
3169        vsetobj((vm_offset_t)slab->us_data, kmem_object);
3170        page_free(slab->us_data, slab->us_size, slab->us_flags);
3171        zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
3172}
3173#endif /* __rtems__ */
3174
3175void
3176uma_print_stats(void)
3177{
3178        zone_foreach(uma_print_zone);
3179}
3180
static void
slab_print(uma_slab_t slab)
{
        printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
                slab->us_keg, slab->us_data, slab->us_freecount,
                slab->us_firstfree);
}

static void
cache_print(uma_cache_t cache)
{
        printf("alloc: %p(%d), free: %p(%d)\n",
                cache->uc_allocbucket,
                cache->uc_allocbucket?cache->uc_allocbucket->ub_cnt:0,
                cache->uc_freebucket,
                cache->uc_freebucket?cache->uc_freebucket->ub_cnt:0);
}

static void
uma_print_keg(uma_keg_t keg)
{
        uma_slab_t slab;

        printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
            "out %d free %d limit %d\n",
            keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
            keg->uk_ipers, keg->uk_ppera,
            (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
            (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
        printf("Part slabs:\n");
        LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
                slab_print(slab);
        printf("Free slabs:\n");
        LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
                slab_print(slab);
        printf("Full slabs:\n");
        LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
                slab_print(slab);
}

void
uma_print_zone(uma_zone_t zone)
{
        uma_cache_t cache;
        uma_klink_t kl;
        int i;

        printf("zone: %s(%p) size %d flags %#x\n",
            zone->uz_name, zone, zone->uz_size, zone->uz_flags);
        LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
                uma_print_keg(kl->kl_keg);
        CPU_FOREACH(i) {
                cache = &zone->uz_cpu[i];
                printf("CPU %d Cache:\n", i);
                cache_print(cache);
        }
}

#ifndef __rtems__
#ifdef DDB
/*
 * Generate statistics across both the zone and its per-CPU caches.  Each
 * statistic is returned through its pointer argument if that pointer is
 * non-NULL.
 *
 * Note: does not update the zone statistics, as it can't safely clear the
 * per-CPU cache statistics.
 *
 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
 * safe from off-CPU; we should modify the caches to track this information
 * directly so that we don't have to.
 */
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
    u_int64_t *freesp, u_int64_t *sleepsp)
{
        uma_cache_t cache;
        u_int64_t allocs, frees, sleeps;
        int cachefree, cpu;

        allocs = frees = sleeps = 0;
        cachefree = 0;
        CPU_FOREACH(cpu) {
                cache = &z->uz_cpu[cpu];
                if (cache->uc_allocbucket != NULL)
                        cachefree += cache->uc_allocbucket->ub_cnt;
                if (cache->uc_freebucket != NULL)
                        cachefree += cache->uc_freebucket->ub_cnt;
                allocs += cache->uc_allocs;
                frees += cache->uc_frees;
        }
        allocs += z->uz_allocs;
        frees += z->uz_frees;
        sleeps += z->uz_sleeps;
        if (cachefreep != NULL)
                *cachefreep = cachefree;
        if (allocsp != NULL)
                *allocsp = allocs;
        if (freesp != NULL)
                *freesp = frees;
        if (sleepsp != NULL)
                *sleepsp = sleeps;
}
#endif /* DDB */
#endif /* __rtems__ */

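/*
 * Sysctl handler reporting the total number of UMA zones (vm.zone_count
 * in stock FreeBSD).  Every keg's zone list is walked under uma_mtx.
 */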
static int
sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
{
        uma_keg_t kz;
        uma_zone_t z;
        int count;

        count = 0;
        mtx_lock(&uma_mtx);
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link)
                        count++;
        }
        mtx_unlock(&uma_mtx);
        return (sysctl_handle_int(oidp, &count, 0, req));
}

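/*
 * Sysctl handler streaming binary zone statistics (vm.zone_stats in
 * stock FreeBSD): a uma_stream_header, then for each zone a
 * uma_type_header followed by one uma_percpu_stat per possible CPU.
 * This is the stream parsed by libmemstat(3) and vmstat -z.
 */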
static int
sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
{
        struct uma_stream_header ush;
        struct uma_type_header uth;
        struct uma_percpu_stat ups;
        uma_bucket_t bucket;
        struct sbuf sbuf;
        uma_cache_t cache;
        uma_klink_t kl;
        uma_keg_t kz;
        uma_zone_t z;
        uma_keg_t k;
        int count, error, i;

        error = sysctl_wire_old_buffer(req, 0);
        if (error != 0)
                return (error);
        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);

        count = 0;
        mtx_lock(&uma_mtx);
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link)
                        count++;
        }

        /*
         * Insert stream header.
         */
        bzero(&ush, sizeof(ush));
        ush.ush_version = UMA_STREAM_VERSION;
        ush.ush_maxcpus = (mp_maxid + 1);
        ush.ush_count = count;
        (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));

        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
                        bzero(&uth, sizeof(uth));
                        ZONE_LOCK(z);
                        strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
                        uth.uth_align = kz->uk_align;
                        uth.uth_size = kz->uk_size;
                        uth.uth_rsize = kz->uk_rsize;
                        LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
                                k = kl->kl_keg;
                                uth.uth_maxpages += k->uk_maxpages;
                                uth.uth_pages += k->uk_pages;
                                uth.uth_keg_free += k->uk_free;
                                uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
                                    * k->uk_ipers;
                        }

                        /*
                         * A zone is secondary if it is not the first entry
                         * on the keg's zone list.
                         */
                        if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
                            (LIST_FIRST(&kz->uk_zones) != z))
                                uth.uth_zone_flags = UTH_ZONE_SECONDARY;

                        LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
                                uth.uth_zone_free += bucket->ub_cnt;
                        uth.uth_allocs = z->uz_allocs;
                        uth.uth_frees = z->uz_frees;
                        uth.uth_fails = z->uz_fails;
                        uth.uth_sleeps = z->uz_sleeps;
                        (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
                        /*
                         * It is normally unsafe to access the cache bucket
                         * pointers from a CPU that does not own the cache,
                         * but the pointers are only exchanged, never
                         * invalidated, without the zone lock held, so we
                         * accept the possible race associated with bucket
                         * exchange during monitoring.
                         */
                        for (i = 0; i < (mp_maxid + 1); i++) {
                                bzero(&ups, sizeof(ups));
                                if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
                                        goto skip;
                                if (CPU_ABSENT(i))
                                        goto skip;
                                cache = &z->uz_cpu[i];
                                if (cache->uc_allocbucket != NULL)
                                        ups.ups_cache_free +=
                                            cache->uc_allocbucket->ub_cnt;
                                if (cache->uc_freebucket != NULL)
                                        ups.ups_cache_free +=
                                            cache->uc_freebucket->ub_cnt;
                                ups.ups_allocs = cache->uc_allocs;
                                ups.ups_frees = cache->uc_frees;
skip:
                                (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
                        }
                        ZONE_UNLOCK(z);
                }
        }
        mtx_unlock(&uma_mtx);
        error = sbuf_finish(&sbuf);
        sbuf_delete(&sbuf);
        return (error);
}

#ifndef __rtems__
#ifdef DDB
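/*
 * "show uma" DDB command: print one summary line per zone, e.g. from
 * the in-kernel debugger prompt:
 *
 *      db> show uma
 */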
DB_SHOW_COMMAND(uma, db_show_uma)
{
        u_int64_t allocs, frees, sleeps;
        uma_bucket_t bucket;
        uma_keg_t kz;
        uma_zone_t z;
        int cachefree;

        db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
            "Requests", "Sleeps");
        LIST_FOREACH(kz, &uma_kegs, uk_link) {
                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
                        if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
                                allocs = z->uz_allocs;
                                frees = z->uz_frees;
                                sleeps = z->uz_sleeps;
                                cachefree = 0;
                        } else
                                uma_zone_sumstat(z, &cachefree, &allocs,
                                    &frees, &sleeps);
                        if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
                            (LIST_FIRST(&kz->uk_zones) != z)))
                                cachefree += kz->uk_free;
                        LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
                                cachefree += bucket->ub_cnt;
                        db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
                            (uintmax_t)kz->uk_size,
                            (intmax_t)(allocs - frees), cachefree,
                            (uintmax_t)allocs, sleeps);
                        if (db_pager_quit)
                                return;
                }
        }
}
#endif
#endif /* __rtems__ */