source: rtems-libbsd/freebsd/sys/vm/uma_core.c @ 66659ff

Last change on this file since 66659ff was 66659ff, checked in by Sebastian Huber <sebastian.huber@…>, on 11/06/13 at 15:20:21

Update to FreeBSD 9.2

  • Property mode set to 100644
File size: 84.8 KB
1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2002-2005, 2009 Jeffrey Roberson <jeff@FreeBSD.org>
5 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
6 * Copyright (c) 2004-2006 Robert N. M. Watson
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice unmodified, this list of conditions, and the following
14 *    disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/*
32 * uma_core.c  Implementation of the Universal Memory allocator
33 *
34 * This allocator is intended to replace the multitude of similar object caches
35 * in the standard FreeBSD kernel.  The intent is to be flexible as well as
36 * efficient.  A primary design goal is to return unused memory to the rest of
37 * the system.  This will make the system as a whole more flexible due to the
38 * ability to move memory to subsystems which most need it instead of leaving
39 * pools of reserved memory unused.
40 *
41 * The basic ideas stem from similar slab/zone based allocators whose algorithms
42 * are well known.
43 *
44 */
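/*
 * Illustrative sketch (not part of the original source): typical consumer
 * usage of the public UMA API declared in <vm/uma.h>.  The zone name and
 * the "foo" identifiers below are hypothetical.
 */
#if 0
struct foo {
        int     f_state;
        char    f_buf[64];
};

static uma_zone_t foo_zone;

static void
foo_uma_example(void)
{
        struct foo *fp;

        /* Create a zone (and backing keg) for fixed-size 'struct foo' items. */
        foo_zone = uma_zcreate("foo", sizeof(struct foo),
            NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);

        /* Fast-path allocation; M_WAITOK may sleep, M_ZERO clears the item. */
        fp = uma_zalloc(foo_zone, M_WAITOK | M_ZERO);
        fp->f_state = 1;

        /* Return the item to the per-CPU cache (or ultimately the keg). */
        uma_zfree(foo_zone, fp);

        uma_zdestroy(foo_zone);
}
#endif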
45
46/*
47 * TODO:
48 *      - Improve memory usage for large allocations
49 *      - Investigate cache size adjustments
50 */
51
52#include <sys/cdefs.h>
53__FBSDID("$FreeBSD$");
54
55/* I should really use ktr.. */
56/*
57#define UMA_DEBUG 1
58#define UMA_DEBUG_ALLOC 1
59#define UMA_DEBUG_ALLOC_1 1
60*/
61
62#include <rtems/bsd/local/opt_ddb.h>
63#include <rtems/bsd/local/opt_param.h>
64
65#include <rtems/bsd/sys/param.h>
66#include <sys/systm.h>
67#include <sys/kernel.h>
68#include <rtems/bsd/sys/types.h>
69#include <sys/queue.h>
70#include <sys/malloc.h>
71#include <sys/ktr.h>
72#include <rtems/bsd/sys/lock.h>
73#include <sys/sysctl.h>
74#include <sys/mutex.h>
75#include <sys/proc.h>
76#include <sys/sbuf.h>
77#include <sys/smp.h>
78#include <sys/vmmeter.h>
79
80#include <vm/vm.h>
81#include <vm/vm_object.h>
82#include <vm/vm_page.h>
83#include <vm/vm_param.h>
84#include <vm/vm_map.h>
85#include <vm/vm_kern.h>
86#include <vm/vm_extern.h>
87#include <vm/uma.h>
88#include <vm/uma_int.h>
89#include <vm/uma_dbg.h>
90
91#include <ddb/ddb.h>
92
93#ifdef __rtems__
94rtems_bsd_chunk_control rtems_bsd_uma_chunks;
95#endif /* __rtems__ */
96/*
97 * This is the zone and keg from which all zones are spawned.  The idea is that
98 * even the zone & keg heads are allocated from the allocator, so we use the
99 * bss section to bootstrap us.
100 */
101static struct uma_keg masterkeg;
102static struct uma_zone masterzone_k;
103static struct uma_zone masterzone_z;
104static uma_zone_t kegs = &masterzone_k;
105static uma_zone_t zones = &masterzone_z;
106
107/* This is the zone from which all of uma_slab_t's are allocated. */
108static uma_zone_t slabzone;
109static uma_zone_t slabrefzone;  /* With refcounters (for UMA_ZONE_REFCNT) */
110
111/*
112 * The initial hash tables come out of this zone so they can be allocated
113 * prior to malloc coming up.
114 */
115static uma_zone_t hashzone;
116
117/* The boot-time adjusted value for cache line alignment. */
118int uma_align_cache = 64 - 1;
119
120static MALLOC_DEFINE(M_UMAHASH, "UMAHash", "UMA Hash Buckets");
121
122#ifndef __rtems__
123/*
124 * Are we allowed to allocate buckets?
125 */
126static int bucketdisable = 1;
127#endif /* __rtems__ */
128
129/* Linked list of all kegs in the system */
130static LIST_HEAD(,uma_keg) uma_kegs = LIST_HEAD_INITIALIZER(uma_kegs);
131
132/* This mutex protects the keg list */
133static struct mtx uma_mtx;
134
135#ifndef __rtems__
136/* Linked list of boot time pages */
137static LIST_HEAD(,uma_slab) uma_boot_pages =
138    LIST_HEAD_INITIALIZER(uma_boot_pages);
139
140/* This mutex protects the boot time pages list */
141static struct mtx uma_boot_pages_mtx;
142
143/* Is the VM done starting up? */
144static int booted = 0;
145#define UMA_STARTUP     1
146#define UMA_STARTUP2    2
147#endif /* __rtems__ */
148
149/* Maximum number of allowed items-per-slab if the slab header is OFFPAGE */
150static u_int uma_max_ipers;
151static u_int uma_max_ipers_ref;
152
153/*
154 * This is the handle used to schedule events that need to happen
155 * outside of the allocation fast path.
156 */
157static struct callout uma_callout;
158#define UMA_TIMEOUT     20              /* Seconds for callout interval. */
159
160/*
161 * This structure is passed as the zone ctor arg so that I don't have to create
162 * a special allocation function just for zones.
163 */
164struct uma_zctor_args {
165        const char *name;
166        size_t size;
167        uma_ctor ctor;
168        uma_dtor dtor;
169        uma_init uminit;
170        uma_fini fini;
171        uma_keg_t keg;
172        int align;
173        u_int32_t flags;
174};
175
176struct uma_kctor_args {
177        uma_zone_t zone;
178        size_t size;
179        uma_init uminit;
180        uma_fini fini;
181        int align;
182        u_int32_t flags;
183};
184
185struct uma_bucket_zone {
186        uma_zone_t      ubz_zone;
187        char            *ubz_name;
188        int             ubz_entries;
189};
190
191#define BUCKET_MAX      128
192
193struct uma_bucket_zone bucket_zones[] = {
194        { NULL, "16 Bucket", 16 },
195        { NULL, "32 Bucket", 32 },
196        { NULL, "64 Bucket", 64 },
197        { NULL, "128 Bucket", 128 },
198        { NULL, NULL, 0}
199};
200
201#define BUCKET_SHIFT    4
202#define BUCKET_ZONES    ((BUCKET_MAX >> BUCKET_SHIFT) + 1)
203
204/*
205 * bucket_size[] maps requested bucket sizes to zones that allocate a bucket
206 * of approximately the right size.
207 */
208static uint8_t bucket_size[BUCKET_ZONES];
209
210/*
211 * Flags and enumerations to be passed to internal functions.
212 */
213enum zfreeskip { SKIP_NONE, SKIP_DTOR, SKIP_FINI };
214
215#define ZFREE_STATFAIL  0x00000001      /* Update zone failure statistic. */
216#define ZFREE_STATFREE  0x00000002      /* Update zone free statistic. */
217
218/* Prototypes.. */
219
220#ifndef __rtems__
221static void *obj_alloc(uma_zone_t, int, u_int8_t *, int);
222#endif /* __rtems__ */
223static void *page_alloc(uma_zone_t, int, u_int8_t *, int);
224#ifndef __rtems__
225static void *startup_alloc(uma_zone_t, int, u_int8_t *, int);
226#endif /* __rtems__ */
227static void page_free(void *, int, u_int8_t);
228static uma_slab_t keg_alloc_slab(uma_keg_t, uma_zone_t, int);
229static void cache_drain(uma_zone_t);
230static void bucket_drain(uma_zone_t, uma_bucket_t);
231static void bucket_cache_drain(uma_zone_t zone);
232static int keg_ctor(void *, int, void *, int);
233static void keg_dtor(void *, int, void *);
234static int zone_ctor(void *, int, void *, int);
235static void zone_dtor(void *, int, void *);
236static int zero_init(void *, int, int);
237static void keg_small_init(uma_keg_t keg);
238static void keg_large_init(uma_keg_t keg);
239static void zone_foreach(void (*zfunc)(uma_zone_t));
240static void zone_timeout(uma_zone_t zone);
241static int hash_alloc(struct uma_hash *);
242static int hash_expand(struct uma_hash *, struct uma_hash *);
243static void hash_free(struct uma_hash *hash);
244static void uma_timeout(void *);
245static void uma_startup3(void);
246static void *zone_alloc_item(uma_zone_t, void *, int);
247static void zone_free_item(uma_zone_t, void *, void *, enum zfreeskip,
248    int);
249static void bucket_enable(void);
250static void bucket_init(void);
251static uma_bucket_t bucket_alloc(int, int);
252static void bucket_free(uma_bucket_t);
253static void bucket_zone_drain(void);
254static int zone_alloc_bucket(uma_zone_t zone, int flags);
255static uma_slab_t zone_fetch_slab(uma_zone_t zone, uma_keg_t last, int flags);
256#ifndef __rtems__
257static uma_slab_t zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int flags);
258#endif /* __rtems__ */
259static void *slab_alloc_item(uma_zone_t zone, uma_slab_t slab);
260static uma_keg_t uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit,
261    uma_fini fini, int align, u_int32_t flags);
262static inline void zone_relock(uma_zone_t zone, uma_keg_t keg);
263static inline void keg_relock(uma_keg_t keg, uma_zone_t zone);
264
265void uma_print_zone(uma_zone_t);
266void uma_print_stats(void);
267#ifndef __rtems__
268static int sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS);
269static int sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS);
270#endif
271
272SYSINIT(uma_startup3, SI_SUB_VM_CONF, SI_ORDER_SECOND, uma_startup3, NULL);
273
274#ifndef __rtems__
275SYSCTL_PROC(_vm, OID_AUTO, zone_count, CTLFLAG_RD|CTLTYPE_INT,
276    0, 0, sysctl_vm_zone_count, "I", "Number of UMA zones");
277
278SYSCTL_PROC(_vm, OID_AUTO, zone_stats, CTLFLAG_RD|CTLTYPE_STRUCT,
279    0, 0, sysctl_vm_zone_stats, "s,struct uma_type_header", "Zone Stats");
280#endif /* __rtems__ */
281
282/*
283 * This routine checks to see whether or not it's safe to enable buckets.
284 */
285
286static void
287bucket_enable(void)
288{
289#ifndef __rtems__
290        bucketdisable = vm_page_count_min();
291#endif /* __rtems__ */
292}
293
294/*
295 * Initialize bucket_zones, the array of zones of buckets of various sizes.
296 *
297 * For each zone, calculate the memory required for each bucket, consisting
298 * of the header and an array of pointers.  Initialize bucket_size[] so that
299 * each range of requested entry counts maps to the appropriate zone.
300 */
301static void
302bucket_init(void)
303{
304        struct uma_bucket_zone *ubz;
305        int i;
306        int j;
307
308        for (i = 0, j = 0; bucket_zones[j].ubz_entries != 0; j++) {
309                int size;
310
311                ubz = &bucket_zones[j];
312                size = roundup(sizeof(struct uma_bucket), sizeof(void *));
313                size += sizeof(void *) * ubz->ubz_entries;
314                ubz->ubz_zone = uma_zcreate(ubz->ubz_name, size,
315                    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR,
316                    UMA_ZFLAG_INTERNAL | UMA_ZFLAG_BUCKET);
317                for (; i <= ubz->ubz_entries; i += (1 << BUCKET_SHIFT))
318                        bucket_size[i >> BUCKET_SHIFT] = j;
319        }
320}
321
322/*
323 * Given a desired number of entries for a bucket, return the zone from which
324 * to allocate the bucket.
325 */
326static struct uma_bucket_zone *
327bucket_zone_lookup(int entries)
328{
329        int idx;
330
331        idx = howmany(entries, 1 << BUCKET_SHIFT);
332        return (&bucket_zones[bucket_size[idx]]);
333}
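/*
 * Illustrative note (not in the original source): with BUCKET_SHIFT == 4
 * the bucket_size[] table is indexed in steps of 16 entries, so a request
 * for, e.g., 24 entries gives idx = howmany(24, 16) = 2, which
 * bucket_init() pointed at the "32 Bucket" zone above.
 */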
334
335static uma_bucket_t
336bucket_alloc(int entries, int bflags)
337{
338        struct uma_bucket_zone *ubz;
339        uma_bucket_t bucket;
340
341#ifndef __rtems__
342        /*
343         * This is to stop us from allocating per cpu buckets while we're
344         * running out of vm.boot_pages.  Otherwise, we would exhaust the
345         * boot pages.  This also prevents us from allocating buckets in
346         * low memory situations.
347         */
348        if (bucketdisable)
349                return (NULL);
350#endif /* __rtems__ */
351
352        ubz = bucket_zone_lookup(entries);
353        bucket = zone_alloc_item(ubz->ubz_zone, NULL, bflags);
354        if (bucket) {
355#ifdef INVARIANTS
356                bzero(bucket->ub_bucket, sizeof(void *) * ubz->ubz_entries);
357#endif
358                bucket->ub_cnt = 0;
359                bucket->ub_entries = ubz->ubz_entries;
360        }
361
362        return (bucket);
363}
364
365static void
366bucket_free(uma_bucket_t bucket)
367{
368        struct uma_bucket_zone *ubz;
369
370        ubz = bucket_zone_lookup(bucket->ub_entries);
371        zone_free_item(ubz->ubz_zone, bucket, NULL, SKIP_NONE,
372            ZFREE_STATFREE);
373}
374
375static void
376bucket_zone_drain(void)
377{
378        struct uma_bucket_zone *ubz;
379
380        for (ubz = &bucket_zones[0]; ubz->ubz_entries != 0; ubz++)
381                zone_drain(ubz->ubz_zone);
382}
383
384static inline uma_keg_t
385zone_first_keg(uma_zone_t zone)
386{
387
388        return (LIST_FIRST(&zone->uz_kegs)->kl_keg);
389}
390
391static void
392zone_foreach_keg(uma_zone_t zone, void (*kegfn)(uma_keg_t))
393{
394        uma_klink_t klink;
395
396        LIST_FOREACH(klink, &zone->uz_kegs, kl_link)
397                kegfn(klink->kl_keg);
398}
399
400/*
401 * Routine called by timeout which is used to fire off some time interval
402 * based calculations.  (stats, hash size, etc.)
403 *
404 * Arguments:
405 *      arg   Unused
406 *
407 * Returns:
408 *      Nothing
409 */
410static void
411uma_timeout(void *unused)
412{
413        bucket_enable();
414        zone_foreach(zone_timeout);
415
416        /* Reschedule this event */
417        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
418}
419
420/*
421 * Routine to perform timeout driven calculations.  This expands the
422 * hashes and does per cpu statistics aggregation.
423 *
424 *  Returns nothing.
425 */
426static void
427keg_timeout(uma_keg_t keg)
428{
429
430        KEG_LOCK(keg);
431        /*
432         * Expand the keg hash table.
433         *
434         * This is done if the number of slabs is larger than the hash size.
435         * What I'm trying to do here is completely reduce collisions.  This
436         * may be a little aggressive.  Should I allow for two collisions max?
437         */
438        if (keg->uk_flags & UMA_ZONE_HASH &&
439            keg->uk_pages / keg->uk_ppera >= keg->uk_hash.uh_hashsize) {
440                struct uma_hash newhash;
441                struct uma_hash oldhash;
442                int ret;
443
444                /*
445                 * This is so involved because allocating and freeing
446                 * while the keg lock is held will lead to deadlock.
447                 * I have to do everything in stages and check for
448                 * races.
449                 */
450                newhash = keg->uk_hash;
451                KEG_UNLOCK(keg);
452                ret = hash_alloc(&newhash);
453                KEG_LOCK(keg);
454                if (ret) {
455                        if (hash_expand(&keg->uk_hash, &newhash)) {
456                                oldhash = keg->uk_hash;
457                                keg->uk_hash = newhash;
458                        } else
459                                oldhash = newhash;
460
461                        KEG_UNLOCK(keg);
462                        hash_free(&oldhash);
463                        KEG_LOCK(keg);
464                }
465        }
466        KEG_UNLOCK(keg);
467}
468
469static void
470zone_timeout(uma_zone_t zone)
471{
472
473        zone_foreach_keg(zone, &keg_timeout);
474}
475
476/*
477 * Allocate and zero fill the next sized hash table from the appropriate
478 * backing store.
479 *
480 * Arguments:
481 *      hash  A new hash structure with the old hash size in uh_hashsize
482 *
483 * Returns:
484 *      1 on success and 0 on failure.
485 */
486static int
487hash_alloc(struct uma_hash *hash)
488{
489        int oldsize;
490        int alloc;
491
492        oldsize = hash->uh_hashsize;
493
494        /* We're just going to go to a power of two greater */
495        if (oldsize)  {
496                hash->uh_hashsize = oldsize * 2;
497                alloc = sizeof(hash->uh_slab_hash[0]) * hash->uh_hashsize;
498                hash->uh_slab_hash = (struct slabhead *)malloc(alloc,
499                    M_UMAHASH, M_NOWAIT);
500        } else {
501                alloc = sizeof(hash->uh_slab_hash[0]) * UMA_HASH_SIZE_INIT;
502                hash->uh_slab_hash = zone_alloc_item(hashzone, NULL,
503                    M_WAITOK);
504                hash->uh_hashsize = UMA_HASH_SIZE_INIT;
505        }
506        if (hash->uh_slab_hash) {
507                bzero(hash->uh_slab_hash, alloc);
508                hash->uh_hashmask = hash->uh_hashsize - 1;
509                return (1);
510        }
511
512        return (0);
513}
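/*
 * Illustrative sketch (an assumption, not the actual UMA_HASH() macro):
 * because uh_hashsize is always a power of two, uh_hashmask can select a
 * bucket with a single AND instead of a modulo, roughly as follows.
 */
#if 0
static __inline int
example_hash_bucket(struct uma_hash *hash, const void *data)
{
        /* Equivalent to (addr >> UMA_SLAB_SHIFT) % uh_hashsize. */
        return (int)(((uintptr_t)data >> UMA_SLAB_SHIFT) & hash->uh_hashmask);
}
#endif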
514
515/*
516 * Expands the hash table for HASH zones.  This is done from zone_timeout
517 * to reduce collisions.  This must not be done in the regular allocation
518 * path, otherwise, we can recurse on the vm while allocating pages.
519 *
520 * Arguments:
521 *      oldhash  The hash you want to expand
522 *      newhash  The hash structure for the new table
523 *
524 * Returns:
525 *      Nothing
526 *
527 * Discussion:
528 */
529static int
530hash_expand(struct uma_hash *oldhash, struct uma_hash *newhash)
531{
532        uma_slab_t slab;
533        int hval;
534        int i;
535
536        if (!newhash->uh_slab_hash)
537                return (0);
538
539        if (oldhash->uh_hashsize >= newhash->uh_hashsize)
540                return (0);
541
542        /*
543         * I need to investigate hash algorithms for resizing without a
544         * full rehash.
545         */
546
547        for (i = 0; i < oldhash->uh_hashsize; i++)
548                while (!SLIST_EMPTY(&oldhash->uh_slab_hash[i])) {
549                        slab = SLIST_FIRST(&oldhash->uh_slab_hash[i]);
550                        SLIST_REMOVE_HEAD(&oldhash->uh_slab_hash[i], us_hlink);
551                        hval = UMA_HASH(newhash, slab->us_data);
552                        SLIST_INSERT_HEAD(&newhash->uh_slab_hash[hval],
553                            slab, us_hlink);
554                }
555
556        return (1);
557}
558
559/*
560 * Free the hash bucket to the appropriate backing store.
561 *
562 * Arguments:
563 *      slab_hash  The hash bucket we're freeing
564 *      hashsize   The number of entries in that hash bucket
565 *
566 * Returns:
567 *      Nothing
568 */
569static void
570hash_free(struct uma_hash *hash)
571{
572        if (hash->uh_slab_hash == NULL)
573                return;
574        if (hash->uh_hashsize == UMA_HASH_SIZE_INIT)
575                zone_free_item(hashzone,
576                    hash->uh_slab_hash, NULL, SKIP_NONE, ZFREE_STATFREE);
577        else
578                free(hash->uh_slab_hash, M_UMAHASH);
579}
580
581/*
582 * Frees all outstanding items in a bucket
583 *
584 * Arguments:
585 *      zone   The zone to free to, must be unlocked.
586 *      bucket The free/alloc bucket with items, cpu queue must be locked.
587 *
588 * Returns:
589 *      Nothing
590 */
591
592static void
593bucket_drain(uma_zone_t zone, uma_bucket_t bucket)
594{
595        void *item;
596
597        if (bucket == NULL)
598                return;
599
600        while (bucket->ub_cnt > 0)  {
601                bucket->ub_cnt--;
602                item = bucket->ub_bucket[bucket->ub_cnt];
603#ifdef INVARIANTS
604                bucket->ub_bucket[bucket->ub_cnt] = NULL;
605                KASSERT(item != NULL,
606                    ("bucket_drain: botched ptr, item is NULL"));
607#endif
608                zone_free_item(zone, item, NULL, SKIP_DTOR, 0);
609        }
610}
611
612/*
613 * Drains the per cpu caches for a zone.
614 *
615 * NOTE: This may only be called while the zone is being torn down, and not
616 * during normal operation.  This is necessary in order that we do not have
617 * to migrate CPUs to drain the per-CPU caches.
618 *
619 * Arguments:
620 *      zone     The zone to drain, must be unlocked.
621 *
622 * Returns:
623 *      Nothing
624 */
625static void
626cache_drain(uma_zone_t zone)
627{
628        uma_cache_t cache;
629        int cpu;
630
631        /*
632         * XXX: It is safe to not lock the per-CPU caches, because we're
633         * tearing down the zone anyway.  I.e., there will be no further use
634         * of the caches at this point.
635         *
636         * XXX: It would be good to be able to assert that the zone is being
637         * torn down to prevent improper use of cache_drain().
638         *
639         * XXX: We lock the zone before passing into bucket_cache_drain() as
640         * it is used elsewhere.  Should the tear-down path be made special
641         * there in some form?
642         */
643        CPU_FOREACH(cpu) {
644                cache = &zone->uz_cpu[cpu];
645                bucket_drain(zone, cache->uc_allocbucket);
646                bucket_drain(zone, cache->uc_freebucket);
647                if (cache->uc_allocbucket != NULL)
648                        bucket_free(cache->uc_allocbucket);
649                if (cache->uc_freebucket != NULL)
650                        bucket_free(cache->uc_freebucket);
651                cache->uc_allocbucket = cache->uc_freebucket = NULL;
652        }
653        ZONE_LOCK(zone);
654        bucket_cache_drain(zone);
655        ZONE_UNLOCK(zone);
656}
657
658/*
659 * Drain the cached buckets from a zone.  Expects a locked zone on entry.
660 */
661static void
662bucket_cache_drain(uma_zone_t zone)
663{
664        uma_bucket_t bucket;
665
666        /*
667         * Drain the bucket queues and free the buckets, we just keep two per
668         * cpu (alloc/free).
669         */
670        while ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
671                LIST_REMOVE(bucket, ub_link);
672                ZONE_UNLOCK(zone);
673                bucket_drain(zone, bucket);
674                bucket_free(bucket);
675                ZONE_LOCK(zone);
676        }
677
678        /* Now we do the free queue.. */
679        while ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
680                LIST_REMOVE(bucket, ub_link);
681                bucket_free(bucket);
682        }
683}
684
685/*
686 * Frees pages from a keg back to the system.  This is done on demand from
687 * the pageout daemon.
688 *
689 * Returns nothing.
690 */
691static void
692keg_drain(uma_keg_t keg)
693{
694        struct slabhead freeslabs = { 0 };
695        uma_slab_t slab;
696        uma_slab_t n;
697        u_int8_t flags;
698        u_int8_t *mem;
699        int i;
700
701        /*
702         * We don't want to take pages from statically allocated kegs at this
703         * time
704         */
705        if (keg->uk_flags & UMA_ZONE_NOFREE || keg->uk_freef == NULL)
706                return;
707
708#ifdef UMA_DEBUG
709        printf("%s free items: %u\n", keg->uk_name, keg->uk_free);
710#endif
711        KEG_LOCK(keg);
712        if (keg->uk_free == 0)
713                goto finished;
714
715        slab = LIST_FIRST(&keg->uk_free_slab);
716        while (slab) {
717                n = LIST_NEXT(slab, us_link);
718
719                /* We have nowhere to free these to */
720                if (slab->us_flags & UMA_SLAB_BOOT) {
721                        slab = n;
722                        continue;
723                }
724
725                LIST_REMOVE(slab, us_link);
726                keg->uk_pages -= keg->uk_ppera;
727                keg->uk_free -= keg->uk_ipers;
728
729                if (keg->uk_flags & UMA_ZONE_HASH)
730                        UMA_HASH_REMOVE(&keg->uk_hash, slab, slab->us_data);
731
732                SLIST_INSERT_HEAD(&freeslabs, slab, us_hlink);
733
734                slab = n;
735        }
736finished:
737        KEG_UNLOCK(keg);
738
739        while ((slab = SLIST_FIRST(&freeslabs)) != NULL) {
740                SLIST_REMOVE(&freeslabs, slab, uma_slab, us_hlink);
741                if (keg->uk_fini)
742                        for (i = 0; i < keg->uk_ipers; i++)
743                                keg->uk_fini(
744                                    slab->us_data + (keg->uk_rsize * i),
745                                    keg->uk_size);
746                flags = slab->us_flags;
747                mem = slab->us_data;
748
749#ifndef __rtems__
750                if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
751                        vm_object_t obj;
752
753                        if (flags & UMA_SLAB_KMEM)
754                                obj = kmem_object;
755                        else if (flags & UMA_SLAB_KERNEL)
756                                obj = kernel_object;
757                        else
758                                obj = NULL;
759                        for (i = 0; i < keg->uk_ppera; i++)
760                                vsetobj((vm_offset_t)mem + (i * PAGE_SIZE),
761                                    obj);
762                }
763#endif /* __rtems__ */
764                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
765                        zone_free_item(keg->uk_slabzone, slab, NULL,
766                            SKIP_NONE, ZFREE_STATFREE);
767#ifdef UMA_DEBUG
768                printf("%s: Returning %d bytes.\n",
769                    keg->uk_name, UMA_SLAB_SIZE * keg->uk_ppera);
770#endif
771                keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera, flags);
772        }
773}
774
775static void
776zone_drain_wait(uma_zone_t zone, int waitok)
777{
778
779        /*
780         * Set draining to interlock with zone_dtor() so we can release our
781         * locks as we go.  Only dtor() should do a WAITOK call since it
782         * is the only call that knows the structure will still be available
783         * when it wakes up.
784         */
785        ZONE_LOCK(zone);
786        while (zone->uz_flags & UMA_ZFLAG_DRAINING) {
787                if (waitok == M_NOWAIT)
788                        goto out;
789                mtx_unlock(&uma_mtx);
790                msleep(zone, zone->uz_lock, PVM, "zonedrain", 1);
791                mtx_lock(&uma_mtx);
792        }
793        zone->uz_flags |= UMA_ZFLAG_DRAINING;
794        bucket_cache_drain(zone);
795        ZONE_UNLOCK(zone);
796        /*
797         * The DRAINING flag protects us from being freed while
798         * we're running.  Normally the uma_mtx would protect us but we
799         * must be able to release and acquire the right lock for each keg.
800         */
801        zone_foreach_keg(zone, &keg_drain);
802        ZONE_LOCK(zone);
803        zone->uz_flags &= ~UMA_ZFLAG_DRAINING;
804        wakeup(zone);
805out:
806        ZONE_UNLOCK(zone);
807}
808
809void
810zone_drain(uma_zone_t zone)
811{
812
813        zone_drain_wait(zone, M_NOWAIT);
814}
815
816/*
817 * Allocate a new slab for a keg.  This does not insert the slab onto a list.
818 *
819 * Arguments:
820 *      wait  Shall we wait?
821 *
822 * Returns:
823 *      The slab that was allocated or NULL if there is no memory and the
824 *      caller specified M_NOWAIT.
825 */
826static uma_slab_t
827keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int wait)
828{
829        uma_slabrefcnt_t slabref;
830        uma_alloc allocf;
831        uma_slab_t slab;
832        u_int8_t *mem;
833        u_int8_t flags;
834        int i;
835
836        mtx_assert(&keg->uk_lock, MA_OWNED);
837        slab = NULL;
838
839#ifdef UMA_DEBUG
840        printf("slab_zalloc:  Allocating a new slab for %s\n", keg->uk_name);
841#endif
842        allocf = keg->uk_allocf;
843        KEG_UNLOCK(keg);
844
845        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
846                slab = zone_alloc_item(keg->uk_slabzone, NULL, wait);
847                if (slab == NULL) {
848                        KEG_LOCK(keg);
849                        return NULL;
850                }
851        }
852
853        /*
854         * This reproduces the old vm_zone behavior of zero filling pages the
855         * first time they are added to a zone.
856         *
857         * Malloced items are zeroed in uma_zalloc.
858         */
859
860        if ((keg->uk_flags & UMA_ZONE_MALLOC) == 0)
861                wait |= M_ZERO;
862        else
863                wait &= ~M_ZERO;
864
865        if (keg->uk_flags & UMA_ZONE_NODUMP)
866                wait |= M_NODUMP;
867
868        /* zone is passed for legacy reasons. */
869        mem = allocf(zone, keg->uk_ppera * UMA_SLAB_SIZE, &flags, wait);
870        if (mem == NULL) {
871                if (keg->uk_flags & UMA_ZONE_OFFPAGE)
872                        zone_free_item(keg->uk_slabzone, slab, NULL,
873                            SKIP_NONE, ZFREE_STATFREE);
874                KEG_LOCK(keg);
875                return (NULL);
876        }
877
878        /* Point the slab into the allocated memory */
879        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE))
880                slab = (uma_slab_t )(mem + keg->uk_pgoff);
881
882        if (keg->uk_flags & UMA_ZONE_VTOSLAB)
883#ifndef __rtems__
884                for (i = 0; i < keg->uk_ppera; i++)
885                        vsetslab((vm_offset_t)mem + (i * PAGE_SIZE), slab);
886#else /* __rtems__ */
887                vsetslab((vm_offset_t)mem, slab);
888#endif /* __rtems__ */
889
890        slab->us_keg = keg;
891        slab->us_data = mem;
892        slab->us_freecount = keg->uk_ipers;
893        slab->us_firstfree = 0;
894        slab->us_flags = flags;
895
896        if (keg->uk_flags & UMA_ZONE_REFCNT) {
897                slabref = (uma_slabrefcnt_t)slab;
898                for (i = 0; i < keg->uk_ipers; i++) {
899                        slabref->us_freelist[i].us_refcnt = 0;
900                        slabref->us_freelist[i].us_item = i+1;
901                }
902        } else {
903                for (i = 0; i < keg->uk_ipers; i++)
904                        slab->us_freelist[i].us_item = i+1;
905        }
906
907        if (keg->uk_init != NULL) {
908                for (i = 0; i < keg->uk_ipers; i++)
909                        if (keg->uk_init(slab->us_data + (keg->uk_rsize * i),
910                            keg->uk_size, wait) != 0)
911                                break;
912                if (i != keg->uk_ipers) {
913                        if (keg->uk_fini != NULL) {
914                                for (i--; i > -1; i--)
915                                        keg->uk_fini(slab->us_data +
916                                            (keg->uk_rsize * i),
917                                            keg->uk_size);
918                        }
919#ifndef __rtems__
920                        if (keg->uk_flags & UMA_ZONE_VTOSLAB) {
921                                vm_object_t obj;
922
923                                if (flags & UMA_SLAB_KMEM)
924                                        obj = kmem_object;
925                                else if (flags & UMA_SLAB_KERNEL)
926                                        obj = kernel_object;
927                                else
928                                        obj = NULL;
929                                for (i = 0; i < keg->uk_ppera; i++)
930                                        vsetobj((vm_offset_t)mem +
931                                            (i * PAGE_SIZE), obj);
932                        }
933#endif /* __rtems__ */
934                        if (keg->uk_flags & UMA_ZONE_OFFPAGE)
935                                zone_free_item(keg->uk_slabzone, slab,
936                                    NULL, SKIP_NONE, ZFREE_STATFREE);
937                        keg->uk_freef(mem, UMA_SLAB_SIZE * keg->uk_ppera,
938                            flags);
939                        KEG_LOCK(keg);
940                        return (NULL);
941                }
942        }
943        KEG_LOCK(keg);
944
945        if (keg->uk_flags & UMA_ZONE_HASH)
946                UMA_HASH_INSERT(&keg->uk_hash, slab, mem);
947
948        keg->uk_pages += keg->uk_ppera;
949        keg->uk_free += keg->uk_ipers;
950
951        return (slab);
952}
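/*
 * Illustrative sketch (simplified; see slab_alloc_item() for the real
 * code): the freelist built above is linked by small item indices, so
 * taking an item from a slab is roughly a head pop:
 */
#if 0
static void *
example_slab_pop(uma_keg_t keg, uma_slab_t slab)
{
        u_int8_t freei;

        freei = slab->us_firstfree;                             /* head index */
        slab->us_firstfree = slab->us_freelist[freei].us_item;  /* unlink */
        slab->us_freecount--;
        return (slab->us_data + keg->uk_rsize * freei);         /* item address */
}
#endif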
953
954#ifndef __rtems__
955/*
956 * This function is intended to be used early on in place of page_alloc() so
957 * that we may use the boot time page cache to satisfy allocations before
958 * the VM is ready.
959 */
960static void *
961startup_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
962{
963        uma_keg_t keg;
964        uma_slab_t tmps;
965        int pages, check_pages;
966
967        keg = zone_first_keg(zone);
968        pages = howmany(bytes, PAGE_SIZE);
969        check_pages = pages - 1;
970        KASSERT(pages > 0, ("startup_alloc can't reserve 0 pages\n"));
971
972        /*
973         * Check our small startup cache to see if it has pages remaining.
974         */
975        mtx_lock(&uma_boot_pages_mtx);
976
977        /* First check if we have enough room. */
978        tmps = LIST_FIRST(&uma_boot_pages);
979        while (tmps != NULL && check_pages-- > 0)
980                tmps = LIST_NEXT(tmps, us_link);
981        if (tmps != NULL) {
982                /*
983                 * It's ok to lose tmps references.  The last one will
984                 * have tmps->us_data pointing to the start address of
985                 * "pages" contiguous pages of memory.
986                 */
987                while (pages-- > 0) {
988                        tmps = LIST_FIRST(&uma_boot_pages);
989                        LIST_REMOVE(tmps, us_link);
990                }
991                mtx_unlock(&uma_boot_pages_mtx);
992                *pflag = tmps->us_flags;
993                return (tmps->us_data);
994        }
995        mtx_unlock(&uma_boot_pages_mtx);
996        if (booted < UMA_STARTUP2)
997                panic("UMA: Increase vm.boot_pages");
998        /*
999         * Now that we've booted, reset these users to their real allocator.
1000         */
1001#ifdef UMA_MD_SMALL_ALLOC
1002        keg->uk_allocf = (keg->uk_ppera > 1) ? page_alloc : uma_small_alloc;
1003#else
1004        keg->uk_allocf = page_alloc;
1005#endif
1006        return keg->uk_allocf(zone, bytes, pflag, wait);
1007}
1008#endif /* __rtems__ */
1009
1010/*
1011 * Allocates a number of pages from the system
1012 *
1013 * Arguments:
1014 *      bytes  The number of bytes requested
1015 *      wait  Shall we wait?
1016 *
1017 * Returns:
1018 *      A pointer to the alloced memory or possibly
1019 *      NULL if M_NOWAIT is set.
1020 */
1021static void *
1022page_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
1023{
1024        void *p;        /* Returned page */
1025
1026        *pflag = UMA_SLAB_KMEM;
1027#ifndef __rtems__
1028        p = (void *) kmem_malloc(kmem_map, bytes, wait);
1029#else /* __rtems__ */
1030        p = rtems_bsd_chunk_alloc(&rtems_bsd_uma_chunks, bytes);
1031#endif /* __rtems__ */
1032
1033        return (p);
1034}
1035
1036#ifndef __rtems__
1037/*
1038 * Allocates a number of pages from within an object
1039 *
1040 * Arguments:
1041 *      bytes  The number of bytes requested
1042 *      wait   Shall we wait?
1043 *
1044 * Returns:
1045 *      A pointer to the alloced memory or possibly
1046 *      NULL if M_NOWAIT is set.
1047 */
1048static void *
1049obj_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
1050{
1051        vm_object_t object;
1052        vm_offset_t retkva, zkva;
1053        vm_page_t p;
1054        int pages, startpages;
1055        uma_keg_t keg;
1056
1057        keg = zone_first_keg(zone);
1058        object = keg->uk_obj;
1059        retkva = 0;
1060
1061        /*
1062         * This looks a little weird since we're getting one page at a time.
1063         */
1064        VM_OBJECT_LOCK(object);
1065        p = TAILQ_LAST(&object->memq, pglist);
1066        pages = p != NULL ? p->pindex + 1 : 0;
1067        startpages = pages;
1068        zkva = keg->uk_kva + pages * PAGE_SIZE;
1069        for (; bytes > 0; bytes -= PAGE_SIZE) {
1070                p = vm_page_alloc(object, pages,
1071                    VM_ALLOC_INTERRUPT | VM_ALLOC_WIRED);
1072                if (p == NULL) {
1073                        if (pages != startpages)
1074                                pmap_qremove(retkva, pages - startpages);
1075                        while (pages != startpages) {
1076                                pages--;
1077                                p = TAILQ_LAST(&object->memq, pglist);
1078                                vm_page_unwire(p, 0);
1079                                vm_page_free(p);
1080                        }
1081                        retkva = 0;
1082                        goto done;
1083                }
1084                pmap_qenter(zkva, &p, 1);
1085                if (retkva == 0)
1086                        retkva = zkva;
1087                zkva += PAGE_SIZE;
1088                pages += 1;
1089        }
1090done:
1091        VM_OBJECT_UNLOCK(object);
1092        *flags = UMA_SLAB_PRIV;
1093
1094        return ((void *)retkva);
1095}
1096#endif /* __rtems__ */
1097
1098/*
1099 * Frees a number of pages to the system
1100 *
1101 * Arguments:
1102 *      mem   A pointer to the memory to be freed
1103 *      size  The size of the memory being freed
1104 *      flags The original p->us_flags field
1105 *
1106 * Returns:
1107 *      Nothing
1108 */
1109static void
1110page_free(void *mem, int size, u_int8_t flags)
1111{
1112#ifndef __rtems__
1113        vm_map_t map;
1114
1115        if (flags & UMA_SLAB_KMEM)
1116                map = kmem_map;
1117        else if (flags & UMA_SLAB_KERNEL)
1118                map = kernel_map;
1119        else
1120                panic("UMA: page_free used with invalid flags %d", flags);
1121
1122        kmem_free(map, (vm_offset_t)mem, size);
1123#else /* __rtems__ */
1124        rtems_bsd_chunk_free(&rtems_bsd_uma_chunks, mem);
1125#endif /* __rtems__ */
1126}
1127
1128/*
1129 * Zero fill initializer
1130 *
1131 * Arguments/Returns follow uma_init specifications
1132 */
1133static int
1134zero_init(void *mem, int size, int flags)
1135{
1136        bzero(mem, size);
1137        return (0);
1138}
1139
1140/*
1141 * Finish creating a small uma keg.  This calculates ipers, and the keg size.
1142 *
1143 * Arguments
1144 *      keg  The zone we should initialize
1145 *
1146 * Returns
1147 *      Nothing
1148 */
1149static void
1150keg_small_init(uma_keg_t keg)
1151{
1152        u_int rsize;
1153        u_int memused;
1154        u_int wastedspace;
1155        u_int shsize;
1156
1157        KASSERT(keg != NULL, ("Keg is null in keg_small_init"));
1158        rsize = keg->uk_size;
1159
1160        if (rsize < UMA_SMALLEST_UNIT)
1161                rsize = UMA_SMALLEST_UNIT;
1162        if (rsize & keg->uk_align)
1163                rsize = (rsize & ~keg->uk_align) + (keg->uk_align + 1);
1164
1165        keg->uk_rsize = rsize;
1166        keg->uk_ppera = 1;
1167
1168        if (keg->uk_flags & UMA_ZONE_REFCNT) {
1169                rsize += UMA_FRITMREF_SZ;       /* linkage & refcnt */
1170                shsize = sizeof(struct uma_slab_refcnt);
1171        } else {
1172                rsize += UMA_FRITM_SZ;  /* Account for linkage */
1173                shsize = sizeof(struct uma_slab);
1174        }
1175
1176        keg->uk_ipers = (UMA_SLAB_SIZE - shsize) / rsize;
1177        KASSERT(keg->uk_ipers != 0, ("keg_small_init: ipers is 0"));
1178        memused = keg->uk_ipers * rsize + shsize;
1179        wastedspace = UMA_SLAB_SIZE - memused;
1180
1181        /*
1182         * We can't do OFFPAGE if we're internal or if we've been
1183         * asked to not go to the VM for buckets.  If we do this we
1184         * may end up going to the VM (kmem_map) for slabs which we
1185         * do not want to do if we're UMA_ZFLAG_CACHEONLY as a
1186         * result of UMA_ZONE_VM, which clearly forbids it.
1187         */
1188        if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) ||
1189            (keg->uk_flags & UMA_ZFLAG_CACHEONLY))
1190                return;
1191
1192        if ((wastedspace >= UMA_MAX_WASTE) &&
1193            (keg->uk_ipers < (UMA_SLAB_SIZE / keg->uk_rsize))) {
1194                keg->uk_ipers = UMA_SLAB_SIZE / keg->uk_rsize;
1195                KASSERT(keg->uk_ipers <= 255,
1196                    ("keg_small_init: keg->uk_ipers too high!"));
1197#ifdef UMA_DEBUG
1198                printf("UMA decided we need offpage slab headers for "
1199                    "keg: %s, calculated wastedspace = %d, "
1200                    "maximum wasted space allowed = %d, "
1201                    "calculated ipers = %d, "
1202                    "new wasted space = %d\n", keg->uk_name, wastedspace,
1203                    UMA_MAX_WASTE, keg->uk_ipers,
1204                    UMA_SLAB_SIZE - keg->uk_ipers * keg->uk_rsize);
1205#endif
1206                keg->uk_flags |= UMA_ZONE_OFFPAGE;
1207                if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1208                        keg->uk_flags |= UMA_ZONE_HASH;
1209        }
1210}
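/*
 * Worked example (illustrative numbers only): assuming UMA_SLAB_SIZE is
 * 4096, a slab header size shsize of 64 and an aligned item size rsize of
 * 256 (including the free-list linkage), the in-page layout above yields
 * uk_ipers = (4096 - 64) / 256 = 15 and
 * wastedspace = 4096 - (15 * 256 + 64) = 192 bytes.  Had wastedspace
 * reached UMA_MAX_WASTE and had more items fit without the in-page
 * header, the keg would have been switched to UMA_ZONE_OFFPAGE above.
 */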
1211
1212/*
1213 * Finish creating a large (> UMA_SLAB_SIZE) uma keg.  Just give in and do
1214 * OFFPAGE for now.  When I can allow for more dynamic slab sizes this will be
1215 * more complicated.
1216 *
1217 * Arguments
1218 *      keg  The keg we should initialize
1219 *
1220 * Returns
1221 *      Nothing
1222 */
1223static void
1224keg_large_init(uma_keg_t keg)
1225{
1226        int pages;
1227
1228        KASSERT(keg != NULL, ("Keg is null in keg_large_init"));
1229        KASSERT((keg->uk_flags & UMA_ZFLAG_CACHEONLY) == 0,
1230            ("keg_large_init: Cannot large-init a UMA_ZFLAG_CACHEONLY keg"));
1231
1232        pages = keg->uk_size / UMA_SLAB_SIZE;
1233
1234        /* Account for remainder */
1235        if ((pages * UMA_SLAB_SIZE) < keg->uk_size)
1236                pages++;
1237
1238        keg->uk_ppera = pages;
1239        keg->uk_ipers = 1;
1240        keg->uk_rsize = keg->uk_size;
1241
1242        /* We can't do OFFPAGE if we're internal, bail out here. */
1243        if (keg->uk_flags & UMA_ZFLAG_INTERNAL)
1244                return;
1245
1246        keg->uk_flags |= UMA_ZONE_OFFPAGE;
1247        if ((keg->uk_flags & UMA_ZONE_VTOSLAB) == 0)
1248                keg->uk_flags |= UMA_ZONE_HASH;
1249}
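/*
 * Worked example (illustrative numbers only): assuming UMA_SLAB_SIZE is
 * 4096, a keg with uk_size = 9000 gets uk_ppera = 3 pages per slab and a
 * single item per slab, with the slab header kept OFFPAGE.
 */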
1250
1251static void
1252keg_cachespread_init(uma_keg_t keg)
1253{
1254        int alignsize;
1255        int trailer;
1256        int pages;
1257        int rsize;
1258
1259        alignsize = keg->uk_align + 1;
1260        rsize = keg->uk_size;
1261        /*
1262         * We want one item to start on every align boundary in a page.  To
1263         * do this we will span pages.  We will also extend the item by the
1264         * size of align if it is an even multiple of align.  Otherwise, it
1265         * would fall on the same boundary every time.
1266         */
1267        if (rsize & keg->uk_align)
1268                rsize = (rsize & ~keg->uk_align) + alignsize;
1269        if ((rsize & alignsize) == 0)
1270                rsize += alignsize;
1271        trailer = rsize - keg->uk_size;
1272        pages = (rsize * (PAGE_SIZE / alignsize)) / PAGE_SIZE;
1273        pages = MIN(pages, (128 * 1024) / PAGE_SIZE);
1274        keg->uk_rsize = rsize;
1275        keg->uk_ppera = pages;
1276        keg->uk_ipers = ((pages * PAGE_SIZE) + trailer) / rsize;
1277        keg->uk_flags |= UMA_ZONE_OFFPAGE | UMA_ZONE_VTOSLAB;
1278        KASSERT(keg->uk_ipers <= uma_max_ipers,
1279            ("keg_cachespread_init: keg->uk_ipers too high(%d) increase max_ipers",
1280            keg->uk_ipers));
1281}
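/*
 * Worked example (illustrative numbers only): assuming PAGE_SIZE is 4096,
 * a 128-byte item with 64-byte (cache line) alignment has its rsize
 * padded to 192 so consecutive items do not repeat the same offset.
 * pages = (192 * 64) / 4096 = 3, ipers = (3 * 4096 + 64) / 192 = 64, and
 * the 64 item offsets cycle through every 64-byte boundary in a page.
 */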
1282
1283/*
1284 * Keg header ctor.  This initializes all fields, locks, etc.  And inserts
1285 * the keg onto the global keg list.
1286 *
1287 * Arguments/Returns follow uma_ctor specifications
1288 *      udata  Actually uma_kctor_args
1289 */
1290static int
1291keg_ctor(void *mem, int size, void *udata, int flags)
1292{
1293        struct uma_kctor_args *arg = udata;
1294        uma_keg_t keg = mem;
1295        uma_zone_t zone;
1296
1297        bzero(keg, size);
1298        keg->uk_size = arg->size;
1299        keg->uk_init = arg->uminit;
1300        keg->uk_fini = arg->fini;
1301        keg->uk_align = arg->align;
1302        keg->uk_free = 0;
1303        keg->uk_pages = 0;
1304        keg->uk_flags = arg->flags;
1305        keg->uk_allocf = page_alloc;
1306        keg->uk_freef = page_free;
1307        keg->uk_recurse = 0;
1308        keg->uk_slabzone = NULL;
1309
1310        /*
1311         * The master zone is passed to us at keg-creation time.
1312         */
1313        zone = arg->zone;
1314        keg->uk_name = zone->uz_name;
1315
1316        if (arg->flags & UMA_ZONE_VM)
1317                keg->uk_flags |= UMA_ZFLAG_CACHEONLY;
1318
1319        if (arg->flags & UMA_ZONE_ZINIT)
1320                keg->uk_init = zero_init;
1321
1322        if (arg->flags & UMA_ZONE_REFCNT || arg->flags & UMA_ZONE_MALLOC)
1323                keg->uk_flags |= UMA_ZONE_VTOSLAB;
1324
1325        /*
1326         * The +UMA_FRITM_SZ added to uk_size is to account for the
1327         * linkage that is added to the size in keg_small_init().  If
1328         * we don't account for this here then we may end up in
1329         * keg_small_init() with a calculated 'ipers' of 0.
1330         */
1331        if (keg->uk_flags & UMA_ZONE_REFCNT) {
1332                if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1333                        keg_cachespread_init(keg);
1334                else if ((keg->uk_size+UMA_FRITMREF_SZ) >
1335                    (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)))
1336                        keg_large_init(keg);
1337                else
1338                        keg_small_init(keg);
1339        } else {
1340                if (keg->uk_flags & UMA_ZONE_CACHESPREAD)
1341                        keg_cachespread_init(keg);
1342                else if ((keg->uk_size+UMA_FRITM_SZ) >
1343                    (UMA_SLAB_SIZE - sizeof(struct uma_slab)))
1344                        keg_large_init(keg);
1345                else
1346                        keg_small_init(keg);
1347        }
1348
1349        if (keg->uk_flags & UMA_ZONE_OFFPAGE) {
1350                if (keg->uk_flags & UMA_ZONE_REFCNT)
1351                        keg->uk_slabzone = slabrefzone;
1352                else
1353                        keg->uk_slabzone = slabzone;
1354        }
1355
1356        /*
1357         * If we haven't booted yet we need allocations to go through the
1358         * startup cache until the vm is ready.
1359         */
1360        if (keg->uk_ppera == 1) {
1361#ifdef UMA_MD_SMALL_ALLOC
1362                keg->uk_allocf = uma_small_alloc;
1363                keg->uk_freef = uma_small_free;
1364
1365#ifndef __rtems__
1366                if (booted < UMA_STARTUP)
1367                        keg->uk_allocf = startup_alloc;
1368#endif /* __rtems__ */
1369#else
1370#ifndef __rtems__
1371                if (booted < UMA_STARTUP2)
1372                        keg->uk_allocf = startup_alloc;
1373#endif /* __rtems__ */
1374#endif
1375#ifndef __rtems__
1376        } else if (booted < UMA_STARTUP2 &&
1377            (keg->uk_flags & UMA_ZFLAG_INTERNAL))
1378                keg->uk_allocf = startup_alloc;
1379#else /* __rtems__ */
1380        }
1381#endif /* __rtems__ */
1382
1383        /*
1384         * Initialize keg's lock (shared among zones).
1385         */
1386        if (arg->flags & UMA_ZONE_MTXCLASS)
1387                KEG_LOCK_INIT(keg, 1);
1388        else
1389                KEG_LOCK_INIT(keg, 0);
1390
1391        /*
1392         * If we're putting the slab header in the actual page we need to
1393         * figure out where in each page it goes.  This calculates a right
1394         * justified offset into the memory on an ALIGN_PTR boundary.
1395         */
1396        if (!(keg->uk_flags & UMA_ZONE_OFFPAGE)) {
1397                u_int totsize;
1398
1399                /* Size of the slab struct and free list */
1400                if (keg->uk_flags & UMA_ZONE_REFCNT)
1401                        totsize = sizeof(struct uma_slab_refcnt) +
1402                            keg->uk_ipers * UMA_FRITMREF_SZ;
1403                else
1404                        totsize = sizeof(struct uma_slab) +
1405                            keg->uk_ipers * UMA_FRITM_SZ;
1406
1407                if (totsize & UMA_ALIGN_PTR)
1408                        totsize = (totsize & ~UMA_ALIGN_PTR) +
1409                            (UMA_ALIGN_PTR + 1);
1410                keg->uk_pgoff = (UMA_SLAB_SIZE * keg->uk_ppera) - totsize;
1411
1412                if (keg->uk_flags & UMA_ZONE_REFCNT)
1413                        totsize = keg->uk_pgoff + sizeof(struct uma_slab_refcnt)
1414                            + keg->uk_ipers * UMA_FRITMREF_SZ;
1415                else
1416                        totsize = keg->uk_pgoff + sizeof(struct uma_slab)
1417                            + keg->uk_ipers * UMA_FRITM_SZ;
1418
1419                /*
1420                 * The only way the following is possible is if our
1421                 * UMA_ALIGN_PTR adjustments have made us bigger than
1422                 * UMA_SLAB_SIZE.  I haven't checked whether this is
1423                 * mathematically possible for all cases, so we make
1424                 * sure here anyway.
1425                 */
1426                if (totsize > UMA_SLAB_SIZE * keg->uk_ppera) {
1427                        printf("zone %s ipers %d rsize %d size %d\n",
1428                            zone->uz_name, keg->uk_ipers, keg->uk_rsize,
1429                            keg->uk_size);
1430                        panic("UMA slab won't fit.");
1431                }
1432        }
1433
1434        if (keg->uk_flags & UMA_ZONE_HASH)
1435                hash_alloc(&keg->uk_hash);
1436
1437#ifdef UMA_DEBUG
1438        printf("UMA: %s(%p) size %d(%d) flags %#x ipers %d ppera %d out %d free %d\n",
1439            zone->uz_name, zone, keg->uk_size, keg->uk_rsize, keg->uk_flags,
1440            keg->uk_ipers, keg->uk_ppera,
1441            (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free);
1442#endif
1443
1444        LIST_INSERT_HEAD(&keg->uk_zones, zone, uz_link);
1445
1446        mtx_lock(&uma_mtx);
1447        LIST_INSERT_HEAD(&uma_kegs, keg, uk_link);
1448        mtx_unlock(&uma_mtx);
1449        return (0);
1450}
1451
1452/*
1453 * Zone header ctor.  This initializes all fields, locks, etc.
1454 *
1455 * Arguments/Returns follow uma_ctor specifications
1456 *      udata  Actually uma_zctor_args
1457 */
1458static int
1459zone_ctor(void *mem, int size, void *udata, int flags)
1460{
1461        struct uma_zctor_args *arg = udata;
1462        uma_zone_t zone = mem;
1463        uma_zone_t z;
1464        uma_keg_t keg;
1465
1466        bzero(zone, size);
1467        zone->uz_name = arg->name;
1468        zone->uz_ctor = arg->ctor;
1469        zone->uz_dtor = arg->dtor;
1470        zone->uz_slab = zone_fetch_slab;
1471        zone->uz_init = NULL;
1472        zone->uz_fini = NULL;
1473        zone->uz_allocs = 0;
1474        zone->uz_frees = 0;
1475        zone->uz_fails = 0;
1476        zone->uz_sleeps = 0;
1477        zone->uz_fills = zone->uz_count = 0;
1478        zone->uz_flags = 0;
1479        keg = arg->keg;
1480
1481        if (arg->flags & UMA_ZONE_SECONDARY) {
1482                KASSERT(arg->keg != NULL, ("Secondary zone on zero'd keg"));
1483                zone->uz_init = arg->uminit;
1484                zone->uz_fini = arg->fini;
1485                zone->uz_lock = &keg->uk_lock;
1486                zone->uz_flags |= UMA_ZONE_SECONDARY;
1487                mtx_lock(&uma_mtx);
1488                ZONE_LOCK(zone);
1489                LIST_FOREACH(z, &keg->uk_zones, uz_link) {
1490                        if (LIST_NEXT(z, uz_link) == NULL) {
1491                                LIST_INSERT_AFTER(z, zone, uz_link);
1492                                break;
1493                        }
1494                }
1495                ZONE_UNLOCK(zone);
1496                mtx_unlock(&uma_mtx);
1497        } else if (keg == NULL) {
1498                if ((keg = uma_kcreate(zone, arg->size, arg->uminit, arg->fini,
1499                    arg->align, arg->flags)) == NULL)
1500                        return (ENOMEM);
1501        } else {
1502                struct uma_kctor_args karg;
1503                int error;
1504
1505                /* We should only be here from uma_startup() */
1506                karg.size = arg->size;
1507                karg.uminit = arg->uminit;
1508                karg.fini = arg->fini;
1509                karg.align = arg->align;
1510                karg.flags = arg->flags;
1511                karg.zone = zone;
1512                error = keg_ctor(arg->keg, sizeof(struct uma_keg), &karg,
1513                    flags);
1514                if (error)
1515                        return (error);
1516        }
1517        /*
1518         * Link in the first keg.
1519         */
1520        zone->uz_klink.kl_keg = keg;
1521        LIST_INSERT_HEAD(&zone->uz_kegs, &zone->uz_klink, kl_link);
1522        zone->uz_lock = &keg->uk_lock;
1523        zone->uz_size = keg->uk_size;
1524        zone->uz_flags |= (keg->uk_flags &
1525            (UMA_ZONE_INHERIT | UMA_ZFLAG_INHERIT));
1526
1527        /*
1528         * Some internal zones don't have room allocated for the per cpu
1529         * caches.  If we're internal, bail out here.
1530         */
1531        if (keg->uk_flags & UMA_ZFLAG_INTERNAL) {
1532                KASSERT((zone->uz_flags & UMA_ZONE_SECONDARY) == 0,
1533                    ("Secondary zone requested UMA_ZFLAG_INTERNAL"));
1534                return (0);
1535        }
1536
1537        if (keg->uk_flags & UMA_ZONE_MAXBUCKET)
1538                zone->uz_count = BUCKET_MAX;
1539        else if (keg->uk_ipers <= BUCKET_MAX)
1540                zone->uz_count = keg->uk_ipers;
1541        else
1542                zone->uz_count = BUCKET_MAX;
1543        return (0);
1544}
1545
1546/*
1547 * Keg header dtor.  This frees all data, destroys locks, frees the hash
1548 * table and removes the keg from the global list.
1549 *
1550 * Arguments/Returns follow uma_dtor specifications
1551 *      udata  unused
1552 */
1553static void
1554keg_dtor(void *arg, int size, void *udata)
1555{
1556        uma_keg_t keg;
1557
1558        keg = (uma_keg_t)arg;
1559        KEG_LOCK(keg);
1560        if (keg->uk_free != 0) {
1561                printf("Freed UMA keg was not empty (%d items). "
1562                    " Lost %d pages of memory.\n",
1563                    keg->uk_free, keg->uk_pages);
1564        }
1565        KEG_UNLOCK(keg);
1566
1567        hash_free(&keg->uk_hash);
1568
1569        KEG_LOCK_FINI(keg);
1570}
1571
1572/*
1573 * Zone header dtor.
1574 *
1575 * Arguments/Returns follow uma_dtor specifications
1576 *      udata  unused
1577 */
1578static void
1579zone_dtor(void *arg, int size, void *udata)
1580{
1581        uma_klink_t klink;
1582        uma_zone_t zone;
1583        uma_keg_t keg;
1584
1585        zone = (uma_zone_t)arg;
1586        keg = zone_first_keg(zone);
1587
1588        if (!(zone->uz_flags & UMA_ZFLAG_INTERNAL))
1589                cache_drain(zone);
1590
1591        mtx_lock(&uma_mtx);
1592        LIST_REMOVE(zone, uz_link);
1593        mtx_unlock(&uma_mtx);
1594        /*
1595         * XXX there are some races here where
1596         * the zone can be drained but zone lock
1597         * released and then refilled before we
1598         * remove it... we don't care for now
1599         */
1600        zone_drain_wait(zone, M_WAITOK);
1601        /*
1602         * Unlink all of our kegs.
1603         */
1604        while ((klink = LIST_FIRST(&zone->uz_kegs)) != NULL) {
1605                klink->kl_keg = NULL;
1606                LIST_REMOVE(klink, kl_link);
1607                if (klink == &zone->uz_klink)
1608                        continue;
1609                free(klink, M_TEMP);
1610        }
1611        /*
1612         * We only destroy kegs from non secondary zones.
1613         */
1614        if ((zone->uz_flags & UMA_ZONE_SECONDARY) == 0)  {
1615                mtx_lock(&uma_mtx);
1616                LIST_REMOVE(keg, uk_link);
1617                mtx_unlock(&uma_mtx);
1618                zone_free_item(kegs, keg, NULL, SKIP_NONE,
1619                    ZFREE_STATFREE);
1620        }
1621}
1622
1623/*
1624 * Traverses every zone in the system and calls a callback
1625 *
1626 * Arguments:
1627 *      zfunc  A pointer to a function which accepts a zone
1628 *              as an argument.
1629 *
1630 * Returns:
1631 *      Nothing
1632 */
1633static void
1634zone_foreach(void (*zfunc)(uma_zone_t))
1635{
1636        uma_keg_t keg;
1637        uma_zone_t zone;
1638
1639        mtx_lock(&uma_mtx);
1640        LIST_FOREACH(keg, &uma_kegs, uk_link) {
1641                LIST_FOREACH(zone, &keg->uk_zones, uz_link)
1642                        zfunc(zone);
1643        }
1644        mtx_unlock(&uma_mtx);
1645}
1646
1647#ifdef __rtems__
1648static void
1649rtems_bsd_uma_chunk_info_ctor(rtems_bsd_chunk_control *self,
1650    rtems_bsd_chunk_info *info)
1651{
1652        rtems_bsd_uma_chunk_info *uci = (rtems_bsd_uma_chunk_info *) info;
1653
1654        uci->slab = NULL;
1655}
1656#endif /* __rtems__ */
1657/* Public functions */
1658/* See uma.h */
1659void
1660uma_startup(void *bootmem, int boot_pages)
1661{
1662        struct uma_zctor_args args;
1663#ifndef __rtems__
1664        uma_slab_t slab;
1665#endif /* __rtems__ */
1666        u_int slabsize;
1667        u_int objsize, totsize, wsize;
1668#ifndef __rtems__
1669        int i;
1670#endif /* __rtems__ */
1671
1672#ifdef UMA_DEBUG
1673        printf("Creating uma keg headers zone and keg.\n");
1674#endif
1675#ifdef __rtems__
1676        rtems_bsd_chunk_init(&rtems_bsd_uma_chunks,
1677            sizeof(rtems_bsd_uma_chunk_info), rtems_bsd_uma_chunk_info_ctor,
1678            rtems_bsd_chunk_info_dtor_default);
1679#endif /* __rtems__ */
1680        mtx_init(&uma_mtx, "UMA lock", NULL, MTX_DEF);
1681
1682        /*
1683         * Figure out the maximum number of items-per-slab we'll have if
1684         * we're using the OFFPAGE slab header to track free items, given
1685         * all possible object sizes and the maximum desired wastage
1686         * (UMA_MAX_WASTE).
1687         *
1688         * We iterate until we find an object size for
1689         * which the calculated wastage in keg_small_init() will be
1690         * enough to warrant OFFPAGE.  Since wasted space versus objsize
1691         * is an overall increasing see-saw function, we find the smallest
1692         * objsize such that the wastage is always acceptable for objects
1693         * with that objsize or smaller.  Since a smaller objsize always
1694         * generates a larger possible uma_max_ipers, we use this computed
1695         * objsize to calculate the largest ipers possible.  Since the
1696         * ipers calculated for OFFPAGE slab headers is always larger than
1697         * the ipers initially calculated in keg_small_init(), we use
1698         * the former's equation (UMA_SLAB_SIZE / keg->uk_rsize) to
1699         * obtain the maximum ipers possible for offpage slab headers.
1700         *
1701         * It should be noted that ipers versus objsize is an inversely
1702         * proportional function which drops off rather quickly so as
1703         * long as our UMA_MAX_WASTE is such that the objsize we calculate
1704         * falls into the portion of the inverse relation AFTER the steep
1705         * falloff, then uma_max_ipers shouldn't be too high (~10 on i386).
1706         *
1707         * Note that we have 8-bits (1 byte) to use as a freelist index
1708         * inside the actual slab header itself and this is enough to
1709         * accommodate us.  In the worst case, a UMA_SMALLEST_UNIT sized
1710         * object with offpage slab header would have ipers =
1711         * UMA_SLAB_SIZE / UMA_SMALLEST_UNIT (currently = 256), which is
1712         * 1 greater than what our byte-integer freelist index can
1713         * accommodate, but we know that this situation never occurs as
1714         * for UMA_SMALLEST_UNIT-sized objects, we will never calculate
1715         * that we need to go to offpage slab headers.  Or, if we do,
1716         * then we trap that condition below and panic in the INVARIANTS case.
1717         */
1718        wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab) - UMA_MAX_WASTE;
1719        totsize = wsize;
1720        objsize = UMA_SMALLEST_UNIT;
1721        while (totsize >= wsize) {
1722                totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab)) /
1723                    (objsize + UMA_FRITM_SZ);
1724                totsize *= (UMA_FRITM_SZ + objsize);
1725                objsize++;
1726        }
1727        if (objsize > UMA_SMALLEST_UNIT)
1728                objsize--;
1729        uma_max_ipers = MAX(UMA_SLAB_SIZE / objsize, 64);
1730
1731        wsize = UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt) - UMA_MAX_WASTE;
1732        totsize = wsize;
1733        objsize = UMA_SMALLEST_UNIT;
1734        while (totsize >= wsize) {
1735                totsize = (UMA_SLAB_SIZE - sizeof(struct uma_slab_refcnt)) /
1736                    (objsize + UMA_FRITMREF_SZ);
1737                totsize *= (UMA_FRITMREF_SZ + objsize);
1738                objsize++;
1739        }
1740        if (objsize > UMA_SMALLEST_UNIT)
1741                objsize--;
1742        uma_max_ipers_ref = MAX(UMA_SLAB_SIZE / objsize, 64);
1743
1744        KASSERT((uma_max_ipers_ref <= 255) && (uma_max_ipers <= 255),
1745            ("uma_startup: calculated uma_max_ipers values too large!"));
1746
1747#ifdef UMA_DEBUG
1748        printf("Calculated uma_max_ipers (for OFFPAGE) is %d\n", uma_max_ipers);
1749        printf("Calculated uma_max_ipers_ref (for OFFPAGE) is %d\n",
1750            uma_max_ipers_ref);
1751#endif
1752
1753        /* "manually" create the initial zone */
1754        args.name = "UMA Kegs";
1755        args.size = sizeof(struct uma_keg);
1756        args.ctor = keg_ctor;
1757        args.dtor = keg_dtor;
1758        args.uminit = zero_init;
1759        args.fini = NULL;
1760        args.keg = &masterkeg;
1761        args.align = 32 - 1;
1762        args.flags = UMA_ZFLAG_INTERNAL;
1763        /* The initial zone has no Per cpu queues so it's smaller */
1764        zone_ctor(kegs, sizeof(struct uma_zone), &args, M_WAITOK);
1765
1766#ifndef __rtems__
1767#ifdef UMA_DEBUG
1768        printf("Filling boot free list.\n");
1769#endif
1770        for (i = 0; i < boot_pages; i++) {
1771                slab = (uma_slab_t)((u_int8_t *)bootmem + (i * UMA_SLAB_SIZE));
1772                slab->us_data = (u_int8_t *)slab;
1773                slab->us_flags = UMA_SLAB_BOOT;
1774                LIST_INSERT_HEAD(&uma_boot_pages, slab, us_link);
1775        }
1776        mtx_init(&uma_boot_pages_mtx, "UMA boot pages", NULL, MTX_DEF);
1777#endif /* __rtems__ */
1778
1779#ifdef UMA_DEBUG
1780        printf("Creating uma zone headers zone and keg.\n");
1781#endif
1782        args.name = "UMA Zones";
1783        args.size = sizeof(struct uma_zone) +
1784            (sizeof(struct uma_cache) * (mp_maxid + 1));
1785        args.ctor = zone_ctor;
1786        args.dtor = zone_dtor;
1787        args.uminit = zero_init;
1788        args.fini = NULL;
1789        args.keg = NULL;
1790        args.align = 32 - 1;
1791        args.flags = UMA_ZFLAG_INTERNAL;
1792        /* The initial zone has no Per cpu queues so it's smaller */
1793        zone_ctor(zones, sizeof(struct uma_zone), &args, M_WAITOK);
1794
1795#ifdef UMA_DEBUG
1796        printf("Initializing pcpu cache locks.\n");
1797#endif
1798#ifdef UMA_DEBUG
1799        printf("Creating slab and hash zones.\n");
1800#endif
1801
1802        /*
1803         * This is the max number of free list items we'll have with
1804         * offpage slabs.
1805         */
1806        slabsize = uma_max_ipers * UMA_FRITM_SZ;
1807        slabsize += sizeof(struct uma_slab);
1808
1809        /* Now make a zone for slab headers */
1810        slabzone = uma_zcreate("UMA Slabs",
1811                                slabsize,
1812                                NULL, NULL, NULL, NULL,
1813                                UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1814
1815        /*
1816         * We also create a zone for the bigger slabs with reference
1817         * counts in them, to accommodate UMA_ZONE_REFCNT zones.
1818         */
1819        slabsize = uma_max_ipers_ref * UMA_FRITMREF_SZ;
1820        slabsize += sizeof(struct uma_slab_refcnt);
1821        slabrefzone = uma_zcreate("UMA RCntSlabs",
1822                                  slabsize,
1823                                  NULL, NULL, NULL, NULL,
1824                                  UMA_ALIGN_PTR,
1825                                  UMA_ZFLAG_INTERNAL);
1826
1827        hashzone = uma_zcreate("UMA Hash",
1828            sizeof(struct slabhead *) * UMA_HASH_SIZE_INIT,
1829            NULL, NULL, NULL, NULL,
1830            UMA_ALIGN_PTR, UMA_ZFLAG_INTERNAL);
1831
1832        bucket_init();
1833
1834#ifndef __rtems__
1835        booted = UMA_STARTUP;
1836#endif /* __rtems__ */
1837
1838#ifdef UMA_DEBUG
1839        printf("UMA startup complete.\n");
1840#endif
1841}
1842#ifdef __rtems__
1843static void
1844rtems_bsd_uma_startup(void *unused)
1845{
1846        (void) unused;
1847
1848        uma_startup(NULL, 0);
1849}
1850
1851SYSINIT(rtems_bsd_uma_startup, SI_SUB_VM, SI_ORDER_FIRST,
1852    rtems_bsd_uma_startup, NULL);
1853#endif /* __rtems__ */
1854
1855#ifndef __rtems__
1856/* see uma.h */
1857void
1858uma_startup2(void)
1859{
1860        booted = UMA_STARTUP2;
1861        bucket_enable();
1862#ifdef UMA_DEBUG
1863        printf("UMA startup2 complete.\n");
1864#endif
1865}
1866#endif /* __rtems__ */
1867
1868/*
1869 * Initialize our callout handle.
1871 */
1872
1873static void
1874uma_startup3(void)
1875{
1876#ifdef UMA_DEBUG
1877        printf("Starting callout.\n");
1878#endif
1879        callout_init(&uma_callout, CALLOUT_MPSAFE);
1880        callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
1881#ifdef UMA_DEBUG
1882        printf("UMA startup3 complete.\n");
1883#endif
1884}
1885
1886static uma_keg_t
1887uma_kcreate(uma_zone_t zone, size_t size, uma_init uminit, uma_fini fini,
1888                int align, u_int32_t flags)
1889{
1890        struct uma_kctor_args args;
1891
1892        args.size = size;
1893        args.uminit = uminit;
1894        args.fini = fini;
1895        args.align = (align == UMA_ALIGN_CACHE) ? uma_align_cache : align;
1896        args.flags = flags;
1897        args.zone = zone;
1898        return (zone_alloc_item(kegs, &args, M_WAITOK));
1899}
1900
1901/* See uma.h */
1902void
1903uma_set_align(int align)
1904{
1905
1906        if (align != UMA_ALIGN_CACHE)
1907                uma_align_cache = align;
1908}
1909
1910/* See uma.h */
1911uma_zone_t
1912uma_zcreate(const char *name, size_t size, uma_ctor ctor, uma_dtor dtor,
1913                uma_init uminit, uma_fini fini, int align, u_int32_t flags)
1914
1915{
1916        struct uma_zctor_args args;
1917
1918        /* This stuff is essential for the zone ctor */
1919        args.name = name;
1920        args.size = size;
1921        args.ctor = ctor;
1922        args.dtor = dtor;
1923        args.uminit = uminit;
1924        args.fini = fini;
1925        args.align = align;
1926        args.flags = flags;
1927        args.keg = NULL;
1928
1929        return (zone_alloc_item(zones, &args, M_WAITOK));
1930}
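
/*
 * Usage sketch (illustrative only, not taken from this file): creating a
 * zone with a constructor and destructor, allocating an item and tearing
 * the zone down again.  "struct foo" and the callback names are
 * hypothetical; the uma_zcreate()/uma_zalloc()/uma_zfree()/uma_zdestroy()
 * calls follow the prototypes in <vm/uma.h>, which is included above.
 */
#if 0   /* usage sketch; deliberately not compiled */
struct foo {
        int     f_state;
};

static uma_zone_t foo_zone;

static int
foo_ctor(void *mem, int size, void *arg, int flags)
{
        struct foo *f = mem;

        /* Runs on every allocation, after any keg/zone init. */
        f->f_state = 0;
        return (0);
}

static void
foo_dtor(void *mem, int size, void *arg)
{
        /* Runs on every free, before the item goes back to the cache. */
}

static void
foo_zone_example(void)
{
        struct foo *f;

        foo_zone = uma_zcreate("foo", sizeof(struct foo), foo_ctor, foo_dtor,
            NULL, NULL, UMA_ALIGN_PTR, 0);
        f = uma_zalloc(foo_zone, M_WAITOK);
        /* ... use f ... */
        uma_zfree(foo_zone, f);
        uma_zdestroy(foo_zone);
}
#endif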
1931
1932/* See uma.h */
1933uma_zone_t
1934uma_zsecond_create(char *name, uma_ctor ctor, uma_dtor dtor,
1935                    uma_init zinit, uma_fini zfini, uma_zone_t master)
1936{
1937        struct uma_zctor_args args;
1938        uma_keg_t keg;
1939
1940        keg = zone_first_keg(master);
1941        args.name = name;
1942        args.size = keg->uk_size;
1943        args.ctor = ctor;
1944        args.dtor = dtor;
1945        args.uminit = zinit;
1946        args.fini = zfini;
1947        args.align = keg->uk_align;
1948        args.flags = keg->uk_flags | UMA_ZONE_SECONDARY;
1949        args.keg = keg;
1950
1951        /* XXX Attaches only one keg of potentially many. */
1952        return (zone_alloc_item(zones, &args, M_WAITOK));
1953}
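
/*
 * Illustrative sketch (hypothetical names, not from this file): a secondary
 * zone shares its master's keg, and therefore its slabs and item size, but
 * applies its own ctor/dtor on top; this mirrors the way the mbuf code
 * layers zones in FreeBSD.
 */
#if 0   /* usage sketch; deliberately not compiled */
static uma_zone_t raw_zone;     /* master zone, owns the keg */
static uma_zone_t cooked_zone;  /* secondary zone, shares the keg */

static int
cooked_ctor(void *mem, int size, void *arg, int flags)
{
        /* Extra per-allocation setup layered on top of the raw items. */
        return (0);
}

static void
secondary_example(void)
{
        raw_zone = uma_zcreate("raw items", 256, NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        cooked_zone = uma_zsecond_create("cooked items", cooked_ctor, NULL,
            NULL, NULL, raw_zone);
        /* Allocations from either zone are carved from the same slabs. */
}
#endif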
1954
1955#ifndef __rtems__
1956static void
1957zone_lock_pair(uma_zone_t a, uma_zone_t b)
1958{
1959        if (a < b) {
1960                ZONE_LOCK(a);
1961                mtx_lock_flags(b->uz_lock, MTX_DUPOK);
1962        } else {
1963                ZONE_LOCK(b);
1964                mtx_lock_flags(a->uz_lock, MTX_DUPOK);
1965        }
1966}
1967
1968static void
1969zone_unlock_pair(uma_zone_t a, uma_zone_t b)
1970{
1971
1972        ZONE_UNLOCK(a);
1973        ZONE_UNLOCK(b);
1974}
1975
1976int
1977uma_zsecond_add(uma_zone_t zone, uma_zone_t master)
1978{
1979        uma_klink_t klink;
1980        uma_klink_t kl;
1981        int error;
1982
1983        error = 0;
1984        klink = malloc(sizeof(*klink), M_TEMP, M_WAITOK | M_ZERO);
1985
1986        zone_lock_pair(zone, master);
1987        /*
1988         * zone must use vtoslab() to resolve objects and must already be
1989         * a secondary.
1990         */
1991        if ((zone->uz_flags & (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY))
1992            != (UMA_ZONE_VTOSLAB | UMA_ZONE_SECONDARY)) {
1993                error = EINVAL;
1994                goto out;
1995        }
1996        /*
1997         * The new master must also use vtoslab().
1998         */
1999        if ((zone->uz_flags & UMA_ZONE_VTOSLAB) != UMA_ZONE_VTOSLAB) {
2000                error = EINVAL;
2001                goto out;
2002        }
2003        /*
2004         * Both must either be refcnt, or not be refcnt.
2005         */
2006        if ((zone->uz_flags & UMA_ZONE_REFCNT) !=
2007            (master->uz_flags & UMA_ZONE_REFCNT)) {
2008                error = EINVAL;
2009                goto out;
2010        }
2011        /*
2012         * The underlying object must be the same size.  rsize
2013         * may be different.
2014         */
2015        if (master->uz_size != zone->uz_size) {
2016                error = E2BIG;
2017                goto out;
2018        }
2019        /*
2020         * Put it at the end of the list.
2021         */
2022        klink->kl_keg = zone_first_keg(master);
2023        LIST_FOREACH(kl, &zone->uz_kegs, kl_link) {
2024                if (LIST_NEXT(kl, kl_link) == NULL) {
2025                        LIST_INSERT_AFTER(kl, klink, kl_link);
2026                        break;
2027                }
2028        }
2029        klink = NULL;
2030        zone->uz_flags |= UMA_ZFLAG_MULTI;
2031        zone->uz_slab = zone_fetch_slab_multi;
2032
2033out:
2034        zone_unlock_pair(zone, master);
2035        if (klink != NULL)
2036                free(klink, M_TEMP);
2037
2038        return (error);
2039}
2040#endif /* __rtems__ */
2041
2042
2043/* See uma.h */
2044void
2045uma_zdestroy(uma_zone_t zone)
2046{
2047
2048        zone_free_item(zones, zone, NULL, SKIP_NONE, ZFREE_STATFREE);
2049}
2050
2051/* See uma.h */
2052void *
2053uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
2054{
2055        void *item;
2056        uma_cache_t cache;
2057        uma_bucket_t bucket;
2058        int cpu;
2059
2060        /* This is the fast path allocation */
2061#ifdef UMA_DEBUG_ALLOC_1
2062        printf("Allocating one item from %s(%p)\n", zone->uz_name, zone);
2063#endif
2064        CTR3(KTR_UMA, "uma_zalloc_arg thread %x zone %s flags %d", curthread,
2065            zone->uz_name, flags);
2066
2067        if (flags & M_WAITOK) {
2068                WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
2069                    "uma_zalloc_arg: zone \"%s\"", zone->uz_name);
2070        }
2071
2072        /*
2073         * If possible, allocate from the per-CPU cache.  There are two
2074         * requirements for safe access to the per-CPU cache: (1) the thread
2075         * accessing the cache must not be preempted or yield during access,
2076         * and (2) the thread must not migrate CPUs without switching which
2077         * cache it accesses.  We rely on a critical section to prevent
2078         * preemption and migration.  We release the critical section in
2079         * order to acquire the zone mutex if we are unable to allocate from
2080         * the current cache; when we re-acquire the critical section, we
2081         * must detect and handle migration if it has occurred.
2082         */
2083zalloc_restart:
2084        critical_enter();
2085        cpu = curcpu;
2086        cache = &zone->uz_cpu[cpu];
2087
2088zalloc_start:
2089        bucket = cache->uc_allocbucket;
2090
2091        if (bucket) {
2092                if (bucket->ub_cnt > 0) {
2093                        bucket->ub_cnt--;
2094                        item = bucket->ub_bucket[bucket->ub_cnt];
2095#ifdef INVARIANTS
2096                        bucket->ub_bucket[bucket->ub_cnt] = NULL;
2097#endif
2098                        KASSERT(item != NULL,
2099                            ("uma_zalloc: Bucket pointer mangled."));
2100                        cache->uc_allocs++;
2101                        critical_exit();
2102#ifdef INVARIANTS
2103                        ZONE_LOCK(zone);
2104                        uma_dbg_alloc(zone, NULL, item);
2105                        ZONE_UNLOCK(zone);
2106#endif
2107                        if (zone->uz_ctor != NULL) {
2108                                if (zone->uz_ctor(item, zone->uz_size,
2109                                    udata, flags) != 0) {
2110                                        zone_free_item(zone, item, udata,
2111                                            SKIP_DTOR, ZFREE_STATFAIL |
2112                                            ZFREE_STATFREE);
2113                                        return (NULL);
2114                                }
2115                        }
2116                        if (flags & M_ZERO)
2117                                bzero(item, zone->uz_size);
2118                        return (item);
2119                } else if (cache->uc_freebucket) {
2120                        /*
2121                         * We have run out of items in our allocbucket.
2122                         * See if we can switch with our free bucket.
2123                         */
2124                        if (cache->uc_freebucket->ub_cnt > 0) {
2125#ifdef UMA_DEBUG_ALLOC
2126                                printf("uma_zalloc: Swapping empty with"
2127                                    " alloc.\n");
2128#endif
2129                                bucket = cache->uc_freebucket;
2130                                cache->uc_freebucket = cache->uc_allocbucket;
2131                                cache->uc_allocbucket = bucket;
2132
2133                                goto zalloc_start;
2134                        }
2135                }
2136        }
2137        /*
2138         * The attempt to retrieve the item from the per-CPU cache failed, so
2139         * we must go back to the zone.  This requires the zone lock, so we
2140         * must drop the critical section, then re-acquire it when we go back
2141         * to the cache.  Since the critical section is released, we may be
2142         * preempted or migrate.  As such, make sure not to maintain any
2143         * thread-local state specific to the cache from prior to releasing
2144         * the critical section.
2145         */
2146        critical_exit();
2147        ZONE_LOCK(zone);
2148        critical_enter();
2149        cpu = curcpu;
2150        cache = &zone->uz_cpu[cpu];
2151        bucket = cache->uc_allocbucket;
2152        if (bucket != NULL) {
2153                if (bucket->ub_cnt > 0) {
2154                        ZONE_UNLOCK(zone);
2155                        goto zalloc_start;
2156                }
2157                bucket = cache->uc_freebucket;
2158                if (bucket != NULL && bucket->ub_cnt > 0) {
2159                        ZONE_UNLOCK(zone);
2160                        goto zalloc_start;
2161                }
2162        }
2163
2164        /* Since we have locked the zone we may as well send back our stats */
2165        zone->uz_allocs += cache->uc_allocs;
2166        cache->uc_allocs = 0;
2167        zone->uz_frees += cache->uc_frees;
2168        cache->uc_frees = 0;
2169
2170        /* Our old one is now a free bucket */
2171        if (cache->uc_allocbucket) {
2172                KASSERT(cache->uc_allocbucket->ub_cnt == 0,
2173                    ("uma_zalloc_arg: Freeing a non free bucket."));
2174                LIST_INSERT_HEAD(&zone->uz_free_bucket,
2175                    cache->uc_allocbucket, ub_link);
2176                cache->uc_allocbucket = NULL;
2177        }
2178
2179        /* Check the free list for a new alloc bucket */
2180        if ((bucket = LIST_FIRST(&zone->uz_full_bucket)) != NULL) {
2181                KASSERT(bucket->ub_cnt != 0,
2182                    ("uma_zalloc_arg: Returning an empty bucket."));
2183
2184                LIST_REMOVE(bucket, ub_link);
2185                cache->uc_allocbucket = bucket;
2186                ZONE_UNLOCK(zone);
2187                goto zalloc_start;
2188        }
2189        /* We are no longer associated with this CPU. */
2190        critical_exit();
2191
2192        /* Bump up our uz_count so we get here less */
2193        if (zone->uz_count < BUCKET_MAX)
2194                zone->uz_count++;
2195
2196        /*
2197         * Now let's just fill a bucket and put it on the free list.  If that
2198         * works we'll restart the allocation from the beginning.
2199         */
2200        if (zone_alloc_bucket(zone, flags)) {
2201                ZONE_UNLOCK(zone);
2202                goto zalloc_restart;
2203        }
2204        ZONE_UNLOCK(zone);
2205        /*
2206         * We may not be able to get a bucket so return an actual item.
2207         */
2208#ifdef UMA_DEBUG
2209        printf("uma_zalloc_arg: Bucketzone returned NULL\n");
2210#endif
2211
2212        item = zone_alloc_item(zone, udata, flags);
2213        return (item);
2214}
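
/*
 * Illustrative sketch of the flag combinations the fast path above must
 * honour (the zone argument is assumed to come from an earlier uma_zcreate()
 * call): M_NOWAIT callers have to tolerate NULL, M_ZERO is applied after the
 * ctor runs, and M_WAITOK may sleep and so must not be used from contexts
 * that cannot sleep.
 */
#if 0   /* usage sketch; deliberately not compiled */
static void
alloc_flags_example(uma_zone_t zone)
{
        void *it;

        /* May sleep for memory; not expected to return NULL. */
        it = uma_zalloc(zone, M_WAITOK | M_ZERO);
        uma_zfree(zone, it);

        /* Never sleeps; the caller must handle failure. */
        it = uma_zalloc(zone, M_NOWAIT);
        if (it == NULL)
                return;         /* e.g. drop the request and retry later */
        uma_zfree(zone, it);
}
#endif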
2215
2216static uma_slab_t
2217keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int flags)
2218{
2219        uma_slab_t slab;
2220
2221        mtx_assert(&keg->uk_lock, MA_OWNED);
2222        slab = NULL;
2223
2224        for (;;) {
2225                /*
2226                 * Find a slab with some space.  Prefer slabs that are partially
2227                 * used over those that are totally full.  This helps to reduce
2228                 * fragmentation.
2229                 */
2230                if (keg->uk_free != 0) {
2231                        if (!LIST_EMPTY(&keg->uk_part_slab)) {
2232                                slab = LIST_FIRST(&keg->uk_part_slab);
2233                        } else {
2234                                slab = LIST_FIRST(&keg->uk_free_slab);
2235                                LIST_REMOVE(slab, us_link);
2236                                LIST_INSERT_HEAD(&keg->uk_part_slab, slab,
2237                                    us_link);
2238                        }
2239                        MPASS(slab->us_keg == keg);
2240                        return (slab);
2241                }
2242
2243                /*
2244                 * M_NOVM means don't ask at all!
2245                 */
2246                if (flags & M_NOVM)
2247                        break;
2248
2249                if (keg->uk_maxpages && keg->uk_pages >= keg->uk_maxpages) {
2250                        keg->uk_flags |= UMA_ZFLAG_FULL;
2251                        /*
2252                         * If this is not a multi-zone, set the FULL bit.
2253                         * Otherwise slab_multi() takes care of it.
2254                         */
2255                        if ((zone->uz_flags & UMA_ZFLAG_MULTI) == 0)
2256                                zone->uz_flags |= UMA_ZFLAG_FULL;
2257                        if (flags & M_NOWAIT)
2258                                break;
2259                        zone->uz_sleeps++;
2260                        msleep(keg, &keg->uk_lock, PVM, "keglimit", 0);
2261                        continue;
2262                }
2263                keg->uk_recurse++;
2264                slab = keg_alloc_slab(keg, zone, flags);
2265                keg->uk_recurse--;
2266                /*
2267                 * If we got a slab here it's safe to mark it partially used
2268                 * and return.  We assume that the caller is going to remove
2269                 * at least one item.
2270                 */
2271                if (slab) {
2272                        MPASS(slab->us_keg == keg);
2273                        LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2274                        return (slab);
2275                }
2276                /*
2277                 * We might not have been able to get a slab but another cpu
2278                 * could have while we were unlocked.  Check again before we
2279                 * fail.
2280                 */
2281                flags |= M_NOVM;
2282        }
2283        return (slab);
2284}
2285
2286static inline void
2287zone_relock(uma_zone_t zone, uma_keg_t keg)
2288{
2289        if (zone->uz_lock != &keg->uk_lock) {
2290                KEG_UNLOCK(keg);
2291                ZONE_LOCK(zone);
2292        }
2293}
2294
2295static inline void
2296keg_relock(uma_keg_t keg, uma_zone_t zone)
2297{
2298        if (zone->uz_lock != &keg->uk_lock) {
2299                ZONE_UNLOCK(zone);
2300                KEG_LOCK(keg);
2301        }
2302}
2303
2304static uma_slab_t
2305zone_fetch_slab(uma_zone_t zone, uma_keg_t keg, int flags)
2306{
2307        uma_slab_t slab;
2308
2309        if (keg == NULL)
2310                keg = zone_first_keg(zone);
2311        /*
2312         * This is to prevent us from recursively trying to allocate
2313         * buckets.  The problem is that if an allocation forces us to
2314         * grab a new bucket we will call page_alloc, which will go off
2315         * and cause the vm to allocate vm_map_entries.  If we need new
2316         * buckets there too we will recurse in kmem_alloc and bad
2317         * things happen.  So instead we return a NULL bucket, and make
2318         * the code that allocates buckets smart enough to deal with it.
2319         */
2320        if (keg->uk_flags & UMA_ZFLAG_BUCKET && keg->uk_recurse != 0)
2321                return (NULL);
2322
2323        for (;;) {
2324                slab = keg_fetch_slab(keg, zone, flags);
2325                if (slab)
2326                        return (slab);
2327                if (flags & (M_NOWAIT | M_NOVM))
2328                        break;
2329        }
2330        return (NULL);
2331}
2332
2333#ifndef __rtems__
2334/*
2335 * zone_fetch_slab_multi:  Fetches a slab from one available keg.  Returns
2336 * with the keg locked.  Caller must call zone_relock() afterwards if the
2337 * zone lock is required.  On NULL the zone lock is held.
2338 *
2339 * The last pointer is used to seed the search.  It is not required.
2340 */
2341static uma_slab_t
2342zone_fetch_slab_multi(uma_zone_t zone, uma_keg_t last, int rflags)
2343{
2344        uma_klink_t klink;
2345        uma_slab_t slab;
2346        uma_keg_t keg;
2347        int flags;
2348        int empty;
2349        int full;
2350
2351        /*
2352         * Don't wait on the first pass.  This will skip limit tests
2353         * as well.  We don't want to block if we can find a provider
2354         * without blocking.
2355         */
2356        flags = (rflags & ~M_WAITOK) | M_NOWAIT;
2357        /*
2358         * Use the last slab allocated as a hint for where to start
2359         * the search.
2360         */
2361        if (last) {
2362                slab = keg_fetch_slab(last, zone, flags);
2363                if (slab)
2364                        return (slab);
2365                zone_relock(zone, last);
2366                last = NULL;
2367        }
2368        /*
2369         * Loop until we have a slab in case of transient failures
2370         * while M_WAITOK is specified.  I'm not sure this is 100%
2371         * required but we've done it for so long now.
2372         */
2373        for (;;) {
2374                empty = 0;
2375                full = 0;
2376                /*
2377                 * Search the available kegs for slabs.  Be careful to hold the
2378                 * correct lock while calling into the keg layer.
2379                 */
2380                LIST_FOREACH(klink, &zone->uz_kegs, kl_link) {
2381                        keg = klink->kl_keg;
2382                        keg_relock(keg, zone);
2383                        if ((keg->uk_flags & UMA_ZFLAG_FULL) == 0) {
2384                                slab = keg_fetch_slab(keg, zone, flags);
2385                                if (slab)
2386                                        return (slab);
2387                        }
2388                        if (keg->uk_flags & UMA_ZFLAG_FULL)
2389                                full++;
2390                        else
2391                                empty++;
2392                        zone_relock(zone, keg);
2393                }
2394                if (rflags & (M_NOWAIT | M_NOVM))
2395                        break;
2396                flags = rflags;
2397                /*
2398                 * All kegs are full.  XXX We can't atomically check all kegs
2399                 * and sleep so just sleep for a short period and retry.
2400                 */
2401                if (full && !empty) {
2402                        zone->uz_flags |= UMA_ZFLAG_FULL;
2403                        zone->uz_sleeps++;
2404                        msleep(zone, zone->uz_lock, PVM, "zonelimit", hz/100);
2405                        zone->uz_flags &= ~UMA_ZFLAG_FULL;
2406                        continue;
2407                }
2408        }
2409        return (NULL);
2410}
2411#endif /* __rtems__ */
2412
2413static void *
2414slab_alloc_item(uma_zone_t zone, uma_slab_t slab)
2415{
2416        uma_keg_t keg;
2417        uma_slabrefcnt_t slabref;
2418        void *item;
2419        u_int8_t freei;
2420
2421        keg = slab->us_keg;
2422        mtx_assert(&keg->uk_lock, MA_OWNED);
2423
2424        freei = slab->us_firstfree;
2425        if (keg->uk_flags & UMA_ZONE_REFCNT) {
2426                slabref = (uma_slabrefcnt_t)slab;
2427                slab->us_firstfree = slabref->us_freelist[freei].us_item;
2428        } else {
2429                slab->us_firstfree = slab->us_freelist[freei].us_item;
2430        }
2431        item = slab->us_data + (keg->uk_rsize * freei);
2432
2433        slab->us_freecount--;
2434        keg->uk_free--;
2435#ifdef INVARIANTS
2436        uma_dbg_alloc(zone, slab, item);
2437#endif
2438        /* Move this slab to the full list */
2439        if (slab->us_freecount == 0) {
2440                LIST_REMOVE(slab, us_link);
2441                LIST_INSERT_HEAD(&keg->uk_full_slab, slab, us_link);
2442        }
2443
2444        return (item);
2445}
2446
2447static int
2448zone_alloc_bucket(uma_zone_t zone, int flags)
2449{
2450        uma_bucket_t bucket;
2451        uma_slab_t slab;
2452        uma_keg_t keg;
2453        int16_t saved;
2454        int max, origflags = flags;
2455
2456        /*
2457         * Try this zone's free list first so we don't allocate extra buckets.
2458         */
2459        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2460                KASSERT(bucket->ub_cnt == 0,
2461                    ("zone_alloc_bucket: Bucket on free list is not empty."));
2462                LIST_REMOVE(bucket, ub_link);
2463        } else {
2464                int bflags;
2465
2466                bflags = (flags & ~M_ZERO);
2467                if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2468                        bflags |= M_NOVM;
2469
2470                ZONE_UNLOCK(zone);
2471                bucket = bucket_alloc(zone->uz_count, bflags);
2472                ZONE_LOCK(zone);
2473        }
2474
2475        if (bucket == NULL) {
2476                return (0);
2477        }
2478
2479#ifdef SMP
2480        /*
2481         * This code is here to limit the number of simultaneous bucket fills
2482         * for any given zone to the number of per cpu caches in this zone. This
2483         * is done so that we don't allocate more memory than we really need.
2484         */
2485        if (zone->uz_fills >= mp_ncpus)
2486                goto done;
2487
2488#endif
2489        zone->uz_fills++;
2490
2491        max = MIN(bucket->ub_entries, zone->uz_count);
2492        /* Try to keep the buckets totally full */
2493        saved = bucket->ub_cnt;
2494        slab = NULL;
2495        keg = NULL;
2496        while (bucket->ub_cnt < max &&
2497            (slab = zone->uz_slab(zone, keg, flags)) != NULL) {
2498                keg = slab->us_keg;
2499                while (slab->us_freecount && bucket->ub_cnt < max) {
2500                        bucket->ub_bucket[bucket->ub_cnt++] =
2501                            slab_alloc_item(zone, slab);
2502                }
2503
2504                /* Don't block on the next fill */
2505                flags |= M_NOWAIT;
2506        }
2507        if (slab)
2508                zone_relock(zone, keg);
2509
2510        /*
2511         * We unlock here because we need to call the zone's init.
2512         * It should be safe to unlock because the slab dealt with
2513         * above is already on the appropriate list within the keg
2514         * and the bucket we filled is not yet on any list, so we
2515         * own it.
2516         */
2517        if (zone->uz_init != NULL) {
2518                int i;
2519
2520                ZONE_UNLOCK(zone);
2521                for (i = saved; i < bucket->ub_cnt; i++)
2522                        if (zone->uz_init(bucket->ub_bucket[i], zone->uz_size,
2523                            origflags) != 0)
2524                                break;
2525                /*
2526                 * If we couldn't initialize the whole bucket, put the
2527                 * rest back onto the freelist.
2528                 */
2529                if (i != bucket->ub_cnt) {
2530                        int j;
2531
2532                        for (j = i; j < bucket->ub_cnt; j++) {
2533                                zone_free_item(zone, bucket->ub_bucket[j],
2534                                    NULL, SKIP_FINI, 0);
2535#ifdef INVARIANTS
2536                                bucket->ub_bucket[j] = NULL;
2537#endif
2538                        }
2539                        bucket->ub_cnt = i;
2540                }
2541                ZONE_LOCK(zone);
2542        }
2543
2544        zone->uz_fills--;
2545        if (bucket->ub_cnt != 0) {
2546                LIST_INSERT_HEAD(&zone->uz_full_bucket,
2547                    bucket, ub_link);
2548                return (1);
2549        }
2550#ifdef SMP
2551done:
2552#endif
2553        bucket_free(bucket);
2554
2555        return (0);
2556}
2557/*
2558 * Allocates an item for an internal zone
2559 *
2560 * Arguments
2561 *      zone   The zone to alloc for.
2562 *      udata  The data to be passed to the constructor.
2563 *      flags  M_WAITOK, M_NOWAIT, M_ZERO.
2564 *
2565 * Returns
2566 *      NULL if there is no memory and M_NOWAIT is set
2567 *      An item if successful
2568 */
2569
2570static void *
2571zone_alloc_item(uma_zone_t zone, void *udata, int flags)
2572{
2573        uma_slab_t slab;
2574        void *item;
2575
2576        item = NULL;
2577
2578#ifdef UMA_DEBUG_ALLOC
2579        printf("INTERNAL: Allocating one item from %s(%p)\n", zone->uz_name, zone);
2580#endif
2581        ZONE_LOCK(zone);
2582
2583        slab = zone->uz_slab(zone, NULL, flags);
2584        if (slab == NULL) {
2585                zone->uz_fails++;
2586                ZONE_UNLOCK(zone);
2587                return (NULL);
2588        }
2589
2590        item = slab_alloc_item(zone, slab);
2591
2592        zone_relock(zone, slab->us_keg);
2593        zone->uz_allocs++;
2594        ZONE_UNLOCK(zone);
2595
2596        /*
2597         * We have to call both the zone's init (not the keg's init)
2598         * and the zone's ctor.  This is because the item is going from
2599         * a keg slab directly to the user, and the user is expecting it
2600         * to be both zone-init'd as well as zone-ctor'd.
2601         */
2602        if (zone->uz_init != NULL) {
2603                if (zone->uz_init(item, zone->uz_size, flags) != 0) {
2604                        zone_free_item(zone, item, udata, SKIP_FINI,
2605                            ZFREE_STATFAIL | ZFREE_STATFREE);
2606                        return (NULL);
2607                }
2608        }
2609        if (zone->uz_ctor != NULL) {
2610                if (zone->uz_ctor(item, zone->uz_size, udata, flags) != 0) {
2611                        zone_free_item(zone, item, udata, SKIP_DTOR,
2612                            ZFREE_STATFAIL | ZFREE_STATFREE);
2613                        return (NULL);
2614                }
2615        }
2616        if (flags & M_ZERO)
2617                bzero(item, zone->uz_size);
2618
2619        return (item);
2620}
2621
2622/* See uma.h */
2623void
2624uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
2625{
2626        uma_cache_t cache;
2627        uma_bucket_t bucket;
2628        int bflags;
2629        int cpu;
2630
2631#ifdef UMA_DEBUG_ALLOC_1
2632        printf("Freeing item %p to %s(%p)\n", item, zone->uz_name, zone);
2633#endif
2634        CTR2(KTR_UMA, "uma_zfree_arg thread %x zone %s", curthread,
2635            zone->uz_name);
2636
2637        /* uma_zfree(..., NULL) does nothing, to match free(9). */
2638        if (item == NULL)
2639                return;
2640
2641        if (zone->uz_dtor)
2642                zone->uz_dtor(item, zone->uz_size, udata);
2643
2644#ifdef INVARIANTS
2645        ZONE_LOCK(zone);
2646        if (zone->uz_flags & UMA_ZONE_MALLOC)
2647                uma_dbg_free(zone, udata, item);
2648        else
2649                uma_dbg_free(zone, NULL, item);
2650        ZONE_UNLOCK(zone);
2651#endif
2652        /*
2653         * The race here is acceptable.  If we miss it we'll just have to wait
2654         * a little longer for the limits to be reset.
2655         */
2656        if (zone->uz_flags & UMA_ZFLAG_FULL)
2657                goto zfree_internal;
2658
2659        /*
2660         * If possible, free to the per-CPU cache.  There are two
2661         * requirements for safe access to the per-CPU cache: (1) the thread
2662         * accessing the cache must not be preempted or yield during access,
2663         * and (2) the thread must not migrate CPUs without switching which
2664         * cache it accesses.  We rely on a critical section to prevent
2665         * preemption and migration.  We release the critical section in
2666         * order to acquire the zone mutex if we are unable to free to the
2667         * current cache; when we re-acquire the critical section, we must
2668         * detect and handle migration if it has occurred.
2669         */
2670zfree_restart:
2671        critical_enter();
2672        cpu = curcpu;
2673        cache = &zone->uz_cpu[cpu];
2674
2675zfree_start:
2676        bucket = cache->uc_freebucket;
2677
2678        if (bucket) {
2679                /*
2680                 * Do we have room in our bucket? It is OK for this uz count
2681                 * check to be slightly out of sync.
2682                 */
2683
2684                if (bucket->ub_cnt < bucket->ub_entries) {
2685                        KASSERT(bucket->ub_bucket[bucket->ub_cnt] == NULL,
2686                            ("uma_zfree: Freeing to non free bucket index."));
2687                        bucket->ub_bucket[bucket->ub_cnt] = item;
2688                        bucket->ub_cnt++;
2689                        cache->uc_frees++;
2690                        critical_exit();
2691                        return;
2692                } else if (cache->uc_allocbucket) {
2693#ifdef UMA_DEBUG_ALLOC
2694                        printf("uma_zfree: Swapping buckets.\n");
2695#endif
2696                        /*
2697                         * We have run out of space in our freebucket.
2698                         * See if we can switch with our alloc bucket.
2699                         */
2700                        if (cache->uc_allocbucket->ub_cnt <
2701                            cache->uc_freebucket->ub_cnt) {
2702                                bucket = cache->uc_freebucket;
2703                                cache->uc_freebucket = cache->uc_allocbucket;
2704                                cache->uc_allocbucket = bucket;
2705                                goto zfree_start;
2706                        }
2707                }
2708        }
2709        /*
2710         * We can get here for two reasons:
2711         *
2712         * 1) The buckets are NULL
2713         * 2) The alloc and free buckets are both somewhat full.
2714         *
2715         * We must go back the zone, which requires acquiring the zone lock,
2716         * which in turn means we must release and re-acquire the critical
2717         * section.  Since the critical section is released, we may be
2718         * preempted or migrate.  As such, make sure not to maintain any
2719         * thread-local state specific to the cache from prior to releasing
2720         * the critical section.
2721         */
2722        critical_exit();
2723        ZONE_LOCK(zone);
2724        critical_enter();
2725        cpu = curcpu;
2726        cache = &zone->uz_cpu[cpu];
2727        if (cache->uc_freebucket != NULL) {
2728                if (cache->uc_freebucket->ub_cnt <
2729                    cache->uc_freebucket->ub_entries) {
2730                        ZONE_UNLOCK(zone);
2731                        goto zfree_start;
2732                }
2733                if (cache->uc_allocbucket != NULL &&
2734                    (cache->uc_allocbucket->ub_cnt <
2735                    cache->uc_freebucket->ub_cnt)) {
2736                        ZONE_UNLOCK(zone);
2737                        goto zfree_start;
2738                }
2739        }
2740
2741        /* Since we have locked the zone we may as well send back our stats */
2742        zone->uz_allocs += cache->uc_allocs;
2743        cache->uc_allocs = 0;
2744        zone->uz_frees += cache->uc_frees;
2745        cache->uc_frees = 0;
2746
2747        bucket = cache->uc_freebucket;
2748        cache->uc_freebucket = NULL;
2749
2750        /* Can we throw this on the zone full list? */
2751        if (bucket != NULL) {
2752#ifdef UMA_DEBUG_ALLOC
2753                printf("uma_zfree: Putting old bucket on the free list.\n");
2754#endif
2755                /* ub_cnt is pointing to the last free item */
2756                KASSERT(bucket->ub_cnt != 0,
2757                    ("uma_zfree: Attempting to insert an empty bucket onto the full list.\n"));
2758                LIST_INSERT_HEAD(&zone->uz_full_bucket,
2759                    bucket, ub_link);
2760        }
2761        if ((bucket = LIST_FIRST(&zone->uz_free_bucket)) != NULL) {
2762                LIST_REMOVE(bucket, ub_link);
2763                ZONE_UNLOCK(zone);
2764                cache->uc_freebucket = bucket;
2765                goto zfree_start;
2766        }
2767        /* We are no longer associated with this CPU. */
2768        critical_exit();
2769
2770        /* And the zone.. */
2771        ZONE_UNLOCK(zone);
2772
2773#ifdef UMA_DEBUG_ALLOC
2774        printf("uma_zfree: Allocating new free bucket.\n");
2775#endif
2776        bflags = M_NOWAIT;
2777
2778        if (zone->uz_flags & UMA_ZFLAG_CACHEONLY)
2779                bflags |= M_NOVM;
2780        bucket = bucket_alloc(zone->uz_count, bflags);
2781        if (bucket) {
2782                ZONE_LOCK(zone);
2783                LIST_INSERT_HEAD(&zone->uz_free_bucket,
2784                    bucket, ub_link);
2785                ZONE_UNLOCK(zone);
2786                goto zfree_restart;
2787        }
2788
2789        /*
2790         * If nothing else caught this, we'll just do an internal free.
2791         */
2792zfree_internal:
2793        zone_free_item(zone, item, udata, SKIP_DTOR, ZFREE_STATFREE);
2794
2795        return;
2796}
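
/*
 * Illustrative sketch (hypothetical "struct foo_stats" and callbacks): the
 * udata argument of uma_zfree_arg() is handed to the zone dtor, and, as the
 * check in uma_zfree_arg() above shows, freeing NULL is a no-op to match
 * free(9).
 */
#if 0   /* usage sketch; deliberately not compiled */
struct foo_stats {
        unsigned long fs_frees;
};

static void
foo_dtor_with_arg(void *mem, int size, void *arg)
{
        /* Assumed to have been installed as the zone dtor via uma_zcreate(). */
        struct foo_stats *st = arg;

        if (st != NULL)
                st->fs_frees++;
}

static void
free_example(uma_zone_t zone, void *item, struct foo_stats *st)
{
        uma_zfree_arg(zone, item, st);  /* st reaches foo_dtor_with_arg() */
        uma_zfree(zone, NULL);          /* harmless, like free(NULL) */
}
#endif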
2797
2798/*
2799 * Frees an item to an INTERNAL zone or allocates a free bucket
2800 *
2801 * Arguments:
2802 *      zone   The zone to free to
2803 *      item   The item we're freeing
2804 *      udata  User supplied data for the dtor
2805 *      skip   Skip dtors and finis
2806 */
2807static void
2808zone_free_item(uma_zone_t zone, void *item, void *udata,
2809    enum zfreeskip skip, int flags)
2810{
2811        uma_slab_t slab;
2812        uma_slabrefcnt_t slabref;
2813        uma_keg_t keg;
2814        u_int8_t *mem;
2815        u_int8_t freei;
2816        int clearfull;
2817
2818        if (skip < SKIP_DTOR && zone->uz_dtor)
2819                zone->uz_dtor(item, zone->uz_size, udata);
2820
2821        if (skip < SKIP_FINI && zone->uz_fini)
2822                zone->uz_fini(item, zone->uz_size);
2823
2824        ZONE_LOCK(zone);
2825
2826        if (flags & ZFREE_STATFAIL)
2827                zone->uz_fails++;
2828        if (flags & ZFREE_STATFREE)
2829                zone->uz_frees++;
2830
2831        if (!(zone->uz_flags & UMA_ZONE_VTOSLAB)) {
2832#ifndef __rtems__
2833                mem = (u_int8_t *)((unsigned long)item & (~UMA_SLAB_MASK));
2834#else /* __rtems__ */
2835                mem = rtems_bsd_chunk_get_begin(&rtems_bsd_uma_chunks, item);
2836#endif /* __rtems__ */
2837                keg = zone_first_keg(zone); /* Must only be one. */
2838                if (zone->uz_flags & UMA_ZONE_HASH) {
2839                        slab = hash_sfind(&keg->uk_hash, mem);
2840                } else {
2841                        mem += keg->uk_pgoff;
2842                        slab = (uma_slab_t)mem;
2843                }
2844        } else {
2845                /* This prevents redundant lookups via free(). */
2846                if ((zone->uz_flags & UMA_ZONE_MALLOC) && udata != NULL)
2847                        slab = (uma_slab_t)udata;
2848                else
2849                        slab = vtoslab((vm_offset_t)item);
2850                keg = slab->us_keg;
2851                keg_relock(keg, zone);
2852        }
2853        MPASS(keg == slab->us_keg);
2854
2855        /* Do we need to remove from any lists? */
2856        if (slab->us_freecount+1 == keg->uk_ipers) {
2857                LIST_REMOVE(slab, us_link);
2858                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
2859        } else if (slab->us_freecount == 0) {
2860                LIST_REMOVE(slab, us_link);
2861                LIST_INSERT_HEAD(&keg->uk_part_slab, slab, us_link);
2862        }
2863
2864        /* Slab management stuff */
2865        freei = ((unsigned long)item - (unsigned long)slab->us_data)
2866                / keg->uk_rsize;
2867
2868#ifdef INVARIANTS
2869        if (!skip)
2870                uma_dbg_free(zone, slab, item);
2871#endif
2872
2873        if (keg->uk_flags & UMA_ZONE_REFCNT) {
2874                slabref = (uma_slabrefcnt_t)slab;
2875                slabref->us_freelist[freei].us_item = slab->us_firstfree;
2876        } else {
2877                slab->us_freelist[freei].us_item = slab->us_firstfree;
2878        }
2879        slab->us_firstfree = freei;
2880        slab->us_freecount++;
2881
2882        /* Zone statistics */
2883        keg->uk_free++;
2884
2885        clearfull = 0;
2886        if (keg->uk_flags & UMA_ZFLAG_FULL) {
2887                if (keg->uk_pages < keg->uk_maxpages) {
2888                        keg->uk_flags &= ~UMA_ZFLAG_FULL;
2889                        clearfull = 1;
2890                }
2891
2892                /*
2893                 * We can handle one more allocation. Since we're clearing ZFLAG_FULL,
2894                 * wake up all procs blocked on pages. This should be uncommon, so
2895                 * keeping this simple for now (rather than adding count of blocked
2896                 * threads etc).
2897                 */
2898                wakeup(keg);
2899        }
2900        if (clearfull) {
2901                zone_relock(zone, keg);
2902                zone->uz_flags &= ~UMA_ZFLAG_FULL;
2903                wakeup(zone);
2904                ZONE_UNLOCK(zone);
2905        } else
2906                KEG_UNLOCK(keg);
2907}
2908
2909/* See uma.h */
2910int
2911uma_zone_set_max(uma_zone_t zone, int nitems)
2912{
2913        uma_keg_t keg;
2914
2915        ZONE_LOCK(zone);
2916        keg = zone_first_keg(zone);
2917        keg->uk_maxpages = (nitems / keg->uk_ipers) * keg->uk_ppera;
2918        if (keg->uk_maxpages * keg->uk_ipers < nitems)
2919                keg->uk_maxpages += keg->uk_ppera;
2920        nitems = keg->uk_maxpages * keg->uk_ipers;
2921        ZONE_UNLOCK(zone);
2922
2923        return (nitems);
2924}
2925
2926/* See uma.h */
2927int
2928uma_zone_get_max(uma_zone_t zone)
2929{
2930        int nitems;
2931        uma_keg_t keg;
2932
2933        ZONE_LOCK(zone);
2934        keg = zone_first_keg(zone);
2935        nitems = keg->uk_maxpages * keg->uk_ipers;
2936        ZONE_UNLOCK(zone);
2937
2938        return (nitems);
2939}
2940
2941/* See uma.h */
2942int
2943uma_zone_get_cur(uma_zone_t zone)
2944{
2945        int64_t nitems;
2946        u_int i;
2947
2948        ZONE_LOCK(zone);
2949        nitems = zone->uz_allocs - zone->uz_frees;
2950        CPU_FOREACH(i) {
2951                /*
2952                 * See the comment in sysctl_vm_zone_stats() regarding the
2953                 * safety of accessing the per-cpu caches. With the zone lock
2954                 * held, it is safe, but can potentially result in stale data.
2955                 */
2956                nitems += zone->uz_cpu[i].uc_allocs -
2957                    zone->uz_cpu[i].uc_frees;
2958        }
2959        ZONE_UNLOCK(zone);
2960
2961        return (nitems < 0 ? 0 : nitems);
2962}
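
/*
 * Illustrative sketch of the limit interface above (hypothetical zone, the
 * value 1000 is arbitrary): uma_zone_set_max() rounds the request up to
 * whole slabs and returns the effective limit, uma_zone_get_max() reports
 * the same value back, and uma_zone_get_cur() estimates the number of items
 * currently allocated; as noted above, that estimate can be slightly stale
 * because the per-CPU counters are read without draining the caches.
 */
#if 0   /* usage sketch; deliberately not compiled */
static void
limit_example(uma_zone_t zone)
{
        int limit, in_use;

        limit = uma_zone_set_max(zone, 1000);   /* rounded up to full slabs */
        in_use = uma_zone_get_cur(zone);        /* may be slightly stale */
        printf("limit %d (requested 1000), about %d items in use\n",
            limit, in_use);
}
#endif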
2963
2964/* See uma.h */
2965void
2966uma_zone_set_init(uma_zone_t zone, uma_init uminit)
2967{
2968        uma_keg_t keg;
2969
2970        ZONE_LOCK(zone);
2971        keg = zone_first_keg(zone);
2972        KASSERT(keg->uk_pages == 0,
2973            ("uma_zone_set_init on non-empty keg"));
2974        keg->uk_init = uminit;
2975        ZONE_UNLOCK(zone);
2976}
2977
2978/* See uma.h */
2979void
2980uma_zone_set_fini(uma_zone_t zone, uma_fini fini)
2981{
2982        uma_keg_t keg;
2983
2984        ZONE_LOCK(zone);
2985        keg = zone_first_keg(zone);
2986        KASSERT(keg->uk_pages == 0,
2987            ("uma_zone_set_fini on non-empty keg"));
2988        keg->uk_fini = fini;
2989        ZONE_UNLOCK(zone);
2990}
2991
2992/* See uma.h */
2993void
2994uma_zone_set_zinit(uma_zone_t zone, uma_init zinit)
2995{
2996        ZONE_LOCK(zone);
2997        KASSERT(zone_first_keg(zone)->uk_pages == 0,
2998            ("uma_zone_set_zinit on non-empty keg"));
2999        zone->uz_init = zinit;
3000        ZONE_UNLOCK(zone);
3001}
3002
3003/* See uma.h */
3004void
3005uma_zone_set_zfini(uma_zone_t zone, uma_fini zfini)
3006{
3007        ZONE_LOCK(zone);
3008        KASSERT(zone_first_keg(zone)->uk_pages == 0,
3009            ("uma_zone_set_zfini on non-empty keg"));
3010        zone->uz_fini = zfini;
3011        ZONE_UNLOCK(zone);
3012}
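
/*
 * Illustrative sketch (hypothetical callbacks): the keg-level hooks set with
 * uma_zone_set_init()/uma_zone_set_fini() run when backing slabs are created
 * and destroyed, while the zone-level hooks set with uma_zone_set_zinit()/
 * uma_zone_set_zfini() run when items move between the keg and this
 * particular zone (see zone_alloc_item() above).  All of them must be
 * installed before the zone allocates its first page, which the KASSERTs
 * above enforce.
 */
#if 0   /* usage sketch; deliberately not compiled */
static int
foo_item_init(void *mem, int size, int flags)
{
        /* e.g. set up a lock or list head embedded in the item */
        return (0);
}

static void
foo_item_fini(void *mem, int size)
{
        /* matching teardown before the memory is released */
}

static void
init_fini_example(uma_zone_t zone)
{
        uma_zone_set_init(zone, foo_item_init);         /* keg level */
        uma_zone_set_fini(zone, foo_item_fini);
        uma_zone_set_zinit(zone, foo_item_init);        /* zone level */
        uma_zone_set_zfini(zone, foo_item_fini);
}
#endif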
3013
3014/* See uma.h */
3015/* XXX uk_freef is not actually used with the zone locked */
3016void
3017uma_zone_set_freef(uma_zone_t zone, uma_free freef)
3018{
3019
3020        ZONE_LOCK(zone);
3021        zone_first_keg(zone)->uk_freef = freef;
3022        ZONE_UNLOCK(zone);
3023}
3024
3025/* See uma.h */
3026/* XXX uk_allocf is not actually used with the zone locked */
3027void
3028uma_zone_set_allocf(uma_zone_t zone, uma_alloc allocf)
3029{
3030        uma_keg_t keg;
3031
3032        ZONE_LOCK(zone);
3033        keg = zone_first_keg(zone);
3034        keg->uk_flags |= UMA_ZFLAG_PRIVALLOC;
3035        keg->uk_allocf = allocf;
3036        ZONE_UNLOCK(zone);
3037}
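
/*
 * Illustrative sketch of a custom back end (the "my_arena_*" functions are
 * hypothetical): the hook prototypes mirror page_alloc()/page_free() used by
 * uma_large_malloc()/uma_large_free() below.  A real back end must return
 * page-aligned memory of the requested size; the arena here only stands in
 * for that.
 */
#if 0   /* usage sketch; deliberately not compiled */
/* Hypothetical page-aligned arena used as the backing store. */
void    *my_arena_take(int bytes, int wait);
void     my_arena_give(void *mem, int bytes);

static void *
my_backend_alloc(uma_zone_t zone, int bytes, u_int8_t *pflag, int wait)
{
        *pflag = 0;             /* no special slab flags needed */
        return (my_arena_take(bytes, wait));
}

static void
my_backend_free(void *mem, int size, u_int8_t pflag)
{
        my_arena_give(mem, size);
}

static void
backend_example(uma_zone_t zone)
{
        uma_zone_set_allocf(zone, my_backend_alloc);
        uma_zone_set_freef(zone, my_backend_free);
}
#endif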
3038
3039#ifndef __rtems__
3040/* See uma.h */
3041int
3042uma_zone_set_obj(uma_zone_t zone, struct vm_object *obj, int count)
3043{
3044        uma_keg_t keg;
3045        vm_offset_t kva;
3046        int pages;
3047
3048        keg = zone_first_keg(zone);
3049        pages = count / keg->uk_ipers;
3050
3051        if (pages * keg->uk_ipers < count)
3052                pages++;
3053
3054        kva = kmem_alloc_nofault(kernel_map, pages * UMA_SLAB_SIZE);
3055
3056        if (kva == 0)
3057                return (0);
3058        if (obj == NULL)
3059                obj = vm_object_allocate(OBJT_PHYS, pages);
3060        else {
3061                VM_OBJECT_LOCK_INIT(obj, "uma object");
3062                _vm_object_allocate(OBJT_PHYS, pages, obj);
3063        }
3064        ZONE_LOCK(zone);
3065        keg->uk_kva = kva;
3066        keg->uk_obj = obj;
3067        keg->uk_maxpages = pages;
3068        keg->uk_allocf = obj_alloc;
3069        keg->uk_flags |= UMA_ZONE_NOFREE | UMA_ZFLAG_PRIVALLOC;
3070        ZONE_UNLOCK(zone);
3071        return (1);
3072}
3073#endif /* __rtems__ */
3074
3075/* See uma.h */
3076void
3077uma_prealloc(uma_zone_t zone, int items)
3078{
3079        int slabs;
3080        uma_slab_t slab;
3081        uma_keg_t keg;
3082
3083        keg = zone_first_keg(zone);
3084        ZONE_LOCK(zone);
3085        slabs = items / keg->uk_ipers;
3086        if (slabs * keg->uk_ipers < items)
3087                slabs++;
3088        while (slabs > 0) {
3089                slab = keg_alloc_slab(keg, zone, M_WAITOK);
3090                if (slab == NULL)
3091                        break;
3092                MPASS(slab->us_keg == keg);
3093                LIST_INSERT_HEAD(&keg->uk_free_slab, slab, us_link);
3094                slabs--;
3095        }
3096        ZONE_UNLOCK(zone);
3097}
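
/*
 * Illustrative sketch (the count of 128 is arbitrary): pre-populating a
 * zone's free slab list while sleeping is still allowed, so that later
 * M_NOWAIT allocations, e.g. from an interrupt path, are less likely to
 * fail.
 */
#if 0   /* usage sketch; deliberately not compiled */
static void
prealloc_example(uma_zone_t zone)
{
        uma_prealloc(zone, 128);        /* room for at least 128 items */
}
#endif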
3098
3099/* See uma.h */
3100u_int32_t *
3101uma_find_refcnt(uma_zone_t zone, void *item)
3102{
3103        uma_slabrefcnt_t slabref;
3104        uma_keg_t keg;
3105        u_int32_t *refcnt;
3106        int idx;
3107
3108#ifndef __rtems__
3109        slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item &
3110            (~UMA_SLAB_MASK));
3111#else /* __rtems__ */
3112        slabref = (uma_slabrefcnt_t)vtoslab((vm_offset_t)item);
3113#endif /* __rtems__ */
3114        KASSERT(slabref != NULL && (slabref->us_keg->uk_flags & UMA_ZONE_REFCNT),
3115            ("uma_find_refcnt(): zone possibly not UMA_ZONE_REFCNT"));
3116        keg = slabref->us_keg;
3117        idx = ((unsigned long)item - (unsigned long)slabref->us_data)
3118            / keg->uk_rsize;
3119        refcnt = &slabref->us_freelist[idx].us_refcnt;
3120        return (refcnt);
3121}
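
/*
 * Illustrative sketch (hypothetical zone; the pattern mirrors the mbuf
 * cluster code): uma_find_refcnt() is only meaningful for zones created
 * with UMA_ZONE_REFCNT, whose slabs carry a 32-bit counter per item.  UMA
 * itself never interprets the count; the caller manages it.
 */
#if 0   /* usage sketch; deliberately not compiled */
static void
refcnt_example(void)
{
        uma_zone_t clzone;
        u_int32_t *refcnt;
        void *cl;

        clzone = uma_zcreate("example clusters", 2048, NULL, NULL, NULL,
            NULL, UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
        cl = uma_zalloc(clzone, M_WAITOK);
        refcnt = uma_find_refcnt(clzone, cl);
        *refcnt = 1;                    /* caller-defined meaning */
        uma_zfree(clzone, cl);
        uma_zdestroy(clzone);
}
#endif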
3122
3123/* See uma.h */
3124void
3125uma_reclaim(void)
3126{
3127#ifdef UMA_DEBUG
3128        printf("UMA: vm asked us to release pages!\n");
3129#endif
3130        bucket_enable();
3131        zone_foreach(zone_drain);
3132        /*
3133         * The slab zones are visited early in the loop above, but draining
3134         * the other zones frees more slabs into them, so drain them again to
3135         * release pages that have since become empty.  Do the same for buckets.
3136         */
3137        zone_drain(slabzone);
3138        zone_drain(slabrefzone);
3139        bucket_zone_drain();
3140}
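
/*
 * Illustrative sketch: uma_reclaim() is meant to be called from a
 * low-memory path (on FreeBSD the page daemon does this); it may take zone
 * locks and must therefore not be called from interrupt context.
 */
#if 0   /* usage sketch; deliberately not compiled */
static void
memory_pressure_example(void)
{
        /* Push cached buckets and now-empty slabs back to the VM. */
        uma_reclaim();
}
#endif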
3141
3142/* See uma.h */
3143int
3144uma_zone_exhausted(uma_zone_t zone)
3145{
3146        int full;
3147
3148        ZONE_LOCK(zone);
3149        full = (zone->uz_flags & UMA_ZFLAG_FULL);
3150        ZONE_UNLOCK(zone);
3151        return (full); 
3152}
3153
3154int
3155uma_zone_exhausted_nolock(uma_zone_t zone)
3156{
3157        return (zone->uz_flags & UMA_ZFLAG_FULL);
3158}
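
/*
 * Illustrative sketch: consumers with a hard per-zone limit can use
 * uma_zone_exhausted() to fail an operation early instead of taking the
 * allocation slow path just to learn the same thing.
 */
#if 0   /* usage sketch; deliberately not compiled */
static void *
careful_alloc(uma_zone_t zone)
{
        if (uma_zone_exhausted(zone))
                return (NULL);
        return (uma_zalloc(zone, M_NOWAIT));
}
#endif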
3159
3160void *
3161uma_large_malloc(int size, int wait)
3162{
3163        void *mem;
3164        uma_slab_t slab;
3165        u_int8_t flags;
3166
3167        slab = zone_alloc_item(slabzone, NULL, wait);
3168        if (slab == NULL)
3169                return (NULL);
3170        mem = page_alloc(NULL, size, &flags, wait);
3171        if (mem) {
3172                vsetslab((vm_offset_t)mem, slab);
3173                slab->us_data = mem;
3174                slab->us_flags = flags | UMA_SLAB_MALLOC;
3175                slab->us_size = size;
3176        } else {
3177                zone_free_item(slabzone, slab, NULL, SKIP_NONE,
3178                    ZFREE_STATFAIL | ZFREE_STATFREE);
3179        }
3180
3181        return (mem);
3182}
3183
3184void
3185uma_large_free(uma_slab_t slab)
3186{
3187#ifndef __rtems__
3188        vsetobj((vm_offset_t)slab->us_data, kmem_object);
3189#endif /* __rtems__ */
3190        page_free(slab->us_data, slab->us_size, slab->us_flags);
3191        zone_free_item(slabzone, slab, NULL, SKIP_NONE, ZFREE_STATFREE);
3192}
3193
3194void
3195uma_print_stats(void)
3196{
3197        zone_foreach(uma_print_zone);
3198}
3199
3200static void
3201slab_print(uma_slab_t slab)
3202{
3203        printf("slab: keg %p, data %p, freecount %d, firstfree %d\n",
3204                slab->us_keg, slab->us_data, slab->us_freecount,
3205                slab->us_firstfree);
3206}
3207
3208static void
3209cache_print(uma_cache_t cache)
3210{
3211        printf("alloc: %p(%d), free: %p(%d)\n",
3212                cache->uc_allocbucket,
3213                cache->uc_allocbucket ? cache->uc_allocbucket->ub_cnt : 0,
3214                cache->uc_freebucket,
3215                cache->uc_freebucket ? cache->uc_freebucket->ub_cnt : 0);
3216}
3217
3218static void
3219uma_print_keg(uma_keg_t keg)
3220{
3221        uma_slab_t slab;
3222
3223        printf("keg: %s(%p) size %d(%d) flags %#x ipers %d ppera %d "
3224            "out %d free %d limit %d\n",
3225            keg->uk_name, keg, keg->uk_size, keg->uk_rsize, keg->uk_flags,
3226            keg->uk_ipers, keg->uk_ppera,
3227            (keg->uk_ipers * keg->uk_pages) - keg->uk_free, keg->uk_free,
3228            (keg->uk_maxpages / keg->uk_ppera) * keg->uk_ipers);
3229        printf("Part slabs:\n");
3230        LIST_FOREACH(slab, &keg->uk_part_slab, us_link)
3231                slab_print(slab);
3232        printf("Free slabs:\n");
3233        LIST_FOREACH(slab, &keg->uk_free_slab, us_link)
3234                slab_print(slab);
3235        printf("Full slabs:\n");
3236        LIST_FOREACH(slab, &keg->uk_full_slab, us_link)
3237                slab_print(slab);
3238}
3239
3240void
3241uma_print_zone(uma_zone_t zone)
3242{
3243        uma_cache_t cache;
3244        uma_klink_t kl;
3245        int i;
3246
3247        printf("zone: %s(%p) size %d flags %#x\n",
3248            zone->uz_name, zone, zone->uz_size, zone->uz_flags);
3249        LIST_FOREACH(kl, &zone->uz_kegs, kl_link)
3250                uma_print_keg(kl->kl_keg);
3251        CPU_FOREACH(i) {
3252                cache = &zone->uz_cpu[i];
3253                printf("CPU %d Cache:\n", i);
3254                cache_print(cache);
3255        }
3256}
3257
3258#ifndef __rtems__
3259#ifdef DDB
3260/*
3261 * Generate statistics across both the zone and its per-cpu caches.  Return
3262 * each statistic through its pointer argument if that pointer is non-NULL.
3263 *
3264 * Note: does not update the zone statistics, as it can't safely clear the
3265 * per-CPU cache statistic.
3266 *
3267 * XXXRW: Following the uc_allocbucket and uc_freebucket pointers here isn't
3268 * safe from off-CPU; we should modify the caches to track this information
3269 * directly so that we don't have to.
3270 */
3271static void
3272uma_zone_sumstat(uma_zone_t z, int *cachefreep, u_int64_t *allocsp,
3273    u_int64_t *freesp, u_int64_t *sleepsp)
3274{
3275        uma_cache_t cache;
3276        u_int64_t allocs, frees, sleeps;
3277        int cachefree, cpu;
3278
3279        allocs = frees = sleeps = 0;
3280        cachefree = 0;
3281        CPU_FOREACH(cpu) {
3282                cache = &z->uz_cpu[cpu];
3283                if (cache->uc_allocbucket != NULL)
3284                        cachefree += cache->uc_allocbucket->ub_cnt;
3285                if (cache->uc_freebucket != NULL)
3286                        cachefree += cache->uc_freebucket->ub_cnt;
3287                allocs += cache->uc_allocs;
3288                frees += cache->uc_frees;
3289        }
3290        allocs += z->uz_allocs;
3291        frees += z->uz_frees;
3292        sleeps += z->uz_sleeps;
3293        if (cachefreep != NULL)
3294                *cachefreep = cachefree;
3295        if (allocsp != NULL)
3296                *allocsp = allocs;
3297        if (freesp != NULL)
3298                *freesp = frees;
3299        if (sleepsp != NULL)
3300                *sleepsp = sleeps;
3301}
3302#endif /* DDB */
3303
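/*
 * Sysctl handler reporting the total number of UMA zones, summed over the
 * zone lists of all kegs.
 */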
3304static int
3305sysctl_vm_zone_count(SYSCTL_HANDLER_ARGS)
3306{
3307        uma_keg_t kz;
3308        uma_zone_t z;
3309        int count;
3310
3311        count = 0;
3312        mtx_lock(&uma_mtx);
3313        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3314                LIST_FOREACH(z, &kz->uk_zones, uz_link)
3315                        count++;
3316        }
3317        mtx_unlock(&uma_mtx);
3318        return (sysctl_handle_int(oidp, &count, 0, req));
3319}
3320
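/*
 * Sysctl handler streaming binary UMA statistics to userland: a single
 * uma_stream_header followed, for each zone, by a uma_type_header and
 * ush_maxcpus uma_percpu_stat records.
 */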
3321static int
3322sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
3323{
3324        struct uma_stream_header ush;
3325        struct uma_type_header uth;
3326        struct uma_percpu_stat ups;
3327        uma_bucket_t bucket;
3328        struct sbuf sbuf;
3329        uma_cache_t cache;
3330        uma_klink_t kl;
3331        uma_keg_t kz;
3332        uma_zone_t z;
3333        uma_keg_t k;
3334        int count, error, i;
3335
3336        error = sysctl_wire_old_buffer(req, 0);
3337        if (error != 0)
3338                return (error);
3339        sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
3340
3341        count = 0;
3342        mtx_lock(&uma_mtx);
3343        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3344                LIST_FOREACH(z, &kz->uk_zones, uz_link)
3345                        count++;
3346        }
3347
3348        /*
3349         * Insert stream header.
3350         */
3351        bzero(&ush, sizeof(ush));
3352        ush.ush_version = UMA_STREAM_VERSION;
3353        ush.ush_maxcpus = (mp_maxid + 1);
3354        ush.ush_count = count;
3355        (void)sbuf_bcat(&sbuf, &ush, sizeof(ush));
3356
3357        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3358                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
3359                        bzero(&uth, sizeof(uth));
3360                        ZONE_LOCK(z);
3361                        strlcpy(uth.uth_name, z->uz_name, UTH_MAX_NAME);
3362                        uth.uth_align = kz->uk_align;
3363                        uth.uth_size = kz->uk_size;
3364                        uth.uth_rsize = kz->uk_rsize;
3365                        LIST_FOREACH(kl, &z->uz_kegs, kl_link) {
3366                                k = kl->kl_keg;
3367                                uth.uth_maxpages += k->uk_maxpages;
3368                                uth.uth_pages += k->uk_pages;
3369                                uth.uth_keg_free += k->uk_free;
3370                                uth.uth_limit = (k->uk_maxpages / k->uk_ppera)
3371                                    * k->uk_ipers;
3372                        }
3373
3374                        /*
3375                         * A zone is secondary if it is not the first entry
3376                         * on the keg's zone list.
3377                         */
3378                        if ((z->uz_flags & UMA_ZONE_SECONDARY) &&
3379                            (LIST_FIRST(&kz->uk_zones) != z))
3380                                uth.uth_zone_flags = UTH_ZONE_SECONDARY;
3381
3382                        LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
3383                                uth.uth_zone_free += bucket->ub_cnt;
3384                        uth.uth_allocs = z->uz_allocs;
3385                        uth.uth_frees = z->uz_frees;
3386                        uth.uth_fails = z->uz_fails;
3387                        uth.uth_sleeps = z->uz_sleeps;
3388                        (void)sbuf_bcat(&sbuf, &uth, sizeof(uth));
3389                        /*
3390                         * While it is not normally safe to access the cache
3391                         * bucket pointers while not on the CPU that owns the
3392                         * cache, we only allow the pointers to be exchanged
3393                         * without the zone lock held, not invalidated, so
3394                         * accept the possible race associated with bucket
3395                         * exchange during monitoring.
3396                         */
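                        /*
                         * A zeroed record is still emitted for internal
                         * zones and absent CPUs so that consumers always
                         * see ush_maxcpus records per zone.
                         */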
3397                        for (i = 0; i < (mp_maxid + 1); i++) {
3398                                bzero(&ups, sizeof(ups));
3399                                if (kz->uk_flags & UMA_ZFLAG_INTERNAL)
3400                                        goto skip;
3401                                if (CPU_ABSENT(i))
3402                                        goto skip;
3403                                cache = &z->uz_cpu[i];
3404                                if (cache->uc_allocbucket != NULL)
3405                                        ups.ups_cache_free +=
3406                                            cache->uc_allocbucket->ub_cnt;
3407                                if (cache->uc_freebucket != NULL)
3408                                        ups.ups_cache_free +=
3409                                            cache->uc_freebucket->ub_cnt;
3410                                ups.ups_allocs = cache->uc_allocs;
3411                                ups.ups_frees = cache->uc_frees;
3412skip:
3413                                (void)sbuf_bcat(&sbuf, &ups, sizeof(ups));
3414                        }
3415                        ZONE_UNLOCK(z);
3416                }
3417        }
3418        mtx_unlock(&uma_mtx);
3419        error = sbuf_finish(&sbuf);
3420        sbuf_delete(&sbuf);
3421        return (error);
3422}
3423
3424#ifdef DDB
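/*
 * DDB "show uma" command: print a one-line summary per zone with its item
 * size, items in use, cached/free items, allocation requests and sleeps.
 */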
3425DB_SHOW_COMMAND(uma, db_show_uma)
3426{
3427        u_int64_t allocs, frees, sleeps;
3428        uma_bucket_t bucket;
3429        uma_keg_t kz;
3430        uma_zone_t z;
3431        int cachefree;
3432
3433        db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
3434            "Requests", "Sleeps");
3435        LIST_FOREACH(kz, &uma_kegs, uk_link) {
3436                LIST_FOREACH(z, &kz->uk_zones, uz_link) {
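                        /*
                         * Internal zones bypass the per-CPU caches, so
                         * their counters can be read directly from the
                         * zone instead of via uma_zone_sumstat().
                         */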
3437                        if (kz->uk_flags & UMA_ZFLAG_INTERNAL) {
3438                                allocs = z->uz_allocs;
3439                                frees = z->uz_frees;
3440                                sleeps = z->uz_sleeps;
3441                                cachefree = 0;
3442                        } else
3443                                uma_zone_sumstat(z, &cachefree, &allocs,
3444                                    &frees, &sleeps);
3445                        if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
3446                            (LIST_FIRST(&kz->uk_zones) != z)))
3447                                cachefree += kz->uk_free;
3448                        LIST_FOREACH(bucket, &z->uz_full_bucket, ub_link)
3449                                cachefree += bucket->ub_cnt;
3450                        db_printf("%18s %8ju %8jd %8d %12ju %8ju\n", z->uz_name,
3451                            (uintmax_t)kz->uk_size,
3452                            (intmax_t)(allocs - frees), cachefree,
3453                            (uintmax_t)allocs, sleeps);
3454                        if (db_pager_quit)
3455                                return;
3456                }
3457        }
3458}
3459#endif
3460#endif /* __rtems__ */