source: rtems-libbsd/freebsd/sys/vm/uma_int.h @ dbb2407

Last change: dbb2407, checked in by Sebastian Huber <sebastian.huber@…> on 10/23/18 at 10:11:54

ZONE(9): Disable UMA_ZONE_NUMA

There is no NUMA support in RTEMS currently.

/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2002-2005, 2009, 2013 Jeffrey Roberson <jeff@FreeBSD.org>
 * Copyright (c) 2004, 2005 Bosko Milekic <bmilekic@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

#include <sys/_bitset.h>
#include <sys/_task.h>

/*
 * This file includes definitions, structures, prototypes, and inlines that
 * should not be used outside of the actual implementation of UMA.
 */

/*
 * The brief summary;  Zones describe unique allocation types.  Zones are
 * organized into per-CPU caches which are filled by buckets.  Buckets are
 * organized according to memory domains.  Buckets are filled from kegs which
 * are also organized according to memory domains.  Kegs describe a unique
 * allocation type, backend memory provider, and layout.  Kegs are associated
 * with one or more zones and zones reference one or more kegs.  Kegs provide
 * slabs which are virtually contiguous collections of pages.  Each slab is
 * broken down into one or more items that will satisfy an individual allocation.
 *
 * Allocation is satisfied in the following order:
 * 1) Per-CPU cache
 * 2) Per-domain cache of buckets
 * 3) Slab from any of N kegs
 * 4) Backend page provider
 *
 * More detail on individual objects is contained below:
 *
 * Kegs contain lists of slabs which are stored in either the full bin, empty
 * bin, or partially allocated bin, to reduce fragmentation.  They also contain
 * the user supplied value for size, which is adjusted for alignment purposes
 * and rsize is the result of that.  The Keg also stores information for
 * managing a hash of page addresses that maps pages to uma_slab_t structures
 * for pages that don't have embedded uma_slab_t's.
 *
 * Keg slab lists are organized by memory domain to support NUMA allocation
 * policies.  By default allocations are spread across domains to reduce the
 * potential for hotspots.  Special keg creation flags may be specified to
 * prefer local allocation.  However there is no strict enforcement as frees
 * may happen on any CPU and these are returned to the CPU-local cache
 * regardless of the originating domain.
 *
 * The uma_slab_t may be embedded in a UMA_SLAB_SIZE chunk of memory or it may
 * be allocated off the page from a special slab zone.  The free list within a
 * slab is managed with a bitmask.  For item sizes that would yield more than
 * 10% memory waste we potentially allocate a separate uma_slab_t if this will
 * improve the number of items per slab that will fit.
 *
 * The only really gross cases, with regards to memory waste, are for those
 * items that are just over half the page size.  You can get nearly 50% waste,
 * so you fall back to the memory footprint of the power of two allocator. I
 * have looked at memory allocation sizes on many of the machines available to
 * me, and there does not seem to be an abundance of allocations at this range
 * so at this time it may not make sense to optimize for it.  This can, of
 * course, be solved with dynamic slab sizes.
 *
 * Kegs may serve multiple Zones but by far most of the time they only serve
 * one.  When a Zone is created, a Keg is allocated and setup for it.  While
 * the backing Keg stores slabs, the Zone caches Buckets of items allocated
 * from the slabs.  Each Zone is equipped with an init/fini and ctor/dtor
 * pair, as well as with its own set of small per-CPU caches, layered above
 * the Zone's general Bucket cache.
 *
 * The PCPU caches are protected by critical sections, and may be accessed
 * safely only from their associated CPU, while the Zones backed by the same
 * Keg all share a common Keg lock (to coalesce contention on the backing
 * slabs).  The backing Keg typically only serves one Zone but in the case of
 * multiple Zones, one of the Zones is considered the Master Zone and all
 * Zone-related stats from the Keg are done in the Master Zone.  For an
 * example of a Multi-Zone setup, refer to the Mbuf allocation code.
 */
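
/*
 * Illustrative sketch: consumers never touch the structures below directly;
 * they use the zone(9) API declared in uma.h, for which this file supplies
 * the machinery.  "foo" and struct foo are placeholders for this example:
 *
 *      zone = uma_zcreate("foo", sizeof(struct foo),
 *          NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
 *      item = uma_zalloc(zone, M_WAITOK);      (per-CPU cache, then buckets,
 *                                               then a keg slab, then backend)
 *      uma_zfree(zone, item);                  (returned to the CPU-local cache)
 *      uma_zdestroy(zone);
 */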

/*
 *      This is the representation for normal (Non OFFPAGE slab)
 *
 *      i == item
 *      s == slab pointer
 *
 *      <----------------  Page (UMA_SLAB_SIZE) ------------------>
 *      ___________________________________________________________
 *     | _  _  _  _  _  _  _  _  _  _  _  _  _  _  _   ___________ |
 *     ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i| |slab header||
 *     ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_| |___________||
 *     |___________________________________________________________|
 *
 *
 *      This is an OFFPAGE slab. These can be larger than UMA_SLAB_SIZE.
 *
 *      ___________________________________________________________
 *     | _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _  _   |
 *     ||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i||i|  |
 *     ||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_||_|  |
 *     |___________________________________________________________|
 *       ___________    ^
 *      |slab header|   |
 *      |___________|---*
 *
 */
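
/*
 * Illustrative sketch (not a verbatim excerpt of uma_core.c): for the
 * embedded layout above, the keg records where the header sits via
 * uk_pgoff, so a freshly allocated slab is wired up roughly as
 *
 *      slab = (uma_slab_t)(mem + keg->uk_pgoff);
 *      slab->us_data = mem;
 *
 * where mem is the page base returned by the backend.  For the OFFPAGE
 * layout the header comes from uk_slabzone instead and is found again
 * through the hash/vtoslab machinery declared below.
 */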

#ifndef VM_UMA_INT_H
#define VM_UMA_INT_H

#define UMA_SLAB_SIZE   PAGE_SIZE       /* How big are our slabs? */
#define UMA_SLAB_MASK   (PAGE_SIZE - 1) /* Mask to get back to the page */
#define UMA_SLAB_SHIFT  PAGE_SHIFT      /* Number of bits in PAGE_MASK */

/* Max waste percentage before going to off page slab management */
#define UMA_MAX_WASTE   10

/*
 * Size of memory in a non-OFFPAGE slab available for actual items.
 */
#define UMA_SLAB_SPACE  (UMA_SLAB_SIZE - sizeof(struct uma_slab))
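
/*
 * Worked example (illustrative, assuming 4 KiB pages and a slab header of
 * roughly 80-100 bytes): with 1024-byte items an embedded header leaves room
 * for only 3 items and wastes close to a quarter of the page, well beyond
 * UMA_MAX_WASTE percent; an off-page header lets 4 items fill the page
 * exactly, so such a keg is set up OFFPAGE.
 */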

/*
 * I doubt there will be many cases where this is exceeded. This is the initial
 * size of the hash table for uma_slabs that are managed off page. This hash
 * does expand by powers of two.  Currently it doesn't get smaller.
 */
#define UMA_HASH_SIZE_INIT      32

/*
 * I should investigate other hashing algorithms.  This should yield a low
 * number of collisions if the pages are relatively contiguous.
 */

#define UMA_HASH(h, s) ((((uintptr_t)s) >> UMA_SLAB_SHIFT) & (h)->uh_hashmask)

#define UMA_HASH_INSERT(h, s, mem)                                      \
                SLIST_INSERT_HEAD(&(h)->uh_slab_hash[UMA_HASH((h),      \
                    (mem))], (s), us_hlink)
#define UMA_HASH_REMOVE(h, s, mem)                                      \
                SLIST_REMOVE(&(h)->uh_slab_hash[UMA_HASH((h),           \
                    (mem))], (s), uma_slab, us_hlink)

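/*
 * Illustrative example: UMA_HASH() simply discards the page-offset bits, so
 * with 4 KiB pages a slab whose pages start at 0xdead3000 indexes bucket
 * (0xdead3000 >> 12) & uh_hashmask.  A lookup therefore needs only the page
 * base of an item, as hash_sfind() below shows.
 */
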
/* Hash table for freed address -> slab translation */

SLIST_HEAD(slabhead, uma_slab);

struct uma_hash {
        struct slabhead *uh_slab_hash;  /* Hash table for slabs */
        int             uh_hashsize;    /* Current size of the hash table */
        int             uh_hashmask;    /* Mask used during hashing */
};

/*
 * align field or structure to cache line
 */
#if defined(__amd64__) || defined(__powerpc64__)
#define UMA_ALIGN       __aligned(128)
#else
#define UMA_ALIGN
#endif

/*
 * Structures for per cpu queues.
 */

struct uma_bucket {
        LIST_ENTRY(uma_bucket)  ub_link;        /* Link into the zone */
        int16_t ub_cnt;                         /* Count of items in bucket. */
        int16_t ub_entries;                     /* Max items. */
        void    *ub_bucket[];                   /* actual allocation storage */
};

typedef struct uma_bucket * uma_bucket_t;

struct uma_cache {
        uma_bucket_t    uc_freebucket;  /* Bucket we're freeing to */
        uma_bucket_t    uc_allocbucket; /* Bucket to allocate from */
        uint64_t        uc_allocs;      /* Count of allocations */
        uint64_t        uc_frees;       /* Count of frees */
} UMA_ALIGN;

typedef struct uma_cache * uma_cache_t;

/*
 * Per-domain memory list.  Embedded in the kegs.
 */
struct uma_domain {
        LIST_HEAD(,uma_slab)    ud_part_slab;   /* partially allocated slabs */
        LIST_HEAD(,uma_slab)    ud_free_slab;   /* empty slab list */
        LIST_HEAD(,uma_slab)    ud_full_slab;   /* full slabs */
};

typedef struct uma_domain * uma_domain_t;

/*
 * Keg management structure
 *
 * TODO: Optimize for cache line size
 *
 */
struct uma_keg {
        struct mtx      uk_lock;        /* Lock for the keg */
        struct uma_hash uk_hash;
        LIST_HEAD(,uma_zone)    uk_zones;       /* Keg's zones */

        uint32_t        uk_cursor;      /* Domain alloc cursor. */
        uint32_t        uk_align;       /* Alignment mask */
        uint32_t        uk_pages;       /* Total page count */
        uint32_t        uk_free;        /* Count of items free in slabs */
        uint32_t        uk_reserve;     /* Number of reserved items. */
        uint32_t        uk_size;        /* Requested size of each item */
        uint32_t        uk_rsize;       /* Real size of each item */
        uint32_t        uk_maxpages;    /* Maximum number of pages to alloc */

        uma_init        uk_init;        /* Keg's init routine */
        uma_fini        uk_fini;        /* Keg's fini routine */
        uma_alloc       uk_allocf;      /* Allocation function */
        uma_free        uk_freef;       /* Free routine */

        u_long          uk_offset;      /* Next free offset from base KVA */
        vm_offset_t     uk_kva;         /* Zone base KVA */
        uma_zone_t      uk_slabzone;    /* Slab zone backing us, if OFFPAGE */

        uint32_t        uk_pgoff;       /* Offset to uma_slab struct */
        uint16_t        uk_ppera;       /* pages per allocation from backend */
        uint16_t        uk_ipers;       /* Items per slab */
        uint32_t        uk_flags;       /* Internal flags */

        /* Least used fields go to the last cache line. */
        const char      *uk_name;               /* Name of creating zone. */
        LIST_ENTRY(uma_keg)     uk_link;        /* List of all kegs */

        /* Must be last, variable sized. */
        struct uma_domain       uk_domain[];    /* Keg's slab lists. */
};
typedef struct uma_keg  * uma_keg_t;

/*
 * Free bits per-slab.
 */
#define SLAB_SETSIZE    (PAGE_SIZE / UMA_SMALLEST_UNIT)
BITSET_DEFINE(slabbits, SLAB_SETSIZE);

/*
 * The slab structure manages a single contiguous allocation from backing
 * store and subdivides it into individually allocatable items.
 */
struct uma_slab {
        uma_keg_t       us_keg;                 /* Keg we live in */
        union {
                LIST_ENTRY(uma_slab)    _us_link;       /* slabs in zone */
#ifndef __rtems__
                unsigned long   _us_size;       /* Size of allocation */
#endif /* __rtems__ */
        } us_type;
        SLIST_ENTRY(uma_slab)   us_hlink;       /* Link for hash table */
        uint8_t         *us_data;               /* First item */
        struct slabbits us_free;                /* Free bitmask. */
#ifdef INVARIANTS
        struct slabbits us_debugfree;           /* Debug bitmask. */
#endif
        uint16_t        us_freecount;           /* How many are free? */
        uint8_t         us_flags;               /* Page flags see uma.h */
        uint8_t         us_domain;              /* Backing NUMA domain. */
};

#define us_link us_type._us_link
#ifndef __rtems__
#define us_size us_type._us_size
#endif /* __rtems__ */

#if MAXMEMDOM >= 255
#error "Slab domain type insufficient"
#endif

typedef struct uma_slab * uma_slab_t;
typedef uma_slab_t (*uma_slaballoc)(uma_zone_t, uma_keg_t, int, int);

struct uma_klink {
        LIST_ENTRY(uma_klink)   kl_link;
        uma_keg_t               kl_keg;
};
typedef struct uma_klink *uma_klink_t;

struct uma_zone_domain {
        LIST_HEAD(,uma_bucket)  uzd_buckets;    /* full buckets */
};

typedef struct uma_zone_domain * uma_zone_domain_t;

/*
 * Zone management structure
 *
 * TODO: Optimize for cache line size
 *
 */
struct uma_zone {
        /* Offset 0, used in alloc/free fast/medium fast path and const. */
        struct mtx      *uz_lockptr;
        const char      *uz_name;       /* Text name of the zone */
#ifndef __rtems__
        struct uma_zone_domain  *uz_domain;     /* per-domain buckets */
#else /* __rtems__ */
        struct uma_zone_domain  uz_domain[1];   /* per-domain buckets */
#endif /* __rtems__ */
        uint32_t        uz_flags;       /* Flags inherited from kegs */
        uint32_t        uz_size;        /* Size inherited from kegs */
        uma_ctor        uz_ctor;        /* Constructor for each allocation */
        uma_dtor        uz_dtor;        /* Destructor */
        uma_init        uz_init;        /* Initializer for each item */
        uma_fini        uz_fini;        /* Finalizer for each item. */

        /* Offset 64, used in bucket replenish. */
        uma_import      uz_import;      /* Import new memory to cache. */
        uma_release     uz_release;     /* Release memory from cache. */
        void            *uz_arg;        /* Import/release argument. */
        uma_slaballoc   uz_slab;        /* Allocate a slab from the backend. */
        uint16_t        uz_count;       /* Amount of items in full bucket */
        uint16_t        uz_count_min;   /* Minimal amount of items there */
        /* 32bit pad on 64bit. */
        LIST_ENTRY(uma_zone)    uz_link;        /* List of all zones in keg */
        LIST_HEAD(,uma_klink)   uz_kegs;        /* List of kegs. */

        /* Offset 128 Rare. */
        /*
         * The lock is placed here to avoid adjacent line prefetcher
         * in fast paths and to take up space near infrequently accessed
         * members to reduce alignment overhead.
         */
        struct mtx      uz_lock;        /* Lock for the zone */
        struct uma_klink        uz_klink;       /* klink for first keg. */
        /* The next two fields are used to print rate-limited warnings. */
        const char      *uz_warning;    /* Warning to print on failure */
        struct timeval  uz_ratecheck;   /* Warnings rate-limiting */
        struct task     uz_maxaction;   /* Task to run when at limit */

        /* 16 bytes of pad. */

        /* Offset 256, atomic stats. */
        volatile u_long uz_allocs UMA_ALIGN; /* Total number of allocations */
        volatile u_long uz_fails;       /* Total number of alloc failures */
        volatile u_long uz_frees;       /* Total number of frees */
        uint64_t        uz_sleeps;      /* Total number of alloc sleeps */

        /*
         * This HAS to be the last item because we adjust the zone size
         * based on NCPU and then allocate the space for the zones.
         */
        struct uma_cache        uz_cpu[]; /* Per cpu caches */

        /* uz_domain follows here. */
};

/*
 * These flags must not overlap with the UMA_ZONE flags specified in uma.h.
 */
#define UMA_ZFLAG_MULTI         0x04000000      /* Multiple kegs in the zone. */
#define UMA_ZFLAG_DRAINING      0x08000000      /* Running zone_drain. */
#define UMA_ZFLAG_BUCKET        0x10000000      /* Bucket zone. */
#define UMA_ZFLAG_INTERNAL      0x20000000      /* No offpage no PCPU. */
#define UMA_ZFLAG_FULL          0x40000000      /* Reached uz_maxpages */
#define UMA_ZFLAG_CACHEONLY     0x80000000      /* Don't ask VM for buckets. */

#define UMA_ZFLAG_INHERIT                                               \
    (UMA_ZFLAG_INTERNAL | UMA_ZFLAG_CACHEONLY | UMA_ZFLAG_BUCKET)

static inline uma_keg_t
zone_first_keg(uma_zone_t zone)
{
        uma_klink_t klink;

        klink = LIST_FIRST(&zone->uz_kegs);
        return (klink != NULL) ? klink->kl_keg : NULL;
}

#undef UMA_ALIGN

#ifdef _KERNEL
/* Internal prototypes */
static __inline uma_slab_t hash_sfind(struct uma_hash *hash, uint8_t *data);
void *uma_large_malloc(vm_size_t size, int wait);
void *uma_large_malloc_domain(vm_size_t size, int domain, int wait);
void uma_large_free(uma_slab_t slab);

/* Lock Macros */

#define KEG_LOCK_INIT(k, lc)                                    \
        do {                                                    \
                if ((lc))                                       \
                        mtx_init(&(k)->uk_lock, (k)->uk_name,   \
                            (k)->uk_name, MTX_DEF | MTX_DUPOK); \
                else                                            \
                        mtx_init(&(k)->uk_lock, (k)->uk_name,   \
                            "UMA zone", MTX_DEF | MTX_DUPOK);   \
        } while (0)

#define KEG_LOCK_FINI(k)        mtx_destroy(&(k)->uk_lock)
#define KEG_LOCK(k)     mtx_lock(&(k)->uk_lock)
#define KEG_UNLOCK(k)   mtx_unlock(&(k)->uk_lock)

#define ZONE_LOCK_INIT(z, lc)                                   \
        do {                                                    \
                if ((lc))                                       \
                        mtx_init(&(z)->uz_lock, (z)->uz_name,   \
                            (z)->uz_name, MTX_DEF | MTX_DUPOK); \
                else                                            \
                        mtx_init(&(z)->uz_lock, (z)->uz_name,   \
                            "UMA zone", MTX_DEF | MTX_DUPOK);   \
        } while (0)

#define ZONE_LOCK(z)    mtx_lock((z)->uz_lockptr)
#define ZONE_TRYLOCK(z) mtx_trylock((z)->uz_lockptr)
#define ZONE_UNLOCK(z)  mtx_unlock((z)->uz_lockptr)
#define ZONE_LOCK_FINI(z)       mtx_destroy(&(z)->uz_lock)

/*
 * Find a slab within a hash table.  This is used for OFFPAGE zones to lookup
 * the slab structure.
 *
 * Arguments:
 *      hash  The hash table to search.
 *      data  The base page of the item.
 *
 * Returns:
 *      A pointer to a slab if successful, else NULL.
 */
static __inline uma_slab_t
hash_sfind(struct uma_hash *hash, uint8_t *data)
{
        uma_slab_t slab;
        int hval;

        hval = UMA_HASH(hash, data);

        SLIST_FOREACH(slab, &hash->uh_slab_hash[hval], us_hlink) {
                if ((uint8_t *)slab->us_data == data)
                        return (slab);
        }
        return (NULL);
}
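
/*
 * Typical use (an illustrative sketch, not a verbatim excerpt of uma_core.c):
 * the free path of a HASH keg masks an item pointer down to its page base
 * before consulting the keg's hash:
 *
 *      mem = (uint8_t *)((uintptr_t)item & (~UMA_SLAB_MASK));
 *      slab = hash_sfind(&keg->uk_hash, mem);
 */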

#ifdef __rtems__
#include <machine/rtems-bsd-page.h>
#endif /* __rtems__ */
static __inline uma_slab_t
vtoslab(vm_offset_t va)
{
#ifndef __rtems__
        vm_page_t p;

        p = PHYS_TO_VM_PAGE(pmap_kextract(va));
        return ((uma_slab_t)p->plinks.s.pv);
#else /* __rtems__ */
        return (rtems_bsd_page_get_object((void *)va));
#endif /* __rtems__ */
}

static __inline void
vsetslab(vm_offset_t va, uma_slab_t slab)
{
#ifndef __rtems__
        vm_page_t p;

        p = PHYS_TO_VM_PAGE(pmap_kextract(va));
        p->plinks.s.pv = slab;
#else /* __rtems__ */
        rtems_bsd_page_set_object((void *)va, slab);
#endif /* __rtems__ */
}

/*
 * The following two functions may be defined by architecture specific code
 * if they can provide more efficient allocation functions.  This is useful
 * for using direct mapped addresses.
 */
void *uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain,
    uint8_t *pflag, int wait);
void uma_small_free(void *mem, vm_size_t size, uint8_t flags);

/* Set a global soft limit on UMA managed memory. */
void uma_set_limit(unsigned long limit);
#endif /* _KERNEL */

#endif /* VM_UMA_INT_H */