source: rtems-libbsd/freebsd/lib/libc/db/btree/bt_split.c @ e599318

4.1155-freebsd-126-freebsd-12freebsd-9.3
Last change on this file since e599318 was e599318, checked in by Sebastian Huber <sebastian.huber@…>, on 10/09/13 at 20:52:54

Update files to match FreeBSD layout

Add compatibility with Newlib header files. Some FreeBSD header files
are mapped by the translation script:

o rtems/bsd/sys/_types.h
o rtems/bsd/sys/errno.h
o rtems/bsd/sys/lock.h
o rtems/bsd/sys/param.h
o rtems/bsd/sys/resource.h
o rtems/bsd/sys/time.h
o rtems/bsd/sys/timespec.h
o rtems/bsd/sys/types.h
o rtems/bsd/sys/unistd.h

It is now possible to include <sys/socket.h> directly for example.

Generate one Makefile which builds everything including tests.

  • Property mode set to 100644
File size: 21.4 KB
Line 
1#include "port_before.h"
2
3/*-
4 * Copyright (c) 1990, 1993, 1994
5 *      The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Mike Olson.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#if defined(LIBC_SCCS) && !defined(lint)
36static char sccsid[] = "@(#)bt_split.c  8.10 (Berkeley) 1/9/95";
37#endif /* LIBC_SCCS and not lint */
38#include <sys/cdefs.h>
39__FBSDID("$FreeBSD$");
40
41#include <rtems/bsd/sys/types.h>
42
43#include <limits.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47
48#include <db.h>
49#include "btree.h"
50
51static int       bt_broot(BTREE *, PAGE *, PAGE *, PAGE *);
52static PAGE     *bt_page(BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
53static int       bt_preserve(BTREE *, pgno_t);
54static PAGE     *bt_psplit(BTREE *, PAGE *, PAGE *, PAGE *, indx_t *, size_t);
55static PAGE     *bt_root(BTREE *, PAGE *, PAGE **, PAGE **, indx_t *, size_t);
56static int       bt_rroot(BTREE *, PAGE *, PAGE *, PAGE *);
57static recno_t   rec_total(PAGE *);
58
59#ifdef STATISTICS
60u_long  bt_rootsplit, bt_split, bt_sortsplit, bt_pfxsaved;
61#endif
62
63/*
64 * __BT_SPLIT -- Split the tree.
65 *
66 * Parameters:
67 *      t:      tree
68 *      sp:     page to split
69 *      key:    key to insert
70 *      data:   data to insert
71 *      flags:  BIGKEY/BIGDATA flags
72 *      ilen:   insert length
73 *      skip:   index to leave open
74 *
75 * Returns:
76 *      RET_ERROR, RET_SUCCESS
77 */
78int
79__bt_split(BTREE *t, PAGE *sp, const DBT *key, const DBT *data, int flags,
80    size_t ilen, u_int32_t argskip)
81{
82        BINTERNAL *bi;
83        BLEAF *bl, *tbl;
84        DBT a, b;
85        EPGNO *parent;
86        PAGE *h, *l, *r, *lchild, *rchild;
87        indx_t nxtindex;
88        u_int16_t skip;
89        u_int32_t n, nbytes, nksize;
90        int parentsplit;
91        char *dest;
92
93        /*
94         * Split the page into two pages, l and r.  The split routines return
95         * a pointer to the page into which the key should be inserted and with
96         * skip set to the offset which should be used.  Additionally, l and r
97         * are pinned.
98         */
99        skip = argskip;
100        h = sp->pgno == P_ROOT ?
101            bt_root(t, sp, &l, &r, &skip, ilen) :
102            bt_page(t, sp, &l, &r, &skip, ilen);
103        if (h == NULL)
104                return (RET_ERROR);
105
106        /*
107         * Insert the new key/data pair into the leaf page.  (Key inserts
108         * always cause a leaf page to split first.)
109         */
110        h->linp[skip] = h->upper -= ilen;
111        dest = (char *)h + h->upper;
112        if (F_ISSET(t, R_RECNO))
113                WR_RLEAF(dest, data, flags)
114        else
115                WR_BLEAF(dest, key, data, flags)
116
117        /* If the root page was split, make it look right. */
118        if (sp->pgno == P_ROOT &&
119            (F_ISSET(t, R_RECNO) ?
120            bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
121                goto err2;
122
123        /*
124         * Now we walk the parent page stack -- a LIFO stack of the pages that
125         * were traversed when we searched for the page that split.  Each stack
126         * entry is a page number and a page index offset.  The offset is for
127         * the page traversed on the search.  We've just split a page, so we
128         * have to insert a new key into the parent page.
129         *
130         * If the insert into the parent page causes it to split, may have to
131         * continue splitting all the way up the tree.  We stop if the root
132         * splits or the page inserted into didn't have to split to hold the
133         * new key.  Some algorithms replace the key for the old page as well
134         * as the new page.  We don't, as there's no reason to believe that the
135         * first key on the old page is any better than the key we have, and,
136         * in the case of a key being placed at index 0 causing the split, the
137         * key is unavailable.
138         *
139         * There are a maximum of 5 pages pinned at any time.  We keep the left
140         * and right pages pinned while working on the parent.   The 5 are the
141         * two children, left parent and right parent (when the parent splits)
142         * and the root page or the overflow key page when calling bt_preserve.
143         * This code must make sure that all pins are released other than the
144         * root page or overflow page which is unlocked elsewhere.
145         */
146        while ((parent = BT_POP(t)) != NULL) {
147                lchild = l;
148                rchild = r;
149
150                /* Get the parent page. */
151                if ((h = mpool_get(t->bt_mp, parent->pgno, 0)) == NULL)
152                        goto err2;
153
154                /*
155                 * The new key goes ONE AFTER the index, because the split
156                 * was to the right.
157                 */
158                skip = parent->index + 1;
159
160                /*
161                 * Calculate the space needed on the parent page.
162                 *
163                 * Prefix trees: space hack when inserting into BINTERNAL
164                 * pages.  Retain only what's needed to distinguish between
165                 * the new entry and the LAST entry on the page to its left.
166                 * If the keys compare equal, retain the entire key.  Note,
167                 * we don't touch overflow keys, and the entire key must be
168                 * retained for the next-to-left most key on the leftmost
169                 * page of each level, or the search will fail.  Applicable
170                 * ONLY to internal pages that have leaf pages as children.
171                 * Further reduction of the key between pairs of internal
172                 * pages loses too much information.
173                 */
174                switch (rchild->flags & P_TYPE) {
175                case P_BINTERNAL:
176                        bi = GETBINTERNAL(rchild, 0);
177                        nbytes = NBINTERNAL(bi->ksize);
178                        break;
179                case P_BLEAF:
180                        bl = GETBLEAF(rchild, 0);
181                        nbytes = NBINTERNAL(bl->ksize);
182                        if (t->bt_pfx && !(bl->flags & P_BIGKEY) &&
183                            (h->prevpg != P_INVALID || skip > 1)) {
184                                tbl = GETBLEAF(lchild, NEXTINDEX(lchild) - 1);
185                                a.size = tbl->ksize;
186                                a.data = tbl->bytes;
187                                b.size = bl->ksize;
188                                b.data = bl->bytes;
189                                nksize = t->bt_pfx(&a, &b);
190                                n = NBINTERNAL(nksize);
191                                if (n < nbytes) {
192#ifdef STATISTICS
193                                        bt_pfxsaved += nbytes - n;
194#endif
195                                        nbytes = n;
196                                } else
197                                        nksize = 0;
198                        } else
199                                nksize = 0;
200                        break;
201                case P_RINTERNAL:
202                case P_RLEAF:
203                        nbytes = NRINTERNAL;
204                        break;
205                default:
206                        abort();
207                }
208
209                /* Split the parent page if necessary or shift the indices. */
210                if ((u_int32_t)(h->upper - h->lower) < nbytes + sizeof(indx_t)) {
211                        sp = h;
212                        h = h->pgno == P_ROOT ?
213                            bt_root(t, h, &l, &r, &skip, nbytes) :
214                            bt_page(t, h, &l, &r, &skip, nbytes);
215                        if (h == NULL)
216                                goto err1;
217                        parentsplit = 1;
218                } else {
219                        if (skip < (nxtindex = NEXTINDEX(h)))
220                                memmove(h->linp + skip + 1, h->linp + skip,
221                                    (nxtindex - skip) * sizeof(indx_t));
222                        h->lower += sizeof(indx_t);
223                        parentsplit = 0;
224                }
225
226                /* Insert the key into the parent page. */
227                switch (rchild->flags & P_TYPE) {
228                case P_BINTERNAL:
229                        h->linp[skip] = h->upper -= nbytes;
230                        dest = (char *)h + h->linp[skip];
231                        memmove(dest, bi, nbytes);
232                        ((BINTERNAL *)dest)->pgno = rchild->pgno;
233                        break;
234                case P_BLEAF:
235                        h->linp[skip] = h->upper -= nbytes;
236                        dest = (char *)h + h->linp[skip];
237                        WR_BINTERNAL(dest, nksize ? nksize : bl->ksize,
238                            rchild->pgno, bl->flags & P_BIGKEY);
239                        memmove(dest, bl->bytes, nksize ? nksize : bl->ksize);
240                        if (bl->flags & P_BIGKEY &&
241                            bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
242                                goto err1;
243                        break;
244                case P_RINTERNAL:
245                        /*
246                         * Update the left page count.  If split
247                         * added at index 0, fix the correct page.
248                         */
249                        if (skip > 0)
250                                dest = (char *)h + h->linp[skip - 1];
251                        else
252                                dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
253                        ((RINTERNAL *)dest)->nrecs = rec_total(lchild);
254                        ((RINTERNAL *)dest)->pgno = lchild->pgno;
255
256                        /* Update the right page count. */
257                        h->linp[skip] = h->upper -= nbytes;
258                        dest = (char *)h + h->linp[skip];
259                        ((RINTERNAL *)dest)->nrecs = rec_total(rchild);
260                        ((RINTERNAL *)dest)->pgno = rchild->pgno;
261                        break;
262                case P_RLEAF:
263                        /*
264                         * Update the left page count.  If split
265                         * added at index 0, fix the correct page.
266                         */
267                        if (skip > 0)
268                                dest = (char *)h + h->linp[skip - 1];
269                        else
270                                dest = (char *)l + l->linp[NEXTINDEX(l) - 1];
271                        ((RINTERNAL *)dest)->nrecs = NEXTINDEX(lchild);
272                        ((RINTERNAL *)dest)->pgno = lchild->pgno;
273
274                        /* Update the right page count. */
275                        h->linp[skip] = h->upper -= nbytes;
276                        dest = (char *)h + h->linp[skip];
277                        ((RINTERNAL *)dest)->nrecs = NEXTINDEX(rchild);
278                        ((RINTERNAL *)dest)->pgno = rchild->pgno;
279                        break;
280                default:
281                        abort();
282                }
283
284                /* Unpin the held pages. */
285                if (!parentsplit) {
286                        mpool_put(t->bt_mp, h, MPOOL_DIRTY);
287                        break;
288                }
289
290                /* If the root page was split, make it look right. */
291                if (sp->pgno == P_ROOT &&
292                    (F_ISSET(t, R_RECNO) ?
293                    bt_rroot(t, sp, l, r) : bt_broot(t, sp, l, r)) == RET_ERROR)
294                        goto err1;
295
296                mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
297                mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
298        }
299
300        /* Unpin the held pages. */
301        mpool_put(t->bt_mp, l, MPOOL_DIRTY);
302        mpool_put(t->bt_mp, r, MPOOL_DIRTY);
303
304        /* Clear any pages left on the stack. */
305        return (RET_SUCCESS);
306
307        /*
308         * If something fails in the above loop we were already walking back
309         * up the tree and the tree is now inconsistent.  Nothing much we can
310         * do about it but release any memory we're holding.
311         */
312err1:   mpool_put(t->bt_mp, lchild, MPOOL_DIRTY);
313        mpool_put(t->bt_mp, rchild, MPOOL_DIRTY);
314
315err2:   mpool_put(t->bt_mp, l, 0);
316        mpool_put(t->bt_mp, r, 0);
317        __dbpanic(t->bt_dbp);
318        return (RET_ERROR);
319}
320
321/*
322 * BT_PAGE -- Split a non-root page of a btree.
323 *
324 * Parameters:
325 *      t:      tree
326 *      h:      root page
327 *      lp:     pointer to left page pointer
328 *      rp:     pointer to right page pointer
329 *      skip:   pointer to index to leave open
330 *      ilen:   insert length
331 *
332 * Returns:
333 *      Pointer to page in which to insert or NULL on error.
334 */
335static PAGE *
336bt_page(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, indx_t *skip, size_t ilen)
337{
338        PAGE *l, *r, *tp;
339        pgno_t npg;
340
341#ifdef STATISTICS
342        ++bt_split;
343#endif
344        /* Put the new right page for the split into place. */
345        if ((r = __bt_new(t, &npg)) == NULL)
346                return (NULL);
347        r->pgno = npg;
348        r->lower = BTDATAOFF;
349        r->upper = t->bt_psize;
350        r->nextpg = h->nextpg;
351        r->prevpg = h->pgno;
352        r->flags = h->flags & P_TYPE;
353
354        /*
355         * If we're splitting the last page on a level because we're appending
356         * a key to it (skip is NEXTINDEX()), it's likely that the data is
357         * sorted.  Adding an empty page on the side of the level is less work
358         * and can push the fill factor much higher than normal.  If we're
359         * wrong it's no big deal, we'll just do the split the right way next
360         * time.  It may look like it's equally easy to do a similar hack for
361         * reverse sorted data, that is, split the tree left, but it's not.
362         * Don't even try.
363         */
364        if (h->nextpg == P_INVALID && *skip == NEXTINDEX(h)) {
365#ifdef STATISTICS
366                ++bt_sortsplit;
367#endif
368                h->nextpg = r->pgno;
369                r->lower = BTDATAOFF + sizeof(indx_t);
370                *skip = 0;
371                *lp = h;
372                *rp = r;
373                return (r);
374        }
375
376        /* Put the new left page for the split into place. */
377        if ((l = (PAGE *)calloc(1, t->bt_psize)) == NULL) {
378                mpool_put(t->bt_mp, r, 0);
379                return (NULL);
380        }
381        l->pgno = h->pgno;
382        l->nextpg = r->pgno;
383        l->prevpg = h->prevpg;
384        l->lower = BTDATAOFF;
385        l->upper = t->bt_psize;
386        l->flags = h->flags & P_TYPE;
387
388        /* Fix up the previous pointer of the page after the split page. */
389        if (h->nextpg != P_INVALID) {
390                if ((tp = mpool_get(t->bt_mp, h->nextpg, 0)) == NULL) {
391                        free(l);
392                        /* XXX mpool_free(t->bt_mp, r->pgno); */
393                        return (NULL);
394                }
395                tp->prevpg = r->pgno;
396                mpool_put(t->bt_mp, tp, MPOOL_DIRTY);
397        }
398
399        /*
400         * Split right.  The key/data pairs aren't sorted in the btree page so
401         * it's simpler to copy the data from the split page onto two new pages
402         * instead of copying half the data to the right page and compacting
403         * the left page in place.  Since the left page can't change, we have
404         * to swap the original and the allocated left page after the split.
405         */
406        tp = bt_psplit(t, h, l, r, skip, ilen);
407
408        /* Move the new left page onto the old left page. */
409        memmove(h, l, t->bt_psize);
410        if (tp == l)
411                tp = h;
412        free(l);
413
414        *lp = h;
415        *rp = r;
416        return (tp);
417}
418
419/*
420 * BT_ROOT -- Split the root page of a btree.
421 *
422 * Parameters:
423 *      t:      tree
424 *      h:      root page
425 *      lp:     pointer to left page pointer
426 *      rp:     pointer to right page pointer
427 *      skip:   pointer to index to leave open
428 *      ilen:   insert length
429 *
430 * Returns:
431 *      Pointer to page in which to insert or NULL on error.
432 */
433static PAGE *
434bt_root(BTREE *t, PAGE *h, PAGE **lp, PAGE **rp, indx_t *skip, size_t ilen)
435{
436        PAGE *l, *r, *tp;
437        pgno_t lnpg, rnpg;
438
439#ifdef STATISTICS
440        ++bt_split;
441        ++bt_rootsplit;
442#endif
443        /* Put the new left and right pages for the split into place. */
444        if ((l = __bt_new(t, &lnpg)) == NULL ||
445            (r = __bt_new(t, &rnpg)) == NULL)
446                return (NULL);
447        l->pgno = lnpg;
448        r->pgno = rnpg;
449        l->nextpg = r->pgno;
450        r->prevpg = l->pgno;
451        l->prevpg = r->nextpg = P_INVALID;
452        l->lower = r->lower = BTDATAOFF;
453        l->upper = r->upper = t->bt_psize;
454        l->flags = r->flags = h->flags & P_TYPE;
455
456        /* Split the root page. */
457        tp = bt_psplit(t, h, l, r, skip, ilen);
458
459        *lp = l;
460        *rp = r;
461        return (tp);
462}
463
464/*
465 * BT_RROOT -- Fix up the recno root page after it has been split.
466 *
467 * Parameters:
468 *      t:      tree
469 *      h:      root page
470 *      l:      left page
471 *      r:      right page
472 *
473 * Returns:
474 *      RET_ERROR, RET_SUCCESS
475 */
476static int
477bt_rroot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
478{
479        char *dest;
480
481        /* Insert the left and right keys, set the header information. */
482        h->linp[0] = h->upper = t->bt_psize - NRINTERNAL;
483        dest = (char *)h + h->upper;
484        WR_RINTERNAL(dest,
485            l->flags & P_RLEAF ? NEXTINDEX(l) : rec_total(l), l->pgno);
486
487        h->linp[1] = h->upper -= NRINTERNAL;
488        dest = (char *)h + h->upper;
489        WR_RINTERNAL(dest,
490            r->flags & P_RLEAF ? NEXTINDEX(r) : rec_total(r), r->pgno);
491
492        h->lower = BTDATAOFF + 2 * sizeof(indx_t);
493
494        /* Unpin the root page, set to recno internal page. */
495        h->flags &= ~P_TYPE;
496        h->flags |= P_RINTERNAL;
497        mpool_put(t->bt_mp, h, MPOOL_DIRTY);
498
499        return (RET_SUCCESS);
500}
501
502/*
503 * BT_BROOT -- Fix up the btree root page after it has been split.
504 *
505 * Parameters:
506 *      t:      tree
507 *      h:      root page
508 *      l:      left page
509 *      r:      right page
510 *
511 * Returns:
512 *      RET_ERROR, RET_SUCCESS
513 */
514static int
515bt_broot(BTREE *t, PAGE *h, PAGE *l, PAGE *r)
516{
517        BINTERNAL *bi;
518        BLEAF *bl;
519        u_int32_t nbytes;
520        char *dest;
521
522        /*
523         * If the root page was a leaf page, change it into an internal page.
524         * We copy the key we split on (but not the key's data, in the case of
525         * a leaf page) to the new root page.
526         *
527         * The btree comparison code guarantees that the left-most key on any
528         * level of the tree is never used, so it doesn't need to be filled in.
529         */
530        nbytes = NBINTERNAL(0);
531        h->linp[0] = h->upper = t->bt_psize - nbytes;
532        dest = (char *)h + h->upper;
533        WR_BINTERNAL(dest, 0, l->pgno, 0);
534
535        switch (h->flags & P_TYPE) {
536        case P_BLEAF:
537                bl = GETBLEAF(r, 0);
538                nbytes = NBINTERNAL(bl->ksize);
539                h->linp[1] = h->upper -= nbytes;
540                dest = (char *)h + h->upper;
541                WR_BINTERNAL(dest, bl->ksize, r->pgno, 0);
542                memmove(dest, bl->bytes, bl->ksize);
543
544                /*
545                 * If the key is on an overflow page, mark the overflow chain
546                 * so it isn't deleted when the leaf copy of the key is deleted.
547                 */
548                if (bl->flags & P_BIGKEY &&
549                    bt_preserve(t, *(pgno_t *)bl->bytes) == RET_ERROR)
550                        return (RET_ERROR);
551                break;
552        case P_BINTERNAL:
553                bi = GETBINTERNAL(r, 0);
554                nbytes = NBINTERNAL(bi->ksize);
555                h->linp[1] = h->upper -= nbytes;
556                dest = (char *)h + h->upper;
557                memmove(dest, bi, nbytes);
558                ((BINTERNAL *)dest)->pgno = r->pgno;
559                break;
560        default:
561                abort();
562        }
563
564        /* There are two keys on the page. */
565        h->lower = BTDATAOFF + 2 * sizeof(indx_t);
566
567        /* Unpin the root page, set to btree internal page. */
568        h->flags &= ~P_TYPE;
569        h->flags |= P_BINTERNAL;
570        mpool_put(t->bt_mp, h, MPOOL_DIRTY);
571
572        return (RET_SUCCESS);
573}
574
575/*
576 * BT_PSPLIT -- Do the real work of splitting the page.
577 *
578 * Parameters:
579 *      t:      tree
580 *      h:      page to be split
581 *      l:      page to put lower half of data
582 *      r:      page to put upper half of data
583 *      pskip:  pointer to index to leave open
584 *      ilen:   insert length
585 *
586 * Returns:
587 *      Pointer to page in which to insert.
588 */
589static PAGE *
590bt_psplit(BTREE *t, PAGE *h, PAGE *l, PAGE *r, indx_t *pskip, size_t ilen)
591{
592        BINTERNAL *bi;
593        BLEAF *bl;
594        CURSOR *c;
595        RLEAF *rl;
596        PAGE *rval;
597        void *src;
598        indx_t full, half, nxt, off, skip, top, used;
599        u_int32_t nbytes;
600        int bigkeycnt, isbigkey;
601
602        /*
603         * Split the data to the left and right pages.  Leave the skip index
604         * open.  Additionally, make some effort not to split on an overflow
605         * key.  This makes internal page processing faster and can save
606         * space as overflow keys used by internal pages are never deleted.
607         */
608        bigkeycnt = 0;
609        skip = *pskip;
610        full = t->bt_psize - BTDATAOFF;
611        half = full / 2;
612        used = 0;
613        for (nxt = off = 0, top = NEXTINDEX(h); nxt < top; ++off) {
614                if (skip == off) {
615                        nbytes = ilen;
616                        isbigkey = 0;           /* XXX: not really known. */
617                } else
618                        switch (h->flags & P_TYPE) {
619                        case P_BINTERNAL:
620                                src = bi = GETBINTERNAL(h, nxt);
621                                nbytes = NBINTERNAL(bi->ksize);
622                                isbigkey = bi->flags & P_BIGKEY;
623                                break;
624                        case P_BLEAF:
625                                src = bl = GETBLEAF(h, nxt);
626                                nbytes = NBLEAF(bl);
627                                isbigkey = bl->flags & P_BIGKEY;
628                                break;
629                        case P_RINTERNAL:
630                                src = GETRINTERNAL(h, nxt);
631                                nbytes = NRINTERNAL;
632                                isbigkey = 0;
633                                break;
634                        case P_RLEAF:
635                                src = rl = GETRLEAF(h, nxt);
636                                nbytes = NRLEAF(rl);
637                                isbigkey = 0;
638                                break;
639                        default:
640                                abort();
641                        }
642
643                /*
644                 * If the key/data pairs are substantial fractions of the max
645                 * possible size for the page, it's possible to get situations
646                 * where we decide to try and copy too much onto the left page.
647                 * Make sure that doesn't happen.
648                 */
649                if ((skip <= off && used + nbytes + sizeof(indx_t) >= full) ||
650                    nxt == top - 1) {
651                        --off;
652                        break;
653                }
654
655                /* Copy the key/data pair, if not the skipped index. */
656                if (skip != off) {
657                        ++nxt;
658
659                        l->linp[off] = l->upper -= nbytes;
660                        memmove((char *)l + l->upper, src, nbytes);
661                }
662
663                used += nbytes + sizeof(indx_t);
664                if (used >= half) {
665                        if (!isbigkey || bigkeycnt == 3)
666                                break;
667                        else
668                                ++bigkeycnt;
669                }
670        }
671
672        /*
673         * Off is the last offset that's valid for the left page.
674         * Nxt is the first offset to be placed on the right page.
675         */
676        l->lower += (off + 1) * sizeof(indx_t);
677
678        /*
679         * If splitting the page that the cursor was on, the cursor has to be
680         * adjusted to point to the same record as before the split.  If the
681         * cursor is at or past the skipped slot, the cursor is incremented by
682         * one.  If the cursor is on the right page, it is decremented by the
683         * number of records split to the left page.
684         */
685        c = &t->bt_cursor;
686        if (F_ISSET(c, CURS_INIT) && c->pg.pgno == h->pgno) {
687                if (c->pg.index >= skip)
688                        ++c->pg.index;
689                if (c->pg.index < nxt)                  /* Left page. */
690                        c->pg.pgno = l->pgno;
691                else {                                  /* Right page. */
692                        c->pg.pgno = r->pgno;
693                        c->pg.index -= nxt;
694                }
695        }
696
697        /*
698         * If the skipped index was on the left page, just return that page.
699         * Otherwise, adjust the skip index to reflect the new position on
700         * the right page.
701         */
702        if (skip <= off) {
703                skip = MAX_PAGE_OFFSET;
704                rval = l;
705        } else {
706                rval = r;
707                *pskip -= nxt;
708        }
709
710        for (off = 0; nxt < top; ++off) {
711                if (skip == nxt) {
712                        ++off;
713                        skip = MAX_PAGE_OFFSET;
714                }
715                switch (h->flags & P_TYPE) {
716                case P_BINTERNAL:
717                        src = bi = GETBINTERNAL(h, nxt);
718                        nbytes = NBINTERNAL(bi->ksize);
719                        break;
720                case P_BLEAF:
721                        src = bl = GETBLEAF(h, nxt);
722                        nbytes = NBLEAF(bl);
723                        break;
724                case P_RINTERNAL:
725                        src = GETRINTERNAL(h, nxt);
726                        nbytes = NRINTERNAL;
727                        break;
728                case P_RLEAF:
729                        src = rl = GETRLEAF(h, nxt);
730                        nbytes = NRLEAF(rl);
731                        break;
732                default:
733                        abort();
734                }
735                ++nxt;
736                r->linp[off] = r->upper -= nbytes;
737                memmove((char *)r + r->upper, src, nbytes);
738        }
739        r->lower += off * sizeof(indx_t);
740
741        /* If the key is being appended to the page, adjust the index. */
742        if (skip == top)
743                r->lower += sizeof(indx_t);
744
745        return (rval);
746}
747
748/*
749 * BT_PRESERVE -- Mark a chain of pages as used by an internal node.
750 *
751 * Chains of indirect blocks pointed to by leaf nodes get reclaimed when the
752 * record that references them gets deleted.  Chains pointed to by internal
753 * pages never get deleted.  This routine marks a chain as pointed to by an
754 * internal page.
755 *
756 * Parameters:
757 *      t:      tree
758 *      pg:     page number of first page in the chain.
759 *
760 * Returns:
761 *      RET_SUCCESS, RET_ERROR.
762 */
763static int
764bt_preserve(BTREE *t, pgno_t pg)
765{
766        PAGE *h;
767
768        if ((h = mpool_get(t->bt_mp, pg, 0)) == NULL)
769                return (RET_ERROR);
770        h->flags |= P_PRESERVE;
771        mpool_put(t->bt_mp, h, MPOOL_DIRTY);
772        return (RET_SUCCESS);
773}
774
775/*
776 * REC_TOTAL -- Return the number of recno entries below a page.
777 *
778 * Parameters:
779 *      h:      page
780 *
781 * Returns:
782 *      The number of recno entries below a page.
783 *
784 * XXX
785 * These values could be set by the bt_psplit routine.  The problem is that the
786 * entry has to be popped off of the stack etc. or the values have to be passed
787 * all the way back to bt_split/bt_rroot and it's not very clean.
788 */
789static recno_t
790rec_total(PAGE *h)
791{
792        recno_t recs;
793        indx_t nxt, top;
794
795        for (recs = 0, nxt = 0, top = NEXTINDEX(h); nxt < top; ++nxt)
796                recs += GETRINTERNAL(h, nxt)->nrecs;
797        return (recs);
798}
Note: See TracBrowser for help on using the repository browser.