@@ -70,6 +70,9 @@ static void _bt_recsplitloc(FindSplitData *state,
 static void _bt_deltasortsplits(FindSplitData *state, double fillfactormult,
 				bool usemult);
 static int	_bt_splitcmp(const void *arg1, const void *arg2);
+static bool _bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
+					int leaffillfactor, bool *usemult);
+static bool _bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid);
 static OffsetNumber _bt_bestsplitloc(FindSplitData *state, int perfectpenalty,
 				 bool *newitemonleft);
 static int	_bt_strategy(FindSplitData *state, SplitPoint *leftpage,
@@ -249,9 +252,10 @@ _bt_findsplitloc(Relation rel,
  * Start search for a split point among list of legal split points.  Give
  * primary consideration to equalizing available free space in each half
  * of the split initially (start with default strategy), while applying
- * rightmost optimization where appropriate.  Either of the two other
- * fallback strategies may be required for cases with a large number of
- * duplicates around the original/space-optimal split point.
+ * rightmost and split-after-new-item optimizations where appropriate.
+ * Either of the two other fallback strategies may be required for cases
+ * with a large number of duplicates around the original/space-optimal
+ * split point.
  *
  * Default strategy gives some weight to suffix truncation in deciding a
  * split point on leaf pages.  It attempts to select a split point where a
@@ -273,6 +277,44 @@ _bt_findsplitloc(Relation rel,
 		usemult = true;
 		fillfactormult = leaffillfactor / 100.0;
 	}
+	else if (_bt_afternewitemoff(&state, maxoff, leaffillfactor, &usemult))
+	{
+		/*
+		 * New item inserted at rightmost point among a localized grouping on
+		 * a leaf page -- apply "split after new item" optimization, either by
+		 * applying leaf fillfactor multiplier, or by choosing the exact split
+		 * point that leaves the new item as last on the left.  (usemult is set
+		 * for us.)
+		 */
+		if (usemult)
+		{
+			/* fillfactormult should be set based on leaf fillfactor */
+			fillfactormult = leaffillfactor / 100.0;
+		}
+		else
+		{
+			/* find precise split point after newitemoff */
+			for (int i = 0; i < state.nsplits; i++)
+			{
+				SplitPoint *split = state.splits + i;
+
+				if (split->newitemonleft &&
+					newitemoff == split->firstoldonright)
+				{
+					pfree(state.splits);
+					*newitemonleft = true;
+					return newitemoff;
+				}
+			}
+
+			/*
+			 * Cannot legally split after newitemoff; proceed with split
+			 * without using fillfactor multiplier.  This is defensive, and
+			 * should never be needed in practice.
+			 */
+			fillfactormult = 0.50;
+		}
+	}
 	else
 	{
 		/* Other leaf page.  50:50 page split. */
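[Aside, not part of the commit: the usemult == false branch just above scans the legal split points for the one that leaves the incoming tuple as the last tuple on the left half. A minimal standalone sketch of that scan follows; SplitPointStub and pick_split_after_newitem() are hypothetical stand-ins for the patch's SplitPoint array and the loop over state.splits, with plain ints in place of OffsetNumbers.]

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for the patch's SplitPoint struct */
typedef struct SplitPointStub
{
	int			firstoldonright;	/* first pre-existing tuple on the right half */
	bool		newitemonleft;		/* does the new tuple go on the left half? */
} SplitPointStub;

/*
 * Sketch of the usemult == false branch: return the offset to split at when
 * some legal split point leaves the new item as the last tuple on the left,
 * or -1 when no such split point exists (the defensive fallback case).
 */
static int
pick_split_after_newitem(const SplitPointStub *splits, int nsplits, int newitemoff)
{
	for (int i = 0; i < nsplits; i++)
	{
		if (splits[i].newitemonleft && newitemoff == splits[i].firstoldonright)
			return newitemoff;
	}
	return -1;					/* caller would fall back to fillfactormult = 0.50 */
}

int
main(void)
{
	/* three made-up candidate split points, new item going in at offset 7 */
	SplitPointStub splits[] = {
		{5, true},
		{7, true},				/* split lands right after the new item */
		{9, false},
	};

	printf("split at offset %d\n", pick_split_after_newitem(splits, 3, 7));
	return 0;
}

[Compiled as C99 this prints "split at offset 7", mirroring the return newitemoff path above; the -1 result corresponds to the defensive fillfactormult = 0.50 fallback.]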
@@ -519,6 +561,172 @@ _bt_splitcmp(const void *arg1, const void *arg2)
 	return 0;
 }
 
+/*
+ * Subroutine to determine whether or not a non-rightmost leaf page should be
+ * split immediately after the would-be original page offset for the
+ * new/incoming tuple (or should have leaf fillfactor applied when new item is
+ * to the right on original page).  This is appropriate when there is a
+ * pattern of localized monotonically increasing insertions into a composite
+ * index, where leading attribute values form local groupings, and we
+ * anticipate further insertions of the same/current grouping (new item's
+ * grouping) in the near future.  This can be thought of as a variation on
+ * applying leaf fillfactor during rightmost leaf page splits, since cases
+ * that benefit will converge on packing leaf pages leaffillfactor% full over
+ * time.
+ *
+ * We may leave extra free space remaining on the rightmost page of a "most
+ * significant column" grouping of tuples if that grouping never ends up
+ * having future insertions that use the free space.  That effect is
+ * self-limiting; a future grouping that becomes the "nearest on the right"
+ * grouping of the affected grouping usually puts the extra free space to good
+ * use.
+ *
+ * Caller uses optimization when routine returns true, though the exact action
+ * taken by caller varies.  Caller uses original leaf page fillfactor in
+ * standard way rather than using the new item offset directly when *usemult
+ * was also set to true here.  Otherwise, caller applies optimization by
+ * locating the legal split point that makes the new tuple the very last tuple
+ * on the left side of the split.
+ */
+static bool
+_bt_afternewitemoff(FindSplitData *state, OffsetNumber maxoff,
+					int leaffillfactor, bool *usemult)
+{
+	int16		nkeyatts;
+	ItemId		itemid;
+	IndexTuple	tup;
+	int			keepnatts;
+
+	Assert(state->is_leaf && !state->is_rightmost);
+
+	nkeyatts = IndexRelationGetNumberOfKeyAttributes(state->rel);
+
+	/* Single key indexes not considered here */
+	if (nkeyatts == 1)
+		return false;
+
+	/* Ascending insertion pattern never inferred when new item is first */
+	if (state->newitemoff == P_FIRSTKEY)
+		return false;
+
+	/*
+	 * Only apply optimization on pages with equisized tuples, since ordinal
+	 * keys are likely to be fixed-width.  Testing if the new tuple is
+	 * variable width directly might also work, but that fails to apply the
+	 * optimization to indexes with a numeric_ops attribute.
+	 *
+	 * Conclude that page has equisized tuples when the new item is the same
+	 * width as the smallest item observed during pass over page, and other
+	 * non-pivot tuples must be the same width as well.  (Note that the
+	 * possibly-truncated existing high key isn't counted in
+	 * olddataitemstotal, and must be subtracted from maxoff.)
+	 */
+	if (state->newitemsz != state->minfirstrightsz)
+		return false;
+	if (state->newitemsz * (maxoff - 1) != state->olddataitemstotal)
+		return false;
+
+	/*
+	 * Avoid applying optimization when tuples are wider than a tuple
+	 * consisting of two non-NULL int8/int64 attributes (or four non-NULL
+	 * int4/int32 attributes)
+	 */
+	if (state->newitemsz >
+		MAXALIGN(sizeof(IndexTupleData) + sizeof(int64) * 2) +
+		sizeof(ItemIdData))
+		return false;
+
+	/*
+	 * At least the first attribute's value must be equal to the corresponding
+	 * value in previous tuple to apply optimization.  New item cannot be a
+	 * duplicate, either.
+	 *
+	 * Handle case where new item is to the right of all items on the existing
+	 * page.  This is suggestive of monotonically increasing insertions in
+	 * itself, so the "heap TID adjacency" test is not applied here.
+	 */
+	if (state->newitemoff > maxoff)
+	{
+		itemid = PageGetItemId(state->page, maxoff);
+		tup = (IndexTuple) PageGetItem(state->page, itemid);
+		keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
+
+		if (keepnatts > 1 && keepnatts <= nkeyatts)
+		{
+			*usemult = true;
+			return true;
+		}
+
+		return false;
+	}
+
+	/*
+	 * "Low cardinality leading column, high cardinality suffix column"
+	 * indexes with a random insertion pattern (e.g., an index with a boolean
+	 * column, such as an index on '(book_is_in_print, book_isbn)') present us
+	 * with a risk of consistently misapplying the optimization.  We're
+	 * willing to accept very occasional misapplication of the optimization,
+	 * provided the cases where we get it wrong are rare and self-limiting.
+	 *
+	 * Heap TID adjacency strongly suggests that the item just to the left was
+	 * inserted very recently, which limits overapplication of the
+	 * optimization.  Besides, all inappropriate cases triggered here will
+	 * still split in the middle of the page on average.
+	 */
+	itemid = PageGetItemId(state->page, OffsetNumberPrev(state->newitemoff));
+	tup = (IndexTuple) PageGetItem(state->page, itemid);
+	/* Do cheaper test first */
+	if (!_bt_adjacenthtid(&tup->t_tid, &state->newitem->t_tid))
+		return false;
+	/* Check same conditions as rightmost item case, too */
+	keepnatts = _bt_keep_natts_fast(state->rel, tup, state->newitem);
+
+	if (keepnatts > 1 && keepnatts <= nkeyatts)
+	{
+		double		interp = (double) state->newitemoff / ((double) maxoff + 1);
+		double		leaffillfactormult = (double) leaffillfactor / 100.0;
+
+		/*
+		 * Don't allow caller to split after a new item when it will result in
+		 * a split point to the right of the point that a leaf fillfactor
+		 * split would use -- have caller apply leaf fillfactor instead
+		 */
+		*usemult = interp > leaffillfactormult;
+
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Subroutine for determining if two heap TIDS are "adjacent".
+ *
+ * Adjacent means that the high TID is very likely to have been inserted into
+ * heap relation immediately after the low TID, probably during the current
+ * transaction.
+ */
+static bool
+_bt_adjacenthtid(ItemPointer lowhtid, ItemPointer highhtid)
+{
+	BlockNumber lowblk,
+				highblk;
+
+	lowblk = ItemPointerGetBlockNumber(lowhtid);
+	highblk = ItemPointerGetBlockNumber(highhtid);
+
+	/* Make optimistic assumption of adjacency when heap blocks match */
+	if (lowblk == highblk)
+		return true;
+
+	/* When heap block one up, second offset should be FirstOffsetNumber */
+	if (lowblk + 1 == highblk &&
+		ItemPointerGetOffsetNumber(highhtid) == FirstOffsetNumber)
+		return true;
+
+	return false;
+}
+
 /*
  * Subroutine to find the "best" split point among candidate split points.
  * The best split point is the split point with the lowest penalty among split
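[Aside, not part of the commit: the two heuristics added above can be tried out in isolation. The sketch below assumes plain unsigned block/offset integers in place of ItemPointers; adjacent_htid() mirrors the _bt_adjacenthtid rule, and main() reruns the interp guard from _bt_afternewitemoff with made-up numbers: 100 existing items, a new item at offset 95, and a leaf fillfactor of 90 give interp = 95/101 ≈ 0.941 > 0.90, so usemult is set and the caller applies the ordinary leaf fillfactor split rather than splitting even further right.]

#include <stdbool.h>
#include <stdio.h>

/*
 * Stand-in for _bt_adjacenthtid, using plain integers instead of
 * ItemPointers: "adjacent" means same heap block, or the very next heap
 * block with the high TID at its first offset (FirstOffsetNumber is 1).
 */
static bool
adjacent_htid(unsigned lowblk, unsigned highblk, unsigned highoff)
{
	if (lowblk == highblk)
		return true;
	if (lowblk + 1 == highblk && highoff == 1)
		return true;
	return false;
}

int
main(void)
{
	/* made-up inputs for the fillfactor guard in _bt_afternewitemoff */
	int			maxoff = 100;
	int			newitemoff = 95;
	int			leaffillfactor = 90;
	double		interp = (double) newitemoff / ((double) maxoff + 1);
	double		leaffillfactormult = (double) leaffillfactor / 100.0;
	bool		usemult = interp > leaffillfactormult;

	printf("same block:        %d\n", adjacent_htid(10, 10, 21));	/* 1 */
	printf("next block, off 1: %d\n", adjacent_htid(10, 11, 1));	/* 1 */
	printf("two blocks ahead:  %d\n", adjacent_htid(10, 12, 1));	/* 0 */
	printf("interp=%.3f usemult=%d\n", interp, usemult);	/* 0.941 1 */
	return 0;
}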