=== Applying patches on top of PostgreSQL commit ID 98b1efd6ef6a1612019ef9ed6e44b79c132352ce === /etc/rc.d/jail: WARNING: Per-jail configuration via jail_* variables is obsolete. Please consider migrating to /etc/jail.conf. Tue Dec 31 21:14:27 UTC 2024 On branch cf/4966 nothing to commit, working tree clean === applying patch ./v20241008-0001-Allow-parallel-create-for-GIN-indexes.patch Applied patch to 'src/backend/access/gin/gininsert.c' with conflicts. Applied patch to 'src/backend/access/gin/ginutil.c' cleanly. Applied patch to 'src/backend/access/transam/parallel.c' cleanly. Applied patch to 'src/backend/utils/sort/tuplesortvariants.c' cleanly. Applied patch to 'src/include/access/gin.h' cleanly. Falling back to direct application... Applied patch to 'src/include/utils/tuplesort.h' cleanly. Applied patch to 'src/tools/pgindent/typedefs.list' cleanly. U src/backend/access/gin/gininsert.c diff --cc src/backend/access/gin/gininsert.c index 31ee565041,f3b51878d5..0000000000 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@@ -376,24 -633,93 +633,98 @@@ ginbuild(Relation heap, Relation index ginInitBA(&buildstate.accum); /* - * Do the heap scan. We disallow sync scan here because dataPlaceToPage - * prefers to receive tuples in TID order. + * Attempt to launch parallel worker scan when required + * + * XXX plan_create_index_workers makes the number of workers dependent on + * maintenance_work_mem, requiring 32MB for each worker. For GIN that's + * reasonable too, because we sort the data just like btree. It does + * ignore the memory used to accumulate data in memory (set by work_mem), + * but there is no way to communicate that to plan_create_index_workers. + */ + if (indexInfo->ii_ParallelWorkers > 0) + _gin_begin_parallel(state, heap, index, indexInfo->ii_Concurrent, + indexInfo->ii_ParallelWorkers); + + + /* + * If parallel build requested and at least one worker process was + * successfully launched, set up coordination state, wait for workers to + * complete. Then read all tuples from the shared tuplesort and insert + * them into the index. + * + * In serial mode, simply scan the table and build the index one index + * tuple at a time. */ ++<<<<<<< ours + reltuples = table_index_build_scan(heap, index, indexInfo, false, true, + ginBuildCallback, &buildstate, NULL); ++======= + if (state->bs_leader) + { + SortCoordinate coordinate; + + coordinate = (SortCoordinate) palloc0(sizeof(SortCoordinateData)); + coordinate->isWorker = false; + coordinate->nParticipants = + state->bs_leader->nparticipanttuplesorts; + coordinate->sharedsort = state->bs_leader->sharedsort; + + /* + * Begin leader tuplesort. + * + * In cases where parallelism is involved, the leader receives the + * same share of maintenance_work_mem as a serial sort (it is + * generally treated in the same way as a serial sort once we return). + * Parallel worker Tuplesortstates will have received only a fraction + * of maintenance_work_mem, though. + * + * We rely on the lifetime of the Leader Tuplesortstate almost not + * overlapping with any worker Tuplesortstate's lifetime. There may + * be some small overlap, but that's okay because we rely on leader + * Tuplesortstate only allocating a small, fixed amount of memory + * here. When its tuplesort_performsort() is called (by our caller), + * and significant amounts of memory are likely to be used, all + * workers must have already freed almost all memory held by their + * Tuplesortstates (they are about to go away completely, too). The + * overall effect is that maintenance_work_mem always represents an + * absolute high watermark on the amount of memory used by a CREATE + * INDEX operation, regardless of the use of parallelism or any other + * factor. + */ + state->bs_sortstate = + tuplesort_begin_index_gin(heap, index, + maintenance_work_mem, coordinate, + TUPLESORT_NONE); + + /* scan the relation in parallel and merge per-worker results */ + reltuples = _gin_parallel_merge(state); ++>>>>>>> theirs - /* dump remaining entries to the index */ - oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); - ginBeginBAScan(&buildstate.accum); - while ((list = ginGetBAEntry(&buildstate.accum, - &attnum, &key, &category, &nlist)) != NULL) + _gin_end_parallel(state->bs_leader, state); + } + else /* no parallel index build */ { - /* there could be many entries, so be willing to abort here */ - CHECK_FOR_INTERRUPTS(); - ginEntryInsert(&buildstate.ginstate, attnum, key, category, - list, nlist, &buildstate.buildStats); + /* + * Do the heap scan. We disallow sync scan here because + * dataPlaceToPage prefers to receive tuples in TID order. + */ + reltuples = table_index_build_scan(heap, index, indexInfo, false, true, + ginBuildCallback, (void *) &buildstate, + NULL); + + /* dump remaining entries to the index */ + oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx); + ginBeginBAScan(&buildstate.accum); + while ((list = ginGetBAEntry(&buildstate.accum, + &attnum, &key, &category, &nlist)) != NULL) + { + /* there could be many entries, so be willing to abort here */ + CHECK_FOR_INTERRUPTS(); + ginEntryInsert(&buildstate.ginstate, attnum, key, category, + list, nlist, &buildstate.buildStats); + } + MemoryContextSwitchTo(oldCtx); } - MemoryContextSwitchTo(oldCtx); MemoryContextDelete(buildstate.funcCtx); MemoryContextDelete(buildstate.tmpCtx);