Skip to content

Commit db8a674

Browse files
author
Heikki Linnakangas
committed
Refactor SLRU download interface between Postgres and the neon extension
Move the responsibility of writing the SLRU segment contents to the neon extension. The neon extension hook now downloads the file and places it to the correct path, and returns a true/false to the caller to indicate if the file existed. On successful download, the caller retries opening the file. While we're at it, refactor the read_slru_segment function so that it is not part of the SMGR api anymore, but an independent hook. We were previously pretending it's part of the SMGR api and because of that, passed a dummy SMgrRelation, but it was a bit silly.
1 parent 2aaab3b commit db8a674

File tree

5 files changed

+80
-113
lines changed

5 files changed

+80
-113
lines changed

src/backend/access/transam/slru.c

Lines changed: 49 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -620,70 +620,28 @@ SimpleLruWritePage(SlruCtl ctl, int slotno)
620620

621621

622622
/*
623-
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
624-
* to download them on demand to reduce startup time.
625-
* If SLRU segment is not found, we try to download it from page server
623+
* NEON: we do not want to include large pg_xact/multixact files in the
624+
* basebackup and prefer to download them on demand to reduce startup time.
625+
*
626+
* If SLRU segment is not found, we try to download it from the pageserver.
627+
*
628+
* Returns:
629+
* true if the file was successfully downloaded
630+
* false if the file was not found in pageserver
631+
* ereports if some other error happened
626632
*/
627-
static int
628-
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const* path)
633+
static bool
634+
SimpleLruDownloadSegment(SlruCtl ctl, int pageno, char const *path)
629635
{
630-
int segno;
631-
int fd = -1;
632-
int n_blocks;
633-
char* buffer;
634-
635-
static SMgrRelationData dummy_smgr_rel = {0};
636+
int segno;
636637

637638
/* If page is greater than latest written page, then do not try to download segment from server */
638639
if (ctl->PagePrecedes(ctl->shared->latest_page_number, pageno))
639-
return -1;
640+
return false;
640641

641-
if (!dummy_smgr_rel.smgr)
642-
{
643-
RelFileNode rnode = {0};
644-
dummy_smgr_rel.smgr = smgr(InvalidBackendId, rnode);
645-
}
646642
segno = pageno / SLRU_PAGES_PER_SEGMENT;
647643

648-
if (neon_use_communicator_worker) {
649-
buffer = NULL;
650-
} else {
651-
buffer = palloc(BLCKSZ * SLRU_PAGES_PER_SEGMENT);
652-
}
653-
654-
n_blocks = smgr_read_slru_segment(&dummy_smgr_rel, path, segno, buffer);
655-
if (n_blocks > 0)
656-
{
657-
fd = OpenTransientFile(path, O_RDWR | O_CREAT | PG_BINARY);
658-
if (fd < 0)
659-
{
660-
slru_errcause = SLRU_OPEN_FAILED;
661-
slru_errno = errno;
662-
pfree(buffer);
663-
return -1;
664-
}
665-
666-
if (!neon_use_communicator_worker) {
667-
errno = 0;
668-
pgstat_report_wait_start(WAIT_EVENT_SLRU_WRITE);
669-
if (pg_pwrite(fd, buffer, n_blocks*BLCKSZ, 0) != n_blocks*BLCKSZ)
670-
{
671-
pgstat_report_wait_end();
672-
/* if write didn't set errno, assume problem is no disk space */
673-
if (errno == 0)
674-
errno = ENOSPC;
675-
slru_errcause = SLRU_WRITE_FAILED;
676-
slru_errno = errno;
677-
678-
CloseTransientFile(fd);
679-
pfree(buffer);
680-
return -1;
681-
}
682-
pgstat_report_wait_end();
683-
}
684-
}
685-
pfree(buffer);
686-
return fd;
644+
return smgr_read_slru_segment(path, segno);
687645
}
688646

689647
/*
@@ -702,29 +660,34 @@ SimpleLruDoesPhysicalPageExist(SlruCtl ctl, int pageno)
702660
int fd;
703661
bool result;
704662
off_t endpos;
663+
bool attempted_download = false;
705664

706665
/* update the stats counter of checked pages */
707666
pgstat_count_slru_page_exists(ctl->shared->slru_stats_idx);
708667

709668
SlruFileName(ctl, path, segno);
710669

670+
retry_after_download:
711671
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
712672
if (fd < 0)
713673
{
714-
/* expected: file doesn't exist */
715-
if (errno == ENOENT)
716-
{
717-
fd = SimpleLruDownloadSegment(ctl, pageno, path);
718-
if (fd < 0)
719-
return false;
720-
}
721-
else
674+
if (errno == ENOENT && !attempted_download)
722675
{
723-
/* report error normally */
724-
slru_errcause = SLRU_OPEN_FAILED;
725-
slru_errno = errno;
726-
SlruReportIOError(ctl, pageno, 0);
676+
/* Try to download the file from the pageserver */
677+
attempted_download = true;
678+
if (SimpleLruDownloadSegment(ctl, pageno, path))
679+
goto retry_after_download;
680+
errno = ENOENT;
727681
}
682+
683+
/* expected: file doesn't exist */
684+
if (errno == ENOENT)
685+
return false;
686+
687+
/* report error normally */
688+
slru_errcause = SLRU_OPEN_FAILED;
689+
slru_errno = errno;
690+
SlruReportIOError(ctl, pageno, 0);
728691
}
729692

730693
if ((endpos = lseek(fd, 0, SEEK_END)) < 0)
@@ -765,6 +728,7 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
765728
off_t offset = rpageno * BLCKSZ;
766729
char path[MAXPGPATH];
767730
int fd;
731+
bool attempted_download = false;
768732

769733
SlruFileName(ctl, path, segno);
770734

@@ -775,33 +739,31 @@ SlruPhysicalReadPage(SlruCtl ctl, int pageno, int slotno)
775739
* SlruPhysicalWritePage). Hence, if we are InRecovery, allow the case
776740
* where the file doesn't exist, and return zeroes instead.
777741
*/
742+
retry_after_download:
778743
fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
779744
if (fd < 0)
780745
{
781-
if (errno != ENOENT)
746+
if (errno == ENOENT && !attempted_download)
747+
{
748+
/* Try to download the file from the pageserver */
749+
attempted_download = true;
750+
if (SimpleLruDownloadSegment(ctl, pageno, path))
751+
goto retry_after_download;
752+
errno = ENOENT;
753+
}
754+
755+
if (errno != ENOENT || !InRecovery)
782756
{
783757
slru_errcause = SLRU_OPEN_FAILED;
784758
slru_errno = errno;
785759
return false;
786760
}
787-
fd = SimpleLruDownloadSegment(ctl, pageno, path);
788-
if (fd < 0)
789-
{
790-
if (!InRecovery)
791-
{
792-
slru_errcause = SLRU_OPEN_FAILED;
793-
slru_errno = errno;
794-
return false;
795-
}
796-
else
797-
{
798-
ereport(LOG,
799-
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
800-
path)));
801-
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
802-
return true;
803-
}
804-
}
761+
762+
ereport(LOG,
763+
(errmsg("file \"%s\" doesn't exist, reading as zeroes",
764+
path)));
765+
MemSet(shared->page_buffer[slotno], 0, BLCKSZ);
766+
return true;
805767
}
806768

807769
errno = 0;

src/backend/storage/smgr/smgr.c

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ static const f_smgr smgr_md = {
4646
.smgr_immedsync = mdimmedsync,
4747
};
4848

49+
/*
50+
* SLRU download isn't really part of the smgr API, as SLRUs are not
51+
* relations. But we define this here anyway, to keep it close to the smgr
52+
* hooks in Neon.
53+
*/
54+
read_slru_segment_hook_type read_slru_segment_hook;
55+
4956
/*
5057
* Each backend has a hashtable that stores all extant SMgrRelation objects.
5158
* In addition, "unowned" SMgrRelation objects are chained together in a list.
@@ -570,22 +577,6 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
570577
buffer, skipFsync);
571578
}
572579

573-
/*
574-
* NEON: we do not want to include large pg_xact/multixact files in basebackup and prefer
575-
* to download them on demand to reduce startup time.
576-
* If SLRU segment is not found, we try to download it from page server
577-
*
578-
* This function returns number of blocks in segment. Usually it should be SLRU_PAGES_PER_SEGMENT but in case
579-
* of partial segment, it can be smaller. Zero value means that segment doesn't exist.
580-
* From Postgres point of view empty segment is the same as absent segment.
581-
*/
582-
int
583-
smgr_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buffer)
584-
{
585-
return (*reln->smgr).smgr_read_slru_segment ? (*reln->smgr).smgr_read_slru_segment(reln, path, segno, buffer) : 0;
586-
}
587-
588-
589580

590581
/*
591582
* smgrwriteback() -- Trigger kernel writeback for the supplied range of
@@ -772,6 +763,25 @@ smgr_end_unlogged_build(SMgrRelation reln)
772763
(*reln->smgr).smgr_end_unlogged_build(reln);
773764
}
774765

766+
/*
767+
* NEON: Attempt to download an SLRU file from remote storage.
768+
*
769+
* To reduce startup time, we don't want to include large pg_xact/multixact
770+
* files in the basebackup. Instead, we have this hook to download them on
771+
* demand. If an SLRU segment is not found, the code in slru.c calls this to
772+
* check if it can be downloaded from the pageserver.
773+
*
774+
* If the segment is found in remote storage, the hook writes it to the local
775+
* file and returns 'true'. If the file is not found, returns 'false'.
776+
*/
777+
bool
778+
smgr_read_slru_segment(const char *path, int segno)
779+
{
780+
if (read_slru_segment_hook)
781+
return read_slru_segment_hook(path, segno);
782+
return false;
783+
}
784+
775785
/*
776786
* AtEOXact_SMgr
777787
*

src/backend/utils/init/globals.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ bool IsPostmasterEnvironment = false;
113113
bool IsUnderPostmaster = false;
114114
bool IsBinaryUpgrade = false;
115115
bool IsBackgroundWorker = false;
116-
bool neon_use_communicator_worker = false;
117116

118117
bool ExitOnAnyError = false;
119118

src/include/miscadmin.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -170,12 +170,6 @@ extern PGDLLIMPORT bool IsPostmasterEnvironment;
170170
extern PGDLLIMPORT bool IsUnderPostmaster;
171171
extern PGDLLIMPORT bool IsBackgroundWorker;
172172
extern PGDLLIMPORT bool IsBinaryUpgrade;
173-
/* Whether the communicator worker is used or not. Defined here so that
174-
* it is also accessible from the main postgres code easily without
175-
* having to look up the value using strings and chain of other
176-
* functions.
177-
*/
178-
extern PGDLLIMPORT bool neon_use_communicator_worker;
179173

180174
extern PGDLLIMPORT bool ExitOnAnyError;
181175

src/include/storage/smgr.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,6 @@ typedef struct f_smgr
128128
void (*smgr_start_unlogged_build) (SMgrRelation reln);
129129
void (*smgr_finish_unlogged_build_phase_1) (SMgrRelation reln);
130130
void (*smgr_end_unlogged_build) (SMgrRelation reln);
131-
132-
int (*smgr_read_slru_segment) (SMgrRelation reln, const char *path, int segno, void* buffer);
133131
} f_smgr;
134132

135133
typedef void (*smgr_init_hook_type) (void);
@@ -139,6 +137,10 @@ extern PGDLLIMPORT smgr_shutdown_hook_type smgr_shutdown_hook;
139137
extern void smgr_init_standard(void);
140138
extern void smgr_shutdown_standard(void);
141139

140+
/* NEON: Hook for reading an SLRU segment from e.g. remote storage */
141+
typedef bool (*read_slru_segment_hook_type) (const char *path, int segno);
142+
extern read_slru_segment_hook_type read_slru_segment_hook;
143+
142144
// Alternative implementation of calculate_database_size()
143145
typedef int64 (*dbsize_hook_type) (Oid dbOid);
144146
extern PGDLLIMPORT dbsize_hook_type dbsize_hook;
@@ -187,6 +189,6 @@ extern void smgr_start_unlogged_build(SMgrRelation reln);
187189
extern void smgr_finish_unlogged_build_phase_1(SMgrRelation reln);
188190
extern void smgr_end_unlogged_build(SMgrRelation reln);
189191

190-
extern int smgr_read_slru_segment(SMgrRelation reln, const char *path, int segno, void* buffer);
192+
extern bool smgr_read_slru_segment(const char *path, int segno);
191193

192194
#endif /* SMGR_H */

0 commit comments

Comments
 (0)