You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

autoprewarm.c 24KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927
  1. /*-------------------------------------------------------------------------
  2. *
  3. * autoprewarm.c
  4. * Periodically dump information about the blocks present in
  5. * shared_buffers, and reload them on server restart.
  6. *
  7. * Due to locking considerations, we can't actually begin prewarming
  8. * until the server reaches a consistent state. We need the catalogs
  9. * to be consistent so that we can figure out which relation to lock,
  10. * and we need to lock the relations so that we don't try to prewarm
  11. * pages from a relation that is in the process of being dropped.
  12. *
  13. * While prewarming, autoprewarm will use two workers. There's a
  14. * master worker that reads and sorts the list of blocks to be
  15. * prewarmed and then launches a per-database worker for each
  16. * relevant database in turn. The former keeps running after the
  17. * initial prewarm is complete to update the dump file periodically.
  18. *
  19. * Copyright (c) 2016-2019, PostgreSQL Global Development Group
  20. *
  21. * IDENTIFICATION
  22. * contrib/pg_prewarm/autoprewarm.c
  23. *
  24. *-------------------------------------------------------------------------
  25. */
  26. #include "postgres.h"
  27. #include <unistd.h>
  28. #include "access/heapam.h"
  29. #include "access/xact.h"
  30. #include "catalog/pg_class.h"
  31. #include "catalog/pg_type.h"
  32. #include "miscadmin.h"
  33. #include "pgstat.h"
  34. #include "postmaster/bgworker.h"
  35. #include "storage/buf_internals.h"
  36. #include "storage/dsm.h"
  37. #include "storage/ipc.h"
  38. #include "storage/latch.h"
  39. #include "storage/lwlock.h"
  40. #include "storage/proc.h"
  41. #include "storage/procsignal.h"
  42. #include "storage/shmem.h"
  43. #include "storage/smgr.h"
  44. #include "tcop/tcopprot.h"
  45. #include "utils/acl.h"
  46. #include "utils/guc.h"
  47. #include "utils/memutils.h"
  48. #include "utils/rel.h"
  49. #include "utils/relfilenodemap.h"
  50. #include "utils/resowner.h"
  51. #define AUTOPREWARM_FILE "autoprewarm.blocks"
  52. /* Metadata for each block we dump. */
  53. typedef struct BlockInfoRecord
  54. {
  55. Oid database;
  56. Oid tablespace;
  57. Oid filenode;
  58. ForkNumber forknum;
  59. BlockNumber blocknum;
  60. } BlockInfoRecord;
  61. /* Shared state information for autoprewarm bgworker. */
  62. typedef struct AutoPrewarmSharedState
  63. {
  64. LWLock lock; /* mutual exclusion */
  65. pid_t bgworker_pid; /* for main bgworker */
  66. pid_t pid_using_dumpfile; /* for autoprewarm or block dump */
  67. /* Following items are for communication with per-database worker */
  68. dsm_handle block_info_handle;
  69. Oid database;
  70. int prewarm_start_idx;
  71. int prewarm_stop_idx;
  72. int prewarmed_blocks;
  73. } AutoPrewarmSharedState;
  74. void _PG_init(void);
  75. void autoprewarm_main(Datum main_arg);
  76. void autoprewarm_database_main(Datum main_arg);
  77. PG_FUNCTION_INFO_V1(autoprewarm_start_worker);
  78. PG_FUNCTION_INFO_V1(autoprewarm_dump_now);
  79. static void apw_load_buffers(void);
  80. static int apw_dump_now(bool is_bgworker, bool dump_unlogged);
  81. static void apw_start_master_worker(void);
  82. static void apw_start_database_worker(void);
  83. static bool apw_init_shmem(void);
  84. static void apw_detach_shmem(int code, Datum arg);
  85. static int apw_compare_blockinfo(const void *p, const void *q);
  86. static void apw_sigterm_handler(SIGNAL_ARGS);
  87. static void apw_sighup_handler(SIGNAL_ARGS);
  88. /* Flags set by signal handlers */
  89. static volatile sig_atomic_t got_sigterm = false;
  90. static volatile sig_atomic_t got_sighup = false;
  91. /* Pointer to shared-memory state. */
  92. static AutoPrewarmSharedState *apw_state = NULL;
  93. /* GUC variables. */
  94. static bool autoprewarm = true; /* start worker? */
  95. static int autoprewarm_interval; /* dump interval */
  96. /*
  97. * Module load callback.
  98. */
  99. void
  100. _PG_init(void)
  101. {
  102. DefineCustomIntVariable("pg_prewarm.autoprewarm_interval",
  103. "Sets the interval between dumps of shared buffers",
  104. "If set to zero, time-based dumping is disabled.",
  105. &autoprewarm_interval,
  106. 300,
  107. 0, INT_MAX / 1000,
  108. PGC_SIGHUP,
  109. GUC_UNIT_S,
  110. NULL,
  111. NULL,
  112. NULL);
  113. if (!process_shared_preload_libraries_in_progress)
  114. return;
  115. /* can't define PGC_POSTMASTER variable after startup */
  116. DefineCustomBoolVariable("pg_prewarm.autoprewarm",
  117. "Starts the autoprewarm worker.",
  118. NULL,
  119. &autoprewarm,
  120. true,
  121. PGC_POSTMASTER,
  122. 0,
  123. NULL,
  124. NULL,
  125. NULL);
  126. EmitWarningsOnPlaceholders("pg_prewarm");
  127. RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));
  128. /* Register autoprewarm worker, if enabled. */
  129. if (autoprewarm)
  130. apw_start_master_worker();
  131. }
  132. /*
  133. * Main entry point for the master autoprewarm process. Per-database workers
  134. * have a separate entry point.
  135. */
  136. void
  137. autoprewarm_main(Datum main_arg)
  138. {
  139. bool first_time = true;
  140. TimestampTz last_dump_time = 0;
  141. /* Establish signal handlers; once that's done, unblock signals. */
  142. pqsignal(SIGTERM, apw_sigterm_handler);
  143. pqsignal(SIGHUP, apw_sighup_handler);
  144. pqsignal(SIGUSR1, procsignal_sigusr1_handler);
  145. BackgroundWorkerUnblockSignals();
  146. /* Create (if necessary) and attach to our shared memory area. */
  147. if (apw_init_shmem())
  148. first_time = false;
  149. /* Set on-detach hook so that our PID will be cleared on exit. */
  150. on_shmem_exit(apw_detach_shmem, 0);
  151. /*
  152. * Store our PID in the shared memory area --- unless there's already
  153. * another worker running, in which case just exit.
  154. */
  155. LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
  156. if (apw_state->bgworker_pid != InvalidPid)
  157. {
  158. LWLockRelease(&apw_state->lock);
  159. ereport(LOG,
  160. (errmsg("autoprewarm worker is already running under PID %lu",
  161. (unsigned long) apw_state->bgworker_pid)));
  162. return;
  163. }
  164. apw_state->bgworker_pid = MyProcPid;
  165. LWLockRelease(&apw_state->lock);
  166. /*
  167. * Preload buffers from the dump file only if we just created the shared
  168. * memory region. Otherwise, it's either already been done or shouldn't
  169. * be done - e.g. because the old dump file has been overwritten since the
  170. * server was started.
  171. *
  172. * There's not much point in performing a dump immediately after we finish
  173. * preloading; so, if we do end up preloading, consider the last dump time
  174. * to be equal to the current time.
  175. */
  176. if (first_time)
  177. {
  178. apw_load_buffers();
  179. last_dump_time = GetCurrentTimestamp();
  180. }
  181. /* Periodically dump buffers until terminated. */
  182. while (!got_sigterm)
  183. {
  184. /* In case of a SIGHUP, just reload the configuration. */
  185. if (got_sighup)
  186. {
  187. got_sighup = false;
  188. ProcessConfigFile(PGC_SIGHUP);
  189. }
  190. if (autoprewarm_interval <= 0)
  191. {
  192. /* We're only dumping at shutdown, so just wait forever. */
  193. (void) WaitLatch(&MyProc->procLatch,
  194. WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
  195. -1L,
  196. PG_WAIT_EXTENSION);
  197. }
  198. else
  199. {
  200. long delay_in_ms = 0;
  201. TimestampTz next_dump_time = 0;
  202. long secs = 0;
  203. int usecs = 0;
  204. /* Compute the next dump time. */
  205. next_dump_time =
  206. TimestampTzPlusMilliseconds(last_dump_time,
  207. autoprewarm_interval * 1000);
  208. TimestampDifference(GetCurrentTimestamp(), next_dump_time,
  209. &secs, &usecs);
  210. delay_in_ms = secs + (usecs / 1000);
  211. /* Perform a dump if it's time. */
  212. if (delay_in_ms <= 0)
  213. {
  214. last_dump_time = GetCurrentTimestamp();
  215. apw_dump_now(true, false);
  216. continue;
  217. }
  218. /* Sleep until the next dump time. */
  219. (void) WaitLatch(&MyProc->procLatch,
  220. WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
  221. delay_in_ms,
  222. PG_WAIT_EXTENSION);
  223. }
  224. /* Reset the latch, loop. */
  225. ResetLatch(&MyProc->procLatch);
  226. }
  227. /*
  228. * Dump one last time. We assume this is probably the result of a system
  229. * shutdown, although it's possible that we've merely been terminated.
  230. */
  231. apw_dump_now(true, true);
  232. }
  233. /*
  234. * Read the dump file and launch per-database workers one at a time to
  235. * prewarm the buffers found there.
  236. */
  237. static void
  238. apw_load_buffers(void)
  239. {
  240. FILE *file = NULL;
  241. int num_elements,
  242. i;
  243. BlockInfoRecord *blkinfo;
  244. dsm_segment *seg;
  245. /*
  246. * Skip the prewarm if the dump file is in use; otherwise, prevent any
  247. * other process from writing it while we're using it.
  248. */
  249. LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
  250. if (apw_state->pid_using_dumpfile == InvalidPid)
  251. apw_state->pid_using_dumpfile = MyProcPid;
  252. else
  253. {
  254. LWLockRelease(&apw_state->lock);
  255. ereport(LOG,
  256. (errmsg("skipping prewarm because block dump file is being written by PID %lu",
  257. (unsigned long) apw_state->pid_using_dumpfile)));
  258. return;
  259. }
  260. LWLockRelease(&apw_state->lock);
  261. /*
  262. * Open the block dump file. Exit quietly if it doesn't exist, but report
  263. * any other error.
  264. */
  265. file = AllocateFile(AUTOPREWARM_FILE, "r");
  266. if (!file)
  267. {
  268. if (errno == ENOENT)
  269. {
  270. LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
  271. apw_state->pid_using_dumpfile = InvalidPid;
  272. LWLockRelease(&apw_state->lock);
  273. return; /* No file to load. */
  274. }
  275. ereport(ERROR,
  276. (errcode_for_file_access(),
  277. errmsg("could not read file \"%s\": %m",
  278. AUTOPREWARM_FILE)));
  279. }
  280. /* First line of the file is a record count. */
  281. if (fscanf(file, "<<%d>>\n", &num_elements) != 1)
  282. ereport(ERROR,
  283. (errcode_for_file_access(),
  284. errmsg("could not read from file \"%s\": %m",
  285. AUTOPREWARM_FILE)));
  286. /* Allocate a dynamic shared memory segment to store the record data. */
  287. seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
  288. blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
  289. /* Read records, one per line. */
  290. for (i = 0; i < num_elements; i++)
  291. {
  292. unsigned forknum;
  293. if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
  294. &blkinfo[i].tablespace, &blkinfo[i].filenode,
  295. &forknum, &blkinfo[i].blocknum) != 5)
  296. ereport(ERROR,
  297. (errmsg("autoprewarm block dump file is corrupted at line %d",
  298. i + 1)));
  299. blkinfo[i].forknum = forknum;
  300. }
  301. FreeFile(file);
  302. /* Sort the blocks to be loaded. */
  303. pg_qsort(blkinfo, num_elements, sizeof(BlockInfoRecord),
  304. apw_compare_blockinfo);
  305. /* Populate shared memory state. */
  306. apw_state->block_info_handle = dsm_segment_handle(seg);
  307. apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
  308. apw_state->prewarmed_blocks = 0;
  309. /* Get the info position of the first block of the next database. */
  310. while (apw_state->prewarm_start_idx < num_elements)
  311. {
  312. int j = apw_state->prewarm_start_idx;
  313. Oid current_db = blkinfo[j].database;
  314. /*
  315. * Advance the prewarm_stop_idx to the first BlockRecordInfo that does
  316. * not belong to this database.
  317. */
  318. j++;
  319. while (j < num_elements)
  320. {
  321. if (current_db != blkinfo[j].database)
  322. {
  323. /*
  324. * Combine BlockRecordInfos for global objects with those of
  325. * the database.
  326. */
  327. if (current_db != InvalidOid)
  328. break;
  329. current_db = blkinfo[j].database;
  330. }
  331. j++;
  332. }
  333. /*
  334. * If we reach this point with current_db == InvalidOid, then only
  335. * BlockRecordInfos belonging to global objects exist. We can't
  336. * prewarm without a database connection, so just bail out.
  337. */
  338. if (current_db == InvalidOid)
  339. break;
  340. /* Configure stop point and database for next per-database worker. */
  341. apw_state->prewarm_stop_idx = j;
  342. apw_state->database = current_db;
  343. Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);
  344. /* If we've run out of free buffers, don't launch another worker. */
  345. if (!have_free_buffer())
  346. break;
  347. /*
  348. * Start a per-database worker to load blocks for this database; this
  349. * function will return once the per-database worker exits.
  350. */
  351. apw_start_database_worker();
  352. /* Prepare for next database. */
  353. apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx;
  354. }
  355. /* Clean up. */
  356. dsm_detach(seg);
  357. LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
  358. apw_state->block_info_handle = DSM_HANDLE_INVALID;
  359. apw_state->pid_using_dumpfile = InvalidPid;
  360. LWLockRelease(&apw_state->lock);
  361. /* Report our success. */
  362. ereport(LOG,
  363. (errmsg("autoprewarm successfully prewarmed %d of %d previously-loaded blocks",
  364. apw_state->prewarmed_blocks, num_elements)));
  365. }
  366. /*
  367. * Prewarm all blocks for one database (and possibly also global objects, if
  368. * those got grouped with this database).
  369. */
  370. void
  371. autoprewarm_database_main(Datum main_arg)
  372. {
  373. int pos;
  374. BlockInfoRecord *block_info;
  375. Relation rel = NULL;
  376. BlockNumber nblocks = 0;
  377. BlockInfoRecord *old_blk = NULL;
  378. dsm_segment *seg;
  379. /* Establish signal handlers; once that's done, unblock signals. */
  380. pqsignal(SIGTERM, die);
  381. BackgroundWorkerUnblockSignals();
  382. /* Connect to correct database and get block information. */
  383. apw_init_shmem();
  384. seg = dsm_attach(apw_state->block_info_handle);
  385. if (seg == NULL)
  386. ereport(ERROR,
  387. (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
  388. errmsg("could not map dynamic shared memory segment")));
  389. BackgroundWorkerInitializeConnectionByOid(apw_state->database, InvalidOid, 0);
  390. block_info = (BlockInfoRecord *) dsm_segment_address(seg);
  391. pos = apw_state->prewarm_start_idx;
  392. /*
  393. * Loop until we run out of blocks to prewarm or until we run out of free
  394. * buffers.
  395. */
  396. while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
  397. {
  398. BlockInfoRecord *blk = &block_info[pos++];
  399. Buffer buf;
  400. CHECK_FOR_INTERRUPTS();
  401. /*
  402. * Quit if we've reached records for another database. If previous
  403. * blocks are of some global objects, then continue pre-warming.
  404. */
  405. if (old_blk != NULL && old_blk->database != blk->database &&
  406. old_blk->database != 0)
  407. break;
  408. /*
  409. * As soon as we encounter a block of a new relation, close the old
  410. * relation. Note that rel will be NULL if try_relation_open failed
  411. * previously; in that case, there is nothing to close.
  412. */
  413. if (old_blk != NULL && old_blk->filenode != blk->filenode &&
  414. rel != NULL)
  415. {
  416. relation_close(rel, AccessShareLock);
  417. rel = NULL;
  418. CommitTransactionCommand();
  419. }
  420. /*
  421. * Try to open each new relation, but only once, when we first
  422. * encounter it. If it's been dropped, skip the associated blocks.
  423. */
  424. if (old_blk == NULL || old_blk->filenode != blk->filenode)
  425. {
  426. Oid reloid;
  427. Assert(rel == NULL);
  428. StartTransactionCommand();
  429. reloid = RelidByRelfilenode(blk->tablespace, blk->filenode);
  430. if (OidIsValid(reloid))
  431. rel = try_relation_open(reloid, AccessShareLock);
  432. if (!rel)
  433. CommitTransactionCommand();
  434. }
  435. if (!rel)
  436. {
  437. old_blk = blk;
  438. continue;
  439. }
  440. /* Once per fork, check for fork existence and size. */
  441. if (old_blk == NULL ||
  442. old_blk->filenode != blk->filenode ||
  443. old_blk->forknum != blk->forknum)
  444. {
  445. RelationOpenSmgr(rel);
  446. /*
  447. * smgrexists is not safe for illegal forknum, hence check whether
  448. * the passed forknum is valid before using it in smgrexists.
  449. */
  450. if (blk->forknum > InvalidForkNumber &&
  451. blk->forknum <= MAX_FORKNUM &&
  452. smgrexists(rel->rd_smgr, blk->forknum))
  453. nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
  454. else
  455. nblocks = 0;
  456. }
  457. /* Check whether blocknum is valid and within fork file size. */
  458. if (blk->blocknum >= nblocks)
  459. {
  460. /* Move to next forknum. */
  461. old_blk = blk;
  462. continue;
  463. }
  464. /* Prewarm buffer. */
  465. buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
  466. NULL);
  467. if (BufferIsValid(buf))
  468. {
  469. apw_state->prewarmed_blocks++;
  470. ReleaseBuffer(buf);
  471. }
  472. old_blk = blk;
  473. }
  474. dsm_detach(seg);
  475. /* Release lock on previous relation. */
  476. if (rel)
  477. {
  478. relation_close(rel, AccessShareLock);
  479. CommitTransactionCommand();
  480. }
  481. }
  482. /*
  483. * Dump information on blocks in shared buffers. We use a text format here
  484. * so that it's easy to understand and even change the file contents if
  485. * necessary.
  486. * Returns the number of blocks dumped.
  487. */
  488. static int
  489. apw_dump_now(bool is_bgworker, bool dump_unlogged)
  490. {
  491. int num_blocks;
  492. int i;
  493. int ret;
  494. BlockInfoRecord *block_info_array;
  495. BufferDesc *bufHdr;
  496. FILE *file;
  497. char transient_dump_file_path[MAXPGPATH];
  498. pid_t pid;
  499. LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
  500. pid = apw_state->pid_using_dumpfile;
  501. if (apw_state->pid_using_dumpfile == InvalidPid)
  502. apw_state->pid_using_dumpfile = MyProcPid;
  503. LWLockRelease(&apw_state->lock);
  504. if (pid != InvalidPid)
  505. {
  506. if (!is_bgworker)
  507. ereport(ERROR,
  508. (errmsg("could not perform block dump because dump file is being used by PID %lu",
  509. (unsigned long) apw_state->pid_using_dumpfile)));
  510. ereport(LOG,
  511. (errmsg("skipping block dump because it is already being performed by PID %lu",
  512. (unsigned long) apw_state->pid_using_dumpfile)));
  513. return 0;
  514. }
  515. block_info_array =
  516. (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
  517. for (num_blocks = 0, i = 0; i < NBuffers; i++)
  518. {
  519. uint32 buf_state;
  520. CHECK_FOR_INTERRUPTS();
  521. bufHdr = GetBufferDescriptor(i);
  522. /* Lock each buffer header before inspecting. */
  523. buf_state = LockBufHdr(bufHdr);
  524. /*
  525. * Unlogged tables will be automatically truncated after a crash or
  526. * unclean shutdown. In such cases we need not prewarm them. Dump them
  527. * only if requested by caller.
  528. */
  529. if (buf_state & BM_TAG_VALID &&
  530. ((buf_state & BM_PERMANENT) || dump_unlogged))
  531. {
  532. block_info_array[num_blocks].database = bufHdr->tag.rnode.dbNode;
  533. block_info_array[num_blocks].tablespace = bufHdr->tag.rnode.spcNode;
  534. block_info_array[num_blocks].filenode = bufHdr->tag.rnode.relNode;
  535. block_info_array[num_blocks].forknum = bufHdr->tag.forkNum;
  536. block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
  537. ++num_blocks;
  538. }
  539. UnlockBufHdr(bufHdr, buf_state);
  540. }
  541. snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
  542. file = AllocateFile(transient_dump_file_path, "w");
  543. if (!file)
  544. ereport(ERROR,
  545. (errcode_for_file_access(),
  546. errmsg("could not open file \"%s\": %m",
  547. transient_dump_file_path)));
  548. ret = fprintf(file, "<<%d>>\n", num_blocks);
  549. if (ret < 0)
  550. {
  551. int save_errno = errno;
  552. FreeFile(file);
  553. unlink(transient_dump_file_path);
  554. errno = save_errno;
  555. ereport(ERROR,
  556. (errcode_for_file_access(),
  557. errmsg("could not write to file \"%s\": %m",
  558. transient_dump_file_path)));
  559. }
  560. for (i = 0; i < num_blocks; i++)
  561. {
  562. CHECK_FOR_INTERRUPTS();
  563. ret = fprintf(file, "%u,%u,%u,%u,%u\n",
  564. block_info_array[i].database,
  565. block_info_array[i].tablespace,
  566. block_info_array[i].filenode,
  567. (uint32) block_info_array[i].forknum,
  568. block_info_array[i].blocknum);
  569. if (ret < 0)
  570. {
  571. int save_errno = errno;
  572. FreeFile(file);
  573. unlink(transient_dump_file_path);
  574. errno = save_errno;
  575. ereport(ERROR,
  576. (errcode_for_file_access(),
  577. errmsg("could not write to file \"%s\": %m",
  578. transient_dump_file_path)));
  579. }
  580. }
  581. pfree(block_info_array);
  582. /*
  583. * Rename transient_dump_file_path to AUTOPREWARM_FILE to make things
  584. * permanent.
  585. */
  586. ret = FreeFile(file);
  587. if (ret != 0)
  588. {
  589. int save_errno = errno;
  590. unlink(transient_dump_file_path);
  591. errno = save_errno;
  592. ereport(ERROR,
  593. (errcode_for_file_access(),
  594. errmsg("could not close file \"%s\": %m",
  595. transient_dump_file_path)));
  596. }
  597. (void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
  598. apw_state->pid_using_dumpfile = InvalidPid;
  599. ereport(DEBUG1,
  600. (errmsg("wrote block details for %d blocks", num_blocks)));
  601. return num_blocks;
  602. }
  603. /*
  604. * SQL-callable function to launch autoprewarm.
  605. */
  606. Datum
  607. autoprewarm_start_worker(PG_FUNCTION_ARGS)
  608. {
  609. pid_t pid;
  610. if (!autoprewarm)
  611. ereport(ERROR,
  612. (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
  613. errmsg("autoprewarm is disabled")));
  614. apw_init_shmem();
  615. LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
  616. pid = apw_state->bgworker_pid;
  617. LWLockRelease(&apw_state->lock);
  618. if (pid != InvalidPid)
  619. ereport(ERROR,
  620. (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
  621. errmsg("autoprewarm worker is already running under PID %lu",
  622. (unsigned long) pid)));
  623. apw_start_master_worker();
  624. PG_RETURN_VOID();
  625. }
  626. /*
  627. * SQL-callable function to perform an immediate block dump.
  628. *
  629. * Note: this is declared to return int8, as insurance against some
  630. * very distant day when we might make NBuffers wider than int.
  631. */
  632. Datum
  633. autoprewarm_dump_now(PG_FUNCTION_ARGS)
  634. {
  635. int num_blocks;
  636. apw_init_shmem();
  637. PG_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
  638. {
  639. num_blocks = apw_dump_now(false, true);
  640. }
  641. PG_END_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
  642. PG_RETURN_INT64((int64) num_blocks);
  643. }
  644. /*
  645. * Allocate and initialize autoprewarm related shared memory, if not already
  646. * done, and set up backend-local pointer to that state. Returns true if an
  647. * existing shared memory segment was found.
  648. */
  649. static bool
  650. apw_init_shmem(void)
  651. {
  652. bool found;
  653. LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
  654. apw_state = ShmemInitStruct("autoprewarm",
  655. sizeof(AutoPrewarmSharedState),
  656. &found);
  657. if (!found)
  658. {
  659. /* First time through ... */
  660. LWLockInitialize(&apw_state->lock, LWLockNewTrancheId());
  661. apw_state->bgworker_pid = InvalidPid;
  662. apw_state->pid_using_dumpfile = InvalidPid;
  663. }
  664. LWLockRelease(AddinShmemInitLock);
  665. LWLockRegisterTranche(apw_state->lock.tranche, "autoprewarm");
  666. return found;
  667. }
  668. /*
  669. * Clear our PID from autoprewarm shared state.
  670. */
  671. static void
  672. apw_detach_shmem(int code, Datum arg)
  673. {
  674. LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
  675. if (apw_state->pid_using_dumpfile == MyProcPid)
  676. apw_state->pid_using_dumpfile = InvalidPid;
  677. if (apw_state->bgworker_pid == MyProcPid)
  678. apw_state->bgworker_pid = InvalidPid;
  679. LWLockRelease(&apw_state->lock);
  680. }
  681. /*
  682. * Start autoprewarm master worker process.
  683. */
  684. static void
  685. apw_start_master_worker(void)
  686. {
  687. BackgroundWorker worker;
  688. BackgroundWorkerHandle *handle;
  689. BgwHandleStatus status;
  690. pid_t pid;
  691. MemSet(&worker, 0, sizeof(BackgroundWorker));
  692. worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
  693. worker.bgw_start_time = BgWorkerStart_ConsistentState;
  694. strcpy(worker.bgw_library_name, "pg_prewarm");
  695. strcpy(worker.bgw_function_name, "autoprewarm_main");
  696. strcpy(worker.bgw_name, "autoprewarm master");
  697. strcpy(worker.bgw_type, "autoprewarm master");
  698. if (process_shared_preload_libraries_in_progress)
  699. {
  700. RegisterBackgroundWorker(&worker);
  701. return;
  702. }
  703. /* must set notify PID to wait for startup */
  704. worker.bgw_notify_pid = MyProcPid;
  705. if (!RegisterDynamicBackgroundWorker(&worker, &handle))
  706. ereport(ERROR,
  707. (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
  708. errmsg("could not register background process"),
  709. errhint("You may need to increase max_worker_processes.")));
  710. status = WaitForBackgroundWorkerStartup(handle, &pid);
  711. if (status != BGWH_STARTED)
  712. ereport(ERROR,
  713. (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
  714. errmsg("could not start background process"),
  715. errhint("More details may be available in the server log.")));
  716. }
  717. /*
  718. * Start autoprewarm per-database worker process.
  719. */
  720. static void
  721. apw_start_database_worker(void)
  722. {
  723. BackgroundWorker worker;
  724. BackgroundWorkerHandle *handle;
  725. MemSet(&worker, 0, sizeof(BackgroundWorker));
  726. worker.bgw_flags =
  727. BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
  728. worker.bgw_start_time = BgWorkerStart_ConsistentState;
  729. strcpy(worker.bgw_library_name, "pg_prewarm");
  730. strcpy(worker.bgw_function_name, "autoprewarm_database_main");
  731. strcpy(worker.bgw_name, "autoprewarm worker");
  732. strcpy(worker.bgw_type, "autoprewarm worker");
  733. /* must set notify PID to wait for shutdown */
  734. worker.bgw_notify_pid = MyProcPid;
  735. if (!RegisterDynamicBackgroundWorker(&worker, &handle))
  736. ereport(ERROR,
  737. (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
  738. errmsg("registering dynamic bgworker autoprewarm failed"),
  739. errhint("Consider increasing configuration parameter \"max_worker_processes\".")));
  740. /*
  741. * Ignore return value; if it fails, postmaster has died, but we have
  742. * checks for that elsewhere.
  743. */
  744. WaitForBackgroundWorkerShutdown(handle);
  745. }
  746. /* Compare member elements to check whether they are not equal. */
  747. #define cmp_member_elem(fld) \
  748. do { \
  749. if (a->fld < b->fld) \
  750. return -1; \
  751. else if (a->fld > b->fld) \
  752. return 1; \
  753. } while(0)
  754. /*
  755. * apw_compare_blockinfo
  756. *
  757. * We depend on all records for a particular database being consecutive
  758. * in the dump file; each per-database worker will preload blocks until
  759. * it sees a block for some other database. Sorting by tablespace,
  760. * filenode, forknum, and blocknum isn't critical for correctness, but
  761. * helps us get a sequential I/O pattern.
  762. */
  763. static int
  764. apw_compare_blockinfo(const void *p, const void *q)
  765. {
  766. const BlockInfoRecord *a = (const BlockInfoRecord *) p;
  767. const BlockInfoRecord *b = (const BlockInfoRecord *) q;
  768. cmp_member_elem(database);
  769. cmp_member_elem(tablespace);
  770. cmp_member_elem(filenode);
  771. cmp_member_elem(forknum);
  772. cmp_member_elem(blocknum);
  773. return 0;
  774. }
  775. /*
  776. * Signal handler for SIGTERM
  777. */
  778. static void
  779. apw_sigterm_handler(SIGNAL_ARGS)
  780. {
  781. int save_errno = errno;
  782. got_sigterm = true;
  783. if (MyProc)
  784. SetLatch(&MyProc->procLatch);
  785. errno = save_errno;
  786. }
  787. /*
  788. * Signal handler for SIGHUP
  789. */
  790. static void
  791. apw_sighup_handler(SIGNAL_ARGS)
  792. {
  793. int save_errno = errno;
  794. got_sighup = true;
  795. if (MyProc)
  796. SetLatch(&MyProc->procLatch);
  797. errno = save_errno;
  798. }