/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
  15. #include "qemu/osdep.h"
  16. #include "qemu/cutils.h"
  17. #include "qemu/error-report.h"
  18. #include "migration/blocker.h"
  19. #include "exec.h"
  20. #include "fd.h"
  21. #include "socket.h"
  22. #include "rdma.h"
  23. #include "ram.h"
  24. #include "migration/global_state.h"
  25. #include "migration/misc.h"
  26. #include "migration.h"
  27. #include "savevm.h"
  28. #include "qemu-file-channel.h"
  29. #include "qemu-file.h"
  30. #include "migration/vmstate.h"
  31. #include "block/block.h"
  32. #include "qapi/error.h"
  33. #include "qapi/clone-visitor.h"
  34. #include "qapi/qapi-visit-sockets.h"
  35. #include "qapi/qapi-commands-migration.h"
  36. #include "qapi/qapi-events-migration.h"
  37. #include "qapi/qmp/qerror.h"
  38. #include "qapi/qmp/qnull.h"
  39. #include "qemu/rcu.h"
  40. #include "block.h"
  41. #include "postcopy-ram.h"
  42. #include "qemu/thread.h"
  43. #include "trace.h"
  44. #include "exec/target_page.h"
  45. #include "io/channel-buffer.h"
  46. #include "migration/colo.h"
  47. #include "hw/boards.h"
  48. #include "monitor/monitor.h"
  49. #include "net/announce.h"

#define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
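
/*
 * Editor's note (illustrative, not upstream code): the sender meters
 * bandwidth in BUFFER_DELAY-millisecond chunks, so the per-chunk budget
 * handed to qemu_file_set_rate_limit() later in this file is
 * max_bandwidth / XFER_LIMIT_RATIO. For example, with max_bandwidth at
 * 32 MiB/s (MAX_THROTTLE) and BUFFER_DELAY of 100 ms, each chunk may
 * carry up to 32 MiB / 10 = ~3.2 MiB before the stream throttles.
 */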

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression. */
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/* 0: no compression, 1: best speed, ..., 9: best compression ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100

static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

static bool deferred_incoming;

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};
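
/*
 * Editor's note (illustrative, derived from migrate_send_rp_message()
 * below): every return-path message shares a small framing:
 *
 *     be16 message_type   (one of mig_rp_message_type)
 *     be16 len            (payload length in bytes)
 *     len bytes of payload
 *
 * e.g. a PONG carrying sequence number 5 is the 8 bytes
 *     00 02 | 00 04 | 00 00 00 05
 */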

/* When we add fault tolerance, we could have several
   migrations at once. For now we don't need dynamic
   creation of migrations */

static MigrationState *current_migration;
static MigrationIncomingState *current_incoming;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);

void migration_object_init(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    Error *err = NULL;

    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);

    init_dirty_bitmap_incoming_migration();

    if (!migration_object_check(current_migration, &err)) {
        error_report_err(err);
        exit(1);
    }

    /*
     * We cannot really do this in migration_instance_init() since at
     * that time global properties are not yet applied, then this
     * value will be definitely replaced by something else.
     */
    if (ms->enforce_config_section) {
        current_migration->send_configuration = true;
    }
}

void migration_shutdown(void)
{
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migrate_fd_cancel(current_migration);
    object_unref(OBJECT(current_migration));
}

/* For outgoing */
MigrationState *migrate_get_current(void)
{
    /* This can only be called after the object has been created. */
    assert(current_migration);
    return current_migration;
}

MigrationIncomingState *migration_incoming_get_current(void)
{
    assert(current_incoming);
    return current_incoming;
}

void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->socket_address_list) {
        qapi_free_SocketAddressList(mis->socket_address_list);
        mis->socket_address_list = NULL;
    }
}

static void migrate_generate_event(int new_state)
{
    if (migrate_use_events()) {
        qapi_event_send_migration(new_state);
    }
}

static bool migrate_late_block_activate(void)
{
    MigrationState *s;

    s = migrate_get_current();

    return s->enabled_capabilities[
        MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
}

/*
 * Called on -incoming with a defer: uri.
 * The migration can be started later after any parameters have been
 * changed.
 */
static void deferred_incoming_migration(Error **errp)
{
    if (deferred_incoming) {
        error_setg(errp, "Incoming migration already deferred");
    }
    deferred_incoming = true;
}

/*
 * Send a message on the return channel back to the source
 * of the migration.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    qemu_mutex_lock(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        goto error;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that qemu file got error during sending */
    ret = qemu_file_get_error(mis->to_src_file);

error:
    qemu_mutex_unlock(&mis->rp_mutex);
    return ret;
}

/* Request a range of pages from the source VM at the given
 * start address.
 *   rbname: Name of the RAMBlock to request the page in, if NULL it's the same
 *           as the last request (a name must have been given previously)
 *   Start: Address offset within the RB
 *   Len: Length in bytes required - must be a multiple of pagesize
 */
int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
                              ram_addr_t start, size_t len)
{
    uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
    size_t msglen = 12; /* start + len */
    enum mig_rp_message_type msg_type;

    *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
    *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);

    if (rbname) {
        int rbname_len = strlen(rbname);
        assert(rbname_len < 256);

        bufc[msglen++] = rbname_len;
        memcpy(bufc + msglen, rbname, rbname_len);
        msglen += rbname_len;
        msg_type = MIG_RP_MSG_REQ_PAGES_ID;
    } else {
        msg_type = MIG_RP_MSG_REQ_PAGES;
    }

    return migrate_send_rp_message(mis, msg_type, msglen, bufc);
}
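
/*
 * Editor's note (illustrative, derived from the function above): the
 * REQ_PAGES_ID payload is laid out as
 *     be64 start | be32 len | u8 rbname_len | rbname bytes (no NUL)
 * so requesting 4 KiB at offset 0x1000 of a block named "pc.ram" gives
 * a 12 + 1 + 6 = 19 byte payload; a plain REQ_PAGES stops after the
 * first 12 bytes.
 */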

static bool migration_colo_enabled;
bool migration_incoming_colo_enabled(void)
{
    return migration_colo_enabled;
}

void migration_incoming_disable_colo(void)
{
    migration_colo_enabled = false;
}

void migration_incoming_enable_colo(void)
{
    migration_colo_enabled = true;
}

void migrate_add_address(SocketAddress *address)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    SocketAddressList *addrs;

    addrs = g_new0(SocketAddressList, 1);
    addrs->next = mis->socket_address_list;
    mis->socket_address_list = addrs;
    addrs->value = QAPI_CLONE(SocketAddress, address);
}

void qemu_start_incoming_migration(const char *uri, Error **errp)
{
    const char *p;

    qapi_event_send_migration(MIGRATION_STATUS_SETUP);
    if (!strcmp(uri, "defer")) {
        deferred_incoming_migration(errp);
    } else if (strstart(uri, "tcp:", &p)) {
        tcp_start_incoming_migration(p, errp);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_incoming_migration(p, errp);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_incoming_migration(p, errp);
    } else if (strstart(uri, "unix:", &p)) {
        unix_start_incoming_migration(p, errp);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_incoming_migration(p, errp);
    } else {
        error_setg(errp, "unknown migration protocol: %s", uri);
    }
}
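
/*
 * Editor's note (illustrative URI forms accepted above):
 *     tcp:0.0.0.0:4444        unix:/tmp/migrate.sock
 *     fd:42                   exec:cat /tmp/vm.dump
 *     rdma:host:port (with CONFIG_RDMA)
 *     defer  (wait for a later migrate-incoming command)
 */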

static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats flush their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
        autostart = false;
    }
    /* If global state section was not received or we are in running
       state, we need to obey autostart. Any other state is set with
       runstate_set. */

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}

static void process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* We've received the COLO info and know whether we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats flush their mutable metadata */
        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        if (colo_init_ram_cache() < 0) {
            error_report("Init ram cache failed");
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        qemu_coroutine_yield();

        /* Wait for the COLO incoming thread to exit before freeing resources */
        qemu_thread_join(&mis->colo_incoming_thread);
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);
    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
    }
    exit(EXIT_FAILURE);
}

static void migration_incoming_setup(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (multifd_load_setup() != 0) {
        /* We haven't been able to create multifd threads;
           there is nothing better to do than exit */
        exit(EXIT_FAILURE);
    }

    if (!mis->from_src_file) {
        mis->from_src_file = f;
    }
    qemu_file_set_blocking(f, false);
}

void migration_incoming_process(void)
{
    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
    qemu_coroutine_enter(co);
}

/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(QEMUFile *f)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        mis->from_src_file = f;
        /* Postcopy has standalone thread to do vm load */
        qemu_file_set_blocking(f, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(f);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the
         * fault thread will still be waiting), so that we can receive
         * commands from the source now, and answer them if needed. The
         * fault thread will not be woken up until we are sure that the
         * source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}

void migration_fd_process_incoming(QEMUFile *f)
{
    if (postcopy_try_recover(f)) {
        return;
    }

    migration_incoming_setup(f);
    migration_incoming_process();
}

void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    bool start_migration;

    if (!mis->from_src_file) {
        /* The first connection (multifd may have multiple) */
        QEMUFile *f = qemu_fopen_channel_input(ioc);

        /* If it's a recovery, we're done */
        if (postcopy_try_recover(f)) {
            return;
        }

        migration_incoming_setup(f);

        /*
         * Common migration only needs one channel, so we can start
         * right now. Multifd needs more than one channel, we wait.
         */
        start_migration = !migrate_use_multifd();
    } else {
        Error *local_err = NULL;
        /* Multiple connections */
        assert(migrate_use_multifd());
        start_migration = multifd_recv_new_channel(ioc, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (start_migration) {
        migration_incoming_process();
    }
}

/**
 * @migration_has_all_channels: We have received all channels that we need
 *
 * Returns true when we have got connections to all the channels that
 * we need for migration.
 */
bool migration_has_all_channels(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    bool all_channels;

    all_channels = multifd_recv_all_channels_created();

    return all_channels && mis->from_src_file != NULL;
}

/*
 * Send a 'SHUT' message on the return channel with the given value
 * to indicate that we've finished with the RP. Non-0 value indicates
 * error.
 */
void migrate_send_rp_shut(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
}

/*
 * Send a 'PONG' message on the return channel with the given value
 * (normally in response to a 'PING')
 */
void migrate_send_rp_pong(MigrationIncomingState *mis,
                          uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
}

void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
                                 char *block_name)
{
    char buf[512];
    int len;
    int64_t res;

    /*
     * First, we send the header part. It contains only the len of
     * idstr, and the idstr itself.
     */
    len = strlen(block_name);
    buf[0] = len;
    memcpy(buf + 1, block_name, len);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
        error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
                     __func__);
        return;
    }

    migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);

    /*
     * Next, we dump the received bitmap to the stream.
     *
     * TODO: currently we are safe since we are the only one that is
     * using the to_src_file handle (fault thread is still paused),
     * and it's ok even not taking the mutex. However the best way is
     * to take the lock before sending the message header, and release
     * the lock after sending the bitmap.
     */
    qemu_mutex_lock(&mis->rp_mutex);
    res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
    qemu_mutex_unlock(&mis->rp_mutex);

    trace_migrate_send_rp_recv_bitmap(block_name, res);
}

void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
{
    uint32_t buf;

    buf = cpu_to_be32(value);
    migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
}

MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
{
    MigrationCapabilityStatusList *head = NULL;
    MigrationCapabilityStatusList *caps;
    MigrationState *s = migrate_get_current();
    int i;

    caps = NULL; /* silence compiler warning */
    for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
#ifndef CONFIG_LIVE_BLOCK_MIGRATION
        if (i == MIGRATION_CAPABILITY_BLOCK) {
            continue;
        }
#endif
        if (head == NULL) {
            head = g_malloc0(sizeof(*caps));
            caps = head;
        } else {
            caps->next = g_malloc0(sizeof(*caps));
            caps = caps->next;
        }
        caps->value =
            g_malloc(sizeof(*caps->value));
        caps->value->capability = i;
        caps->value->state = s->enabled_capabilities[i];
    }

    return head;
}
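
/*
 * Editor's note (illustrative QMP exchange, assuming a monitor session;
 * not part of this file):
 *     -> { "execute": "query-migrate-capabilities" }
 *     <- { "return": [ { "capability": "xbzrle", "state": false }, ... ] }
 */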

MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_compress_wait_thread = true;
    params->compress_wait_thread = s->parameters.compress_wait_thread;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_tls_creds = true;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->has_tls_hostname = true;
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->has_tls_authz = true;
    params->tls_authz = g_strdup(s->parameters.tls_authz);
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_multifd_channels = true;
    params->multifd_channels = s->parameters.multifd_channels;
    params->has_xbzrle_cache_size = true;
    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
    params->has_max_postcopy_bandwidth = true;
    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
    params->has_max_cpu_throttle = true;
    params->max_cpu_throttle = s->parameters.max_cpu_throttle;
    params->has_announce_initial = true;
    params->announce_initial = s->parameters.announce_initial;
    params->has_announce_max = true;
    params->announce_max = s->parameters.announce_max;
    params->has_announce_rounds = true;
    params->announce_rounds = s->parameters.announce_rounds;
    params->has_announce_step = true;
    params->announce_step = s->parameters.announce_step;

    return params;
}

AnnounceParameters *migrate_announce_params(void)
{
    static AnnounceParameters ap;

    MigrationState *s = migrate_get_current();

    ap.initial = s->parameters.announce_initial;
    ap.max = s->parameters.announce_max;
    ap.rounds = s->parameters.announce_rounds;
    ap.step = s->parameters.announce_step;

    return &ap;
}

/*
 * Return true if we're already in the middle of a migration
 * (i.e. any of the active or setup states)
 */
bool migration_is_setup_or_active(int state)
{
    switch (state) {
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
        return true;

    default:
        return false;
    }
}

static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    info->has_ram = true;
    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = ram_counters.transferred;
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = ram_counters.duplicate;
    /* legacy value. It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = ram_counters.normal;
    info->ram->normal_bytes = ram_counters.normal *
        qemu_target_page_size();
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = qemu_target_page_size();
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;

    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->has_compression = true;
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
            compression_counters.compressed_size;
        info->compression->compression_rate =
            compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}

static void populate_disk_info(MigrationInfo *info)
{
    if (blk_mig_active()) {
        info->has_disk = true;
        info->disk = g_malloc0(sizeof(*info->disk));
        info->disk->transferred = blk_mig_bytes_transferred();
        info->disk->remaining = blk_mig_bytes_remaining();
        info->disk->total = blk_mig_bytes_total();
    }
}

static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
        break;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        info->has_status = true;
        info->has_total_time = true;
        info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
            - s->start_time;
        info->has_expected_downtime = true;
        info->expected_downtime = s->expected_downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        populate_ram_info(info, s);
        populate_disk_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        info->has_total_time = true;
        info->total_time = s->total_time;
        info->has_downtime = true;
        info->downtime = s->downtime;
        info->has_setup_time = true;
        info->setup_time = s->setup_time;

        populate_ram_info(info, s);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->has_error_desc = true;
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    }
    info->status = s->state;
}

/**
 * @migrate_caps_check - check capability validity
 *
 * @cap_list: old capability list, array of bool
 * @params: new capabilities to be applied soon
 * @errp: set *errp if the check failed, with reason
 *
 * Returns true if check passed, otherwise false.
 */
static bool migrate_caps_check(bool *cap_list,
                               MigrationCapabilityStatusList *params,
                               Error **errp)
{
    MigrationCapabilityStatusList *cap;
    bool old_postcopy_cap;
    MigrationIncomingState *mis = migration_incoming_get_current();

    old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

    for (cap = params; cap; cap = cap->next) {
        cap_list[cap->value->capability] = cap->value->state;
    }

#ifndef CONFIG_LIVE_BLOCK_MIGRATION
    if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
        error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
                   "block migration");
        error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
        return false;
    }
#endif

#ifndef CONFIG_REPLICATION
    if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
        error_setg(errp, "QEMU compiled without replication module"
                   " can't enable COLO");
        error_append_hint(errp, "Please enable replication before COLO.\n");
        return false;
    }
#endif

    if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
        if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
            /* The decompression threads asynchronously write into RAM
             * rather than use the atomic copies needed to avoid
             * userfaulting. It should be possible to fix the decompression
             * threads for compatibility in future.
             */
            error_setg(errp, "Postcopy is not currently compatible "
                       "with compression");
            return false;
        }

        /* This check is reasonably expensive, so only when it's being
         * set the first time, also it's only the destination that needs
         * special support.
         */
        if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
            !postcopy_ram_supported_by_host(mis)) {
            /* postcopy_ram_supported_by_host will have emitted a more
             * detailed message
             */
            error_setg(errp, "Postcopy is not supported");
            return false;
        }

        if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
            error_setg(errp, "Postcopy is not compatible with ignore-shared");
            return false;
        }
    }

    return true;
}

static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
        break;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}

MigrationInfo *qmp_query_migrate(Error **errp)
{
    MigrationInfo *info = g_malloc0(sizeof(*info));

    fill_destination_migration_info(info);
    fill_source_migration_info(info);

    return info;
}

void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
                                  Error **errp)
{
    MigrationState *s = migrate_get_current();
    MigrationCapabilityStatusList *cap;
    bool cap_list[MIGRATION_CAPABILITY__MAX];

    if (migration_is_setup_or_active(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return;
    }

    memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
    if (!migrate_caps_check(cap_list, params, errp)) {
        return;
    }

    for (cap = params; cap; cap = cap->next) {
        s->enabled_capabilities[cap->value->capability] = cap->value->state;
    }
}
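
/*
 * Editor's note (illustrative QMP exchange; not part of this file):
 *     -> { "execute": "migrate-set-capabilities", "arguments":
 *            { "capabilities":
 *                [ { "capability": "postcopy-ram", "state": true } ] } }
 *     <- { "return": {} }
 */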

/*
 * Check whether the parameters are valid. Error will be put into errp
 * (if provided). Return true if valid, otherwise false.
 */
static bool migrate_params_check(MigrationParameters *params, Error **errp)
{
    if (params->has_compress_level &&
        (params->compress_level > 9)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                   "is invalid, it should be in the range of 0 to 9");
        return false;
    }

    if (params->has_compress_threads && (params->compress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "compress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }

    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "decompress_threads",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }

    if (params->has_cpu_throttle_initial &&
        (params->cpu_throttle_initial < 1 ||
         params->cpu_throttle_initial > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_initial",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_cpu_throttle_increment &&
        (params->cpu_throttle_increment < 1 ||
         params->cpu_throttle_increment > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "cpu_throttle_increment",
                   "an integer in the range of 1 to 99");
        return false;
    }

    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
        error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
                         " range of 0 to %zu bytes/second", SIZE_MAX);
        return false;
    }

    if (params->has_downtime_limit &&
        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
        error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                         "the range of 0 to %d milliseconds",
                         MAX_MIGRATE_DOWNTIME);
        return false;
    }

    /* x_checkpoint_delay is now always positive */

    if (params->has_multifd_channels && (params->multifd_channels < 1)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "multifd_channels",
                   "is invalid, it should be in the range of 1 to 255");
        return false;
    }

    if (params->has_xbzrle_cache_size &&
        (params->xbzrle_cache_size < qemu_target_page_size() ||
         !is_power_of_2(params->xbzrle_cache_size))) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "xbzrle_cache_size",
                   "is invalid, it should be bigger than target page size"
                   " and a power of two");
        return false;
    }

    if (params->has_max_cpu_throttle &&
        (params->max_cpu_throttle < params->cpu_throttle_initial ||
         params->max_cpu_throttle > 99)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "max_cpu_throttle",
                   "an integer in the range of cpu_throttle_initial to 99");
        return false;
    }

    if (params->has_announce_initial &&
        params->announce_initial > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_initial",
                   "is invalid, it must be less than 100000 ms");
        return false;
    }
    if (params->has_announce_max &&
        params->announce_max > 100000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_max",
                   "is invalid, it must be less than 100000 ms");
        return false;
    }
    if (params->has_announce_rounds &&
        params->announce_rounds > 1000) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_rounds",
                   "is invalid, it must be in the range of 0 to 1000");
        return false;
    }
    if (params->has_announce_step &&
        (params->announce_step < 1 ||
         params->announce_step > 10000)) {
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                   "announce_step",
                   "is invalid, it must be in the range of 1 to 10000 ms");
        return false;
    }
    return true;
}

static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        dest->compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }

    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->has_tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }

    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }

    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        dest->multifd_channels = params->multifd_channels;
    }
    if (params->has_xbzrle_cache_size) {
        dest->xbzrle_cache_size = params->xbzrle_cache_size;
    }
    if (params->has_max_postcopy_bandwidth) {
        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
    }
    if (params->has_max_cpu_throttle) {
        dest->max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        dest->announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        dest->announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        dest->announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        dest->announce_step = params->announce_step;
    }
}

static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        s->parameters.compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }

    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->has_tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->has_tls_authz) {
        g_free(s->parameters.tls_authz);
        assert(params->tls_authz->type == QTYPE_QSTRING);
        s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
    }

    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        if (s->to_dst_file && !migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }

    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }

    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        s->parameters.multifd_channels = params->multifd_channels;
    }
    if (params->has_xbzrle_cache_size) {
        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
    }
    if (params->has_max_postcopy_bandwidth) {
        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
        if (s->to_dst_file && migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                    s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_max_cpu_throttle) {
        s->parameters.max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        s->parameters.announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        s->parameters.announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        s->parameters.announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        s->parameters.announce_step = params->announce_step;
    }
}

void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
{
    MigrationParameters tmp;

    /* TODO Rewrite "" to null instead */
    if (params->has_tls_creds
        && params->tls_creds->type == QTYPE_QNULL) {
        qobject_unref(params->tls_creds->u.n);
        params->tls_creds->type = QTYPE_QSTRING;
        params->tls_creds->u.s = strdup("");
    }
    /* TODO Rewrite "" to null instead */
    if (params->has_tls_hostname
        && params->tls_hostname->type == QTYPE_QNULL) {
        qobject_unref(params->tls_hostname->u.n);
        params->tls_hostname->type = QTYPE_QSTRING;
        params->tls_hostname->u.s = strdup("");
    }

    migrate_params_test_apply(params, &tmp);

    if (!migrate_params_check(&tmp, errp)) {
        /* Invalid parameter */
        return;
    }

    migrate_params_apply(params, errp);
}
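
/*
 * Editor's note (illustrative QMP exchange; not part of this file):
 *     -> { "execute": "migrate-set-parameters",
 *          "arguments": { "max-bandwidth": 33554432,
 *                         "downtime-limit": 300 } }
 *     <- { "return": {} }
 */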

void qmp_migrate_start_postcopy(Error **errp)
{
    MigrationState *s = migrate_get_current();

    if (!migrate_postcopy()) {
        error_setg(errp, "Enable postcopy with migrate_set_capability before"
                   " the start of migration");
        return;
    }

    if (s->state == MIGRATION_STATUS_NONE) {
        error_setg(errp, "Postcopy must be started after migration has been"
                   " started");
        return;
    }
    /*
     * we don't error if migration has finished since that would be racy
     * with issuing this command.
     */
    atomic_set(&s->start_postcopy, true);
}

/* shared migration helpers */

void migrate_set_state(int *state, int old_state, int new_state)
{
    assert(new_state < MIGRATION_STATUS__MAX);
    if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
        trace_migrate_set_state(MigrationStatus_str(new_state));
        migrate_generate_event(new_state);
    }
}
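
/*
 * Editor's note: the cmpxchg above means the transition (and its QMP
 * event) only happens if *state still equals old_state; if a concurrent
 * transition (e.g. a racing cancel) got there first, the call is a
 * silent no-op. That is why callers pass the state they expect rather
 * than re-reading s->state.
 */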

static MigrationCapabilityStatusList *migrate_cap_add(
    MigrationCapabilityStatusList *list,
    MigrationCapability index,
    bool state)
{
    MigrationCapabilityStatusList *cap;

    cap = g_new0(MigrationCapabilityStatusList, 1);
    cap->value = g_new0(MigrationCapabilityStatus, 1);
    cap->value->capability = index;
    cap->value->state = state;
    cap->next = list;

    return cap;
}

void migrate_set_block_enabled(bool value, Error **errp)
{
    MigrationCapabilityStatusList *cap;

    cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value);
    qmp_migrate_set_capabilities(cap, errp);
    qapi_free_MigrationCapabilityStatusList(cap);
}

static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}

static void block_cleanup_parameters(MigrationState *s)
{
    if (s->must_remove_block_options) {
        /* setting to false can never fail */
        migrate_set_block_enabled(false, &error_abort);
        migrate_set_block_incremental(s, false);
        s->must_remove_block_options = false;
    }
}

static void migrate_fd_cleanup(MigrationState *s)
{
    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    qemu_savevm_state_cleanup();

    if (s->to_dst_file) {
        QEMUFile *tmp;

        trace_migrate_fd_cleanup();
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        multifd_save_cleanup();
        qemu_mutex_lock(&s->qemu_file_lock);
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);
        /*
         * Close the file handle without the lock to make sure the
         * critical section won't block for long.
         */
        qemu_fclose(tmp);
    }

    assert((s->state != MIGRATION_STATUS_ACTIVE) &&
           (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used on info migrate. We can't free it */
        error_report_err(error_copy(s->error));
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
}

static void migrate_fd_cleanup_schedule(MigrationState *s)
{
    /*
     * Ref the state for the bh, because it may be called when
     * there are already no other refs
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(s->cleanup_bh);
}

static void migrate_fd_cleanup_bh(void *opaque)
{
    MigrationState *s = opaque;
    migrate_fd_cleanup(s);
    object_unref(OBJECT(s));
}

void migrate_set_error(MigrationState *s, const Error *error)
{
    qemu_mutex_lock(&s->error_mutex);
    if (!s->error) {
        s->error = error_copy(error);
    }
    qemu_mutex_unlock(&s->error_mutex);
}

void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, error);
}

static void migrate_fd_cancel(MigrationState *s)
{
    int old_state;
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    if (s->rp_state.from_dst_file) {
        /* shut down the rp socket, causing the rp thread to shut down */
        qemu_file_shutdown(s->rp_state.from_dst_file);
    }

    do {
        old_state = s->state;
        if (!migration_is_setup_or_active(old_state)) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
        Error *local_err = NULL;

        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
    }
}

void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}

void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}

bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}

bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}

bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}

bool migration_in_postcopy(void)
{
    MigrationState *s = migrate_get_current();

    return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

bool migration_in_postcopy_after_devices(MigrationState *s)
{
    return migration_in_postcopy() && s->postcopy_after_devices;
}

bool migration_is_idle(void)
{
    MigrationState *s = current_migration;

    if (!s) {
        return true;
    }

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_COMPLETED:
    case MIGRATION_STATUS_FAILED:
        return true;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_COLO:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
        return false;
    case MIGRATION_STATUS__MAX:
        g_assert_not_reached();
    }

    return false;
}

void migrate_init(MigrationState *s)
{
    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
     * locks.
     */
    s->bytes_xfer = 0;
    s->cleanup_bh = 0;
    s->to_dst_file = NULL;
    s->state = MIGRATION_STATUS_NONE;
    s->rp_state.from_dst_file = NULL;
    s->rp_state.error = false;
    s->mbps = 0.0;
    s->pages_per_second = 0.0;
    s->downtime = 0;
    s->expected_downtime = 0;
    s->setup_time = 0;
    s->start_postcopy = false;
    s->postcopy_after_devices = false;
    s->migration_thread_running = false;
    error_free(s->error);
    s->error = NULL;

    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->total_time = 0;
    s->vm_was_running = false;
    s->iteration_initial_bytes = 0;
    s->threshold_size = 0;
}
  1487. static GSList *migration_blockers;
  1488. int migrate_add_blocker(Error *reason, Error **errp)
  1489. {
  1490. if (only_migratable) {
  1491. error_propagate_prepend(errp, error_copy(reason),
  1492. "disallowing migration blocker "
  1493. "(--only-migratable) for: ");
  1494. return -EACCES;
  1495. }
  1496. if (migration_is_idle()) {
  1497. migration_blockers = g_slist_prepend(migration_blockers, reason);
  1498. return 0;
  1499. }
  1500. error_propagate_prepend(errp, error_copy(reason),
  1501. "disallowing migration blocker "
  1502. "(migration in progress) for: ");
  1503. return -EBUSY;
  1504. }
  1505. void migrate_del_blocker(Error *reason)
  1506. {
  1507. migration_blockers = g_slist_remove(migration_blockers, reason);
  1508. }
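/*
 * Usage sketch (illustrative only, kept compiled out; "mydev" and the
 * helper names below are hypothetical): a subsystem that cannot be
 * migrated while active registers a blocker on activation and drops it
 * again on deactivation.
 */
#if 0
static Error *mydev_mig_blocker;

static int mydev_activate(Error **errp)
{
    error_setg(&mydev_mig_blocker, "mydev is active and not migratable");
    if (migrate_add_blocker(mydev_mig_blocker, errp) < 0) {
        /* Refused: --only-migratable, or a migration is in progress */
        error_free(mydev_mig_blocker);
        mydev_mig_blocker = NULL;
        return -1;
    }
    return 0;
}

static void mydev_deactivate(void)
{
    migrate_del_blocker(mydev_mig_blocker);
    error_free(mydev_mig_blocker);
    mydev_mig_blocker = NULL;
}
#endif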
  1509. void qmp_migrate_incoming(const char *uri, Error **errp)
  1510. {
  1511. Error *local_err = NULL;
  1512. static bool once = true;
  1513. if (!deferred_incoming) {
  1514. error_setg(errp, "For use with '-incoming defer'");
  1515. return;
  1516. }
if (!once) {
error_setg(errp, "The incoming migration has already been started");
return;
}
  1520. qemu_start_incoming_migration(uri, &local_err);
  1521. if (local_err) {
  1522. error_propagate(errp, local_err);
  1523. return;
  1524. }
  1525. once = false;
  1526. }
  1527. void qmp_migrate_recover(const char *uri, Error **errp)
  1528. {
  1529. MigrationIncomingState *mis = migration_incoming_get_current();
  1530. if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
  1531. error_setg(errp, "Migrate recover can only be run "
  1532. "when postcopy is paused.");
  1533. return;
  1534. }
  1535. if (atomic_cmpxchg(&mis->postcopy_recover_triggered,
  1536. false, true) == true) {
  1537. error_setg(errp, "Migrate recovery is triggered already");
  1538. return;
  1539. }
  1540. /*
  1541. * Note that this call will never start a real migration; it will
* only re-setup the migration stream and poke the existing migration
* to continue using the newly established channel.
  1544. */
  1545. qemu_start_incoming_migration(uri, errp);
  1546. }
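/*
 * Recovery flow sketch (QMP, illustrative; host names and ports are
 * made up): after a postcopy network failure both sides sit in
 * postcopy-paused. The destination is told to listen again, then the
 * source is resumed against the new channel:
 *
 *   destination: { "execute": "migrate-recover",
 *                  "arguments": { "uri": "tcp:0:4444" } }
 *   source:      { "execute": "migrate",
 *                  "arguments": { "uri": "tcp:dst:4444", "resume": true } }
 */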
  1547. void qmp_migrate_pause(Error **errp)
  1548. {
  1549. MigrationState *ms = migrate_get_current();
  1550. MigrationIncomingState *mis = migration_incoming_get_current();
  1551. int ret;
  1552. if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  1553. /* Source side, during postcopy */
  1554. qemu_mutex_lock(&ms->qemu_file_lock);
  1555. ret = qemu_file_shutdown(ms->to_dst_file);
  1556. qemu_mutex_unlock(&ms->qemu_file_lock);
  1557. if (ret) {
  1558. error_setg(errp, "Failed to pause source migration");
  1559. }
  1560. return;
  1561. }
  1562. if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  1563. ret = qemu_file_shutdown(mis->from_src_file);
  1564. if (ret) {
  1565. error_setg(errp, "Failed to pause destination migration");
  1566. }
  1567. return;
  1568. }
  1569. error_setg(errp, "migrate-pause is currently only supported "
  1570. "during postcopy-active state");
  1571. }
  1572. bool migration_is_blocked(Error **errp)
  1573. {
  1574. if (qemu_savevm_state_blocked(errp)) {
  1575. return true;
  1576. }
  1577. if (migration_blockers) {
  1578. error_propagate(errp, error_copy(migration_blockers->data));
  1579. return true;
  1580. }
  1581. return false;
  1582. }
/* Returns true if we should continue the migration, or false if an error was detected */
  1584. static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
  1585. bool resume, Error **errp)
  1586. {
  1587. Error *local_err = NULL;
  1588. if (resume) {
  1589. if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
  1590. error_setg(errp, "Cannot resume if there is no "
  1591. "paused migration");
  1592. return false;
  1593. }
  1594. /*
* Postcopy recovery won't work well with the release-ram
* capability, since release-ram drops the page buffer as soon
* as the page is put into the send buffer. So if a network
* failure happens, any page buffers that have not yet reached
* the destination VM but have already been sent from the
* source VM will be lost forever. Let's refuse to let the
* client resume such a postcopy migration. Luckily release-ram
* was designed to only be used when src and destination VMs
* are on the same host, so it should be fine.
  1605. */
  1606. if (migrate_release_ram()) {
  1607. error_setg(errp, "Postcopy recovery cannot work "
  1608. "when release-ram capability is set");
  1609. return false;
  1610. }
  1611. /* This is a resume, skip init status */
  1612. return true;
  1613. }
  1614. if (migration_is_setup_or_active(s->state) ||
  1615. s->state == MIGRATION_STATUS_CANCELLING ||
  1616. s->state == MIGRATION_STATUS_COLO) {
  1617. error_setg(errp, QERR_MIGRATION_ACTIVE);
  1618. return false;
  1619. }
  1620. if (runstate_check(RUN_STATE_INMIGRATE)) {
  1621. error_setg(errp, "Guest is waiting for an incoming migration");
  1622. return false;
  1623. }
  1624. if (migration_is_blocked(errp)) {
  1625. return false;
  1626. }
  1627. if (blk || blk_inc) {
  1628. if (migrate_use_block() || migrate_use_block_incremental()) {
  1629. error_setg(errp, "Command options are incompatible with "
  1630. "current migration capabilities");
  1631. return false;
  1632. }
  1633. migrate_set_block_enabled(true, &local_err);
  1634. if (local_err) {
  1635. error_propagate(errp, local_err);
  1636. return false;
  1637. }
  1638. s->must_remove_block_options = true;
  1639. }
  1640. if (blk_inc) {
  1641. migrate_set_block_incremental(s, true);
  1642. }
  1643. migrate_init(s);
  1644. return true;
  1645. }
  1646. void qmp_migrate(const char *uri, bool has_blk, bool blk,
  1647. bool has_inc, bool inc, bool has_detach, bool detach,
  1648. bool has_resume, bool resume, Error **errp)
  1649. {
  1650. Error *local_err = NULL;
  1651. MigrationState *s = migrate_get_current();
  1652. const char *p;
  1653. if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
  1654. has_resume && resume, errp)) {
  1655. /* Error detected, put into errp */
  1656. return;
  1657. }
  1658. if (strstart(uri, "tcp:", &p)) {
  1659. tcp_start_outgoing_migration(s, p, &local_err);
  1660. #ifdef CONFIG_RDMA
  1661. } else if (strstart(uri, "rdma:", &p)) {
  1662. rdma_start_outgoing_migration(s, p, &local_err);
  1663. #endif
  1664. } else if (strstart(uri, "exec:", &p)) {
  1665. exec_start_outgoing_migration(s, p, &local_err);
  1666. } else if (strstart(uri, "unix:", &p)) {
  1667. unix_start_outgoing_migration(s, p, &local_err);
  1668. } else if (strstart(uri, "fd:", &p)) {
  1669. fd_start_outgoing_migration(s, p, &local_err);
  1670. } else {
  1671. error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
  1672. "a valid migration protocol");
  1673. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  1674. MIGRATION_STATUS_FAILED);
  1675. block_cleanup_parameters(s);
  1676. return;
  1677. }
  1678. if (local_err) {
  1679. migrate_fd_error(s, local_err);
  1680. error_propagate(errp, local_err);
  1681. return;
  1682. }
  1683. }
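/*
 * QMP usage sketch (illustrative; host names and paths are made up):
 * the URI prefix selects the transport dispatched above, e.g.:
 *
 *   { "execute": "migrate",
 *     "arguments": { "uri": "tcp:dst.example.org:4444" } }
 *   { "execute": "migrate", "arguments": { "uri": "unix:/tmp/mig.sock" } }
 *   { "execute": "migrate", "arguments": { "uri": "exec:cat > /tmp/vm.mig" } }
 */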
  1684. void qmp_migrate_cancel(Error **errp)
  1685. {
  1686. migrate_fd_cancel(migrate_get_current());
  1687. }
  1688. void qmp_migrate_continue(MigrationStatus state, Error **errp)
  1689. {
  1690. MigrationState *s = migrate_get_current();
  1691. if (s->state != state) {
  1692. error_setg(errp, "Migration not in expected state: %s",
  1693. MigrationStatus_str(s->state));
  1694. return;
  1695. }
  1696. qemu_sem_post(&s->pause_sem);
  1697. }
  1698. void qmp_migrate_set_cache_size(int64_t value, Error **errp)
  1699. {
  1700. MigrateSetParameters p = {
  1701. .has_xbzrle_cache_size = true,
  1702. .xbzrle_cache_size = value,
  1703. };
  1704. qmp_migrate_set_parameters(&p, errp);
  1705. }
  1706. int64_t qmp_query_migrate_cache_size(Error **errp)
  1707. {
  1708. return migrate_xbzrle_cache_size();
  1709. }
  1710. void qmp_migrate_set_speed(int64_t value, Error **errp)
  1711. {
  1712. MigrateSetParameters p = {
  1713. .has_max_bandwidth = true,
  1714. .max_bandwidth = value,
  1715. };
  1716. qmp_migrate_set_parameters(&p, errp);
  1717. }
  1718. void qmp_migrate_set_downtime(double value, Error **errp)
  1719. {
  1720. if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
  1721. error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
  1722. "the range of 0 to %d seconds",
  1723. MAX_MIGRATE_DOWNTIME_SECONDS);
  1724. return;
  1725. }
  1726. value *= 1000; /* Convert to milliseconds */
  1727. value = MAX(0, MIN(INT64_MAX, value));
  1728. MigrateSetParameters p = {
  1729. .has_downtime_limit = true,
  1730. .downtime_limit = value,
  1731. };
  1732. qmp_migrate_set_parameters(&p, errp);
  1733. }
  1734. bool migrate_release_ram(void)
  1735. {
  1736. MigrationState *s;
  1737. s = migrate_get_current();
  1738. return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
  1739. }
  1740. bool migrate_postcopy_ram(void)
  1741. {
  1742. MigrationState *s;
  1743. s = migrate_get_current();
  1744. return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
  1745. }
  1746. bool migrate_postcopy(void)
  1747. {
  1748. return migrate_postcopy_ram() || migrate_dirty_bitmaps();
  1749. }
  1750. bool migrate_auto_converge(void)
  1751. {
  1752. MigrationState *s;
  1753. s = migrate_get_current();
  1754. return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
  1755. }
  1756. bool migrate_zero_blocks(void)
  1757. {
  1758. MigrationState *s;
  1759. s = migrate_get_current();
  1760. return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
  1761. }
  1762. bool migrate_postcopy_blocktime(void)
  1763. {
  1764. MigrationState *s;
  1765. s = migrate_get_current();
  1766. return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
  1767. }
  1768. bool migrate_use_compression(void)
  1769. {
  1770. MigrationState *s;
  1771. s = migrate_get_current();
  1772. return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
  1773. }
  1774. int migrate_compress_level(void)
  1775. {
  1776. MigrationState *s;
  1777. s = migrate_get_current();
  1778. return s->parameters.compress_level;
  1779. }
  1780. int migrate_compress_threads(void)
  1781. {
  1782. MigrationState *s;
  1783. s = migrate_get_current();
  1784. return s->parameters.compress_threads;
  1785. }
  1786. int migrate_compress_wait_thread(void)
  1787. {
  1788. MigrationState *s;
  1789. s = migrate_get_current();
  1790. return s->parameters.compress_wait_thread;
  1791. }
  1792. int migrate_decompress_threads(void)
  1793. {
  1794. MigrationState *s;
  1795. s = migrate_get_current();
  1796. return s->parameters.decompress_threads;
  1797. }
  1798. bool migrate_dirty_bitmaps(void)
  1799. {
  1800. MigrationState *s;
  1801. s = migrate_get_current();
  1802. return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
  1803. }
  1804. bool migrate_ignore_shared(void)
  1805. {
  1806. MigrationState *s;
  1807. s = migrate_get_current();
  1808. return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
  1809. }
  1810. bool migrate_use_events(void)
  1811. {
  1812. MigrationState *s;
  1813. s = migrate_get_current();
  1814. return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
  1815. }
  1816. bool migrate_use_multifd(void)
  1817. {
  1818. MigrationState *s;
  1819. s = migrate_get_current();
  1820. return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
  1821. }
  1822. bool migrate_pause_before_switchover(void)
  1823. {
  1824. MigrationState *s;
  1825. s = migrate_get_current();
  1826. return s->enabled_capabilities[
  1827. MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
  1828. }
  1829. int migrate_multifd_channels(void)
  1830. {
  1831. MigrationState *s;
  1832. s = migrate_get_current();
  1833. return s->parameters.multifd_channels;
  1834. }
  1835. int migrate_use_xbzrle(void)
  1836. {
  1837. MigrationState *s;
  1838. s = migrate_get_current();
  1839. return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
  1840. }
  1841. int64_t migrate_xbzrle_cache_size(void)
  1842. {
  1843. MigrationState *s;
  1844. s = migrate_get_current();
  1845. return s->parameters.xbzrle_cache_size;
  1846. }
  1847. static int64_t migrate_max_postcopy_bandwidth(void)
  1848. {
  1849. MigrationState *s;
  1850. s = migrate_get_current();
  1851. return s->parameters.max_postcopy_bandwidth;
  1852. }
  1853. bool migrate_use_block(void)
  1854. {
  1855. MigrationState *s;
  1856. s = migrate_get_current();
  1857. return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
  1858. }
  1859. bool migrate_use_return_path(void)
  1860. {
  1861. MigrationState *s;
  1862. s = migrate_get_current();
  1863. return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
  1864. }
  1865. bool migrate_use_block_incremental(void)
  1866. {
  1867. MigrationState *s;
  1868. s = migrate_get_current();
  1869. return s->parameters.block_incremental;
  1870. }
  1871. /* migration thread support */
  1872. /*
* Something bad happened to the RP stream; mark an error.
* The caller shall print or trace something to indicate why.
  1875. */
  1876. static void mark_source_rp_bad(MigrationState *s)
  1877. {
  1878. s->rp_state.error = true;
  1879. }
  1880. static struct rp_cmd_args {
  1881. ssize_t len; /* -1 = variable */
  1882. const char *name;
  1883. } rp_cmd_args[] = {
  1884. [MIG_RP_MSG_INVALID] = { .len = -1, .name = "INVALID" },
  1885. [MIG_RP_MSG_SHUT] = { .len = 4, .name = "SHUT" },
  1886. [MIG_RP_MSG_PONG] = { .len = 4, .name = "PONG" },
  1887. [MIG_RP_MSG_REQ_PAGES] = { .len = 12, .name = "REQ_PAGES" },
  1888. [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" },
  1889. [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" },
  1890. [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" },
  1891. [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" },
  1892. };
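/*
 * Wire format sketch, matching how source_return_path_thread() parses
 * the stream below: each message is a big-endian 16-bit type, then a
 * big-endian 16-bit payload length, then the payload itself. For
 * example, a REQ_PAGES payload is a be64 start address followed by a
 * be32 length, i.e. the 12 bytes declared in the table above.
 */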
  1893. /*
* Process a request for pages received on the return path.
  1895. * We're allowed to send more than requested (e.g. to round to our page size)
  1896. * and we don't need to send pages that have already been sent.
  1897. */
  1898. static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
  1899. ram_addr_t start, size_t len)
  1900. {
  1901. long our_host_ps = getpagesize();
  1902. trace_migrate_handle_rp_req_pages(rbname, start, len);
  1903. /*
  1904. * Since we currently insist on matching page sizes, just sanity check
  1905. * we're being asked for whole host pages.
  1906. */
  1907. if (start & (our_host_ps-1) ||
  1908. (len & (our_host_ps-1))) {
  1909. error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
  1910. " len: %zd", __func__, start, len);
  1911. mark_source_rp_bad(ms);
  1912. return;
  1913. }
  1914. if (ram_save_queue_pages(rbname, start, len)) {
  1915. mark_source_rp_bad(ms);
  1916. }
  1917. }
  1918. /* Return true to retry, false to quit */
  1919. static bool postcopy_pause_return_path_thread(MigrationState *s)
  1920. {
  1921. trace_postcopy_pause_return_path();
  1922. qemu_sem_wait(&s->postcopy_pause_rp_sem);
  1923. trace_postcopy_pause_return_path_continued();
  1924. return true;
  1925. }
  1926. static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
  1927. {
  1928. RAMBlock *block = qemu_ram_block_by_name(block_name);
  1929. if (!block) {
  1930. error_report("%s: invalid block name '%s'", __func__, block_name);
  1931. return -EINVAL;
  1932. }
  1933. /* Fetch the received bitmap and refresh the dirty bitmap */
  1934. return ram_dirty_bitmap_reload(s, block);
  1935. }
  1936. static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
  1937. {
  1938. trace_source_return_path_thread_resume_ack(value);
  1939. if (value != MIGRATION_RESUME_ACK_VALUE) {
  1940. error_report("%s: illegal resume_ack value %"PRIu32,
  1941. __func__, value);
  1942. return -1;
  1943. }
  1944. /* Now both sides are active. */
  1945. migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
  1946. MIGRATION_STATUS_POSTCOPY_ACTIVE);
/* Notify the send thread that it is time to continue sending pages */
  1948. qemu_sem_post(&s->rp_state.rp_sem);
  1949. return 0;
  1950. }
  1951. /*
  1952. * Handles messages sent on the return path towards the source VM
  1953. *
  1954. */
  1955. static void *source_return_path_thread(void *opaque)
  1956. {
  1957. MigrationState *ms = opaque;
  1958. QEMUFile *rp = ms->rp_state.from_dst_file;
  1959. uint16_t header_len, header_type;
  1960. uint8_t buf[512];
  1961. uint32_t tmp32, sibling_error;
  1962. ram_addr_t start = 0; /* =0 to silence warning */
  1963. size_t len = 0, expected_len;
  1964. int res;
  1965. trace_source_return_path_thread_entry();
  1966. rcu_register_thread();
  1967. retry:
  1968. while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
  1969. migration_is_setup_or_active(ms->state)) {
  1970. trace_source_return_path_thread_loop_top();
  1971. header_type = qemu_get_be16(rp);
  1972. header_len = qemu_get_be16(rp);
  1973. if (qemu_file_get_error(rp)) {
  1974. mark_source_rp_bad(ms);
  1975. goto out;
  1976. }
  1977. if (header_type >= MIG_RP_MSG_MAX ||
  1978. header_type == MIG_RP_MSG_INVALID) {
  1979. error_report("RP: Received invalid message 0x%04x length 0x%04x",
  1980. header_type, header_len);
  1981. mark_source_rp_bad(ms);
  1982. goto out;
  1983. }
  1984. if ((rp_cmd_args[header_type].len != -1 &&
  1985. header_len != rp_cmd_args[header_type].len) ||
  1986. header_len > sizeof(buf)) {
  1987. error_report("RP: Received '%s' message (0x%04x) with"
  1988. "incorrect length %d expecting %zu",
  1989. rp_cmd_args[header_type].name, header_type, header_len,
  1990. (size_t)rp_cmd_args[header_type].len);
  1991. mark_source_rp_bad(ms);
  1992. goto out;
  1993. }
  1994. /* We know we've got a valid header by this point */
  1995. res = qemu_get_buffer(rp, buf, header_len);
  1996. if (res != header_len) {
  1997. error_report("RP: Failed reading data for message 0x%04x"
  1998. " read %d expected %d",
  1999. header_type, res, header_len);
  2000. mark_source_rp_bad(ms);
  2001. goto out;
  2002. }
  2003. /* OK, we have the message and the data */
  2004. switch (header_type) {
  2005. case MIG_RP_MSG_SHUT:
  2006. sibling_error = ldl_be_p(buf);
  2007. trace_source_return_path_thread_shut(sibling_error);
  2008. if (sibling_error) {
  2009. error_report("RP: Sibling indicated error %d", sibling_error);
  2010. mark_source_rp_bad(ms);
  2011. }
  2012. /*
* We'll let the main thread deal with closing the RP;
  2014. * we could do a shutdown(2) on it, but we're the only user
  2015. * anyway, so there's nothing gained.
  2016. */
  2017. goto out;
  2018. case MIG_RP_MSG_PONG:
  2019. tmp32 = ldl_be_p(buf);
  2020. trace_source_return_path_thread_pong(tmp32);
  2021. break;
  2022. case MIG_RP_MSG_REQ_PAGES:
  2023. start = ldq_be_p(buf);
  2024. len = ldl_be_p(buf + 8);
  2025. migrate_handle_rp_req_pages(ms, NULL, start, len);
  2026. break;
  2027. case MIG_RP_MSG_REQ_PAGES_ID:
  2028. expected_len = 12 + 1; /* header + termination */
  2029. if (header_len >= expected_len) {
  2030. start = ldq_be_p(buf);
  2031. len = ldl_be_p(buf + 8);
  2032. /* Now we expect an idstr */
  2033. tmp32 = buf[12]; /* Length of the following idstr */
  2034. buf[13 + tmp32] = '\0';
  2035. expected_len += tmp32;
  2036. }
  2037. if (header_len != expected_len) {
  2038. error_report("RP: Req_Page_id with length %d expecting %zd",
  2039. header_len, expected_len);
  2040. mark_source_rp_bad(ms);
  2041. goto out;
  2042. }
  2043. migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
  2044. break;
  2045. case MIG_RP_MSG_RECV_BITMAP:
  2046. if (header_len < 1) {
  2047. error_report("%s: missing block name", __func__);
  2048. mark_source_rp_bad(ms);
  2049. goto out;
  2050. }
  2051. /* Format: len (1B) + idstr (<255B). This ends the idstr. */
  2052. buf[buf[0] + 1] = '\0';
  2053. if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
  2054. mark_source_rp_bad(ms);
  2055. goto out;
  2056. }
  2057. break;
  2058. case MIG_RP_MSG_RESUME_ACK:
  2059. tmp32 = ldl_be_p(buf);
  2060. if (migrate_handle_rp_resume_ack(ms, tmp32)) {
  2061. mark_source_rp_bad(ms);
  2062. goto out;
  2063. }
  2064. break;
  2065. default:
  2066. break;
  2067. }
  2068. }
  2069. out:
  2070. res = qemu_file_get_error(rp);
  2071. if (res) {
  2072. if (res == -EIO) {
  2073. /*
* Maybe there is something we can do: it looks like a
* network-down issue, so we pause and wait for recovery.
  2076. */
  2077. if (postcopy_pause_return_path_thread(ms)) {
  2078. /* Reload rp, reset the rest */
  2079. if (rp != ms->rp_state.from_dst_file) {
  2080. qemu_fclose(rp);
  2081. rp = ms->rp_state.from_dst_file;
  2082. }
  2083. ms->rp_state.error = false;
  2084. goto retry;
  2085. }
  2086. }
  2087. trace_source_return_path_thread_bad_end();
  2088. mark_source_rp_bad(ms);
  2089. }
  2090. trace_source_return_path_thread_end();
  2091. ms->rp_state.from_dst_file = NULL;
  2092. qemu_fclose(rp);
  2093. rcu_unregister_thread();
  2094. return NULL;
  2095. }
  2096. static int open_return_path_on_source(MigrationState *ms,
  2097. bool create_thread)
  2098. {
  2099. ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
  2100. if (!ms->rp_state.from_dst_file) {
  2101. return -1;
  2102. }
  2103. trace_open_return_path_on_source();
  2104. if (!create_thread) {
  2105. /* We're done */
  2106. return 0;
  2107. }
  2108. qemu_thread_create(&ms->rp_state.rp_thread, "return path",
  2109. source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
  2110. trace_open_return_path_on_source_continue();
  2111. return 0;
  2112. }
  2113. /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
  2114. static int await_return_path_close_on_source(MigrationState *ms)
  2115. {
  2116. /*
  2117. * If this is a normal exit then the destination will send a SHUT and the
* rp_thread will exit; however, if there's an error we need to cause
  2119. * it to exit.
  2120. */
  2121. if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
  2122. /*
  2123. * shutdown(2), if we have it, will cause it to unblock if it's stuck
  2124. * waiting for the destination.
  2125. */
  2126. qemu_file_shutdown(ms->rp_state.from_dst_file);
  2127. mark_source_rp_bad(ms);
  2128. }
  2129. trace_await_return_path_close_on_source_joining();
  2130. qemu_thread_join(&ms->rp_state.rp_thread);
  2131. trace_await_return_path_close_on_source_close();
  2132. return ms->rp_state.error;
  2133. }
  2134. /*
  2135. * Switch from normal iteration to postcopy
  2136. * Returns non-0 on error
  2137. */
  2138. static int postcopy_start(MigrationState *ms)
  2139. {
  2140. int ret;
  2141. QIOChannelBuffer *bioc;
  2142. QEMUFile *fb;
  2143. int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2144. int64_t bandwidth = migrate_max_postcopy_bandwidth();
  2145. bool restart_block = false;
  2146. int cur_state = MIGRATION_STATUS_ACTIVE;
  2147. if (!migrate_pause_before_switchover()) {
  2148. migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
  2149. MIGRATION_STATUS_POSTCOPY_ACTIVE);
  2150. }
  2151. trace_postcopy_start();
  2152. qemu_mutex_lock_iothread();
  2153. trace_postcopy_start_set_run();
  2154. qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
  2155. global_state_store();
  2156. ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
  2157. if (ret < 0) {
  2158. goto fail;
  2159. }
  2160. ret = migration_maybe_pause(ms, &cur_state,
  2161. MIGRATION_STATUS_POSTCOPY_ACTIVE);
  2162. if (ret < 0) {
  2163. goto fail;
  2164. }
  2165. ret = bdrv_inactivate_all();
  2166. if (ret < 0) {
  2167. goto fail;
  2168. }
  2169. restart_block = true;
  2170. /*
  2171. * Cause any non-postcopiable, but iterative devices to
  2172. * send out their final data.
  2173. */
  2174. qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);
  2175. /*
* In 'finish migrate' state, with the io-lock held, everything should
* be quiet, but we've potentially still got dirty pages and we
* need to tell the destination to throw away any pages it's already
* received that are dirty.
  2180. */
  2181. if (migrate_postcopy_ram()) {
  2182. if (ram_postcopy_send_discard_bitmap(ms)) {
  2183. error_report("postcopy send discard bitmap failed");
  2184. goto fail;
  2185. }
  2186. }
  2187. /*
* Send the rest of the state - note that devices doing postcopy
* will notice we're in POSTCOPY_ACTIVE and not actually
* wrap their state up here.
  2191. */
  2192. /* 0 max-postcopy-bandwidth means unlimited */
  2193. if (!bandwidth) {
  2194. qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
  2195. } else {
  2196. qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
  2197. }
  2198. if (migrate_postcopy_ram()) {
  2199. /* Ping just for debugging, helps line traces up */
  2200. qemu_savevm_send_ping(ms->to_dst_file, 2);
  2201. }
  2202. /*
  2203. * While loading the device state we may trigger page transfer
  2204. * requests and the fd must be free to process those, and thus
  2205. * the destination must read the whole device state off the fd before
  2206. * it starts processing it. Unfortunately the ad-hoc migration format
  2207. * doesn't allow the destination to know the size to read without fully
  2208. * parsing it through each devices load-state code (especially the open
  2209. * coded devices that use get/put).
  2210. * So we wrap the device state up in a package with a length at the start;
  2211. * to do this we use a qemu_buf to hold the whole of the device state.
  2212. */
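/*
 * Resulting stream layout, as a sketch (framing per
 * qemu_savevm_send_packaged() in savevm.c; treat as illustrative
 * rather than normative):
 *
 *   QEMU_VM_COMMAND byte | be16 MIG_CMD_PACKAGED | be16 4 | be32 package-len
 *   package: LISTEN cmd, remaining precopy device state,
 *            optional PING(3), RUN cmd
 */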
  2213. bioc = qio_channel_buffer_new(4096);
  2214. qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
  2215. fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
  2216. object_unref(OBJECT(bioc));
  2217. /*
  2218. * Make sure the receiver can get incoming pages before we send the rest
  2219. * of the state
  2220. */
  2221. qemu_savevm_send_postcopy_listen(fb);
  2222. qemu_savevm_state_complete_precopy(fb, false, false);
  2223. if (migrate_postcopy_ram()) {
  2224. qemu_savevm_send_ping(fb, 3);
  2225. }
  2226. qemu_savevm_send_postcopy_run(fb);
  2227. /* <><> end of stuff going into the package */
  2228. /* Last point of recovery; as soon as we send the package the destination
  2229. * can open devices and potentially start running.
* Let's just check again that we've not got any errors.
  2231. */
  2232. ret = qemu_file_get_error(ms->to_dst_file);
  2233. if (ret) {
  2234. error_report("postcopy_start: Migration stream errored (pre package)");
  2235. goto fail_closefb;
  2236. }
  2237. restart_block = false;
  2238. /* Now send that blob */
  2239. if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
  2240. goto fail_closefb;
  2241. }
  2242. qemu_fclose(fb);
  2243. /* Send a notify to give a chance for anything that needs to happen
  2244. * at the transition to postcopy and after the device state; in particular
  2245. * spice needs to trigger a transition now
  2246. */
  2247. ms->postcopy_after_devices = true;
  2248. notifier_list_notify(&migration_state_notifiers, ms);
  2249. ms->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;
  2250. qemu_mutex_unlock_iothread();
  2251. if (migrate_postcopy_ram()) {
  2252. /*
  2253. * Although this ping is just for debug, it could potentially be
  2254. * used for getting a better measurement of downtime at the source.
  2255. */
  2256. qemu_savevm_send_ping(ms->to_dst_file, 4);
  2257. }
  2258. if (migrate_release_ram()) {
  2259. ram_postcopy_migrated_memory_release(ms);
  2260. }
  2261. ret = qemu_file_get_error(ms->to_dst_file);
  2262. if (ret) {
  2263. error_report("postcopy_start: Migration stream errored");
  2264. migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
  2265. MIGRATION_STATUS_FAILED);
  2266. }
  2267. return ret;
  2268. fail_closefb:
  2269. qemu_fclose(fb);
  2270. fail:
  2271. migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
  2272. MIGRATION_STATUS_FAILED);
  2273. if (restart_block) {
  2274. /* A failure happened early enough that we know the destination hasn't
  2275. * accessed block devices, so we're safe to recover.
  2276. */
  2277. Error *local_err = NULL;
  2278. bdrv_invalidate_cache_all(&local_err);
  2279. if (local_err) {
  2280. error_report_err(local_err);
  2281. }
  2282. }
  2283. qemu_mutex_unlock_iothread();
  2284. return -1;
  2285. }
  2286. /**
  2287. * migration_maybe_pause: Pause if required to by
* migrate_pause_before_switchover; called with the iothread locked.
  2289. * Returns: 0 on success
  2290. */
  2291. static int migration_maybe_pause(MigrationState *s,
  2292. int *current_active_state,
  2293. int new_state)
  2294. {
  2295. if (!migrate_pause_before_switchover()) {
  2296. return 0;
  2297. }
  2298. /* Since leaving this state is not atomic with posting the semaphore
  2299. * it's possible that someone could have issued multiple migrate_continue
  2300. * and the semaphore is incorrectly positive at this point;
  2301. * the docs say it's undefined to reinit a semaphore that's already
  2302. * init'd, so use timedwait to eat up any existing posts.
  2303. */
  2304. while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
  2305. /* This block intentionally left blank */
  2306. }
  2307. qemu_mutex_unlock_iothread();
  2308. migrate_set_state(&s->state, *current_active_state,
  2309. MIGRATION_STATUS_PRE_SWITCHOVER);
  2310. qemu_sem_wait(&s->pause_sem);
  2311. migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
  2312. new_state);
  2313. *current_active_state = new_state;
  2314. qemu_mutex_lock_iothread();
  2315. return s->state == new_state ? 0 : -EINVAL;
  2316. }
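/*
 * QMP flow sketch (illustrative): with the pause-before-switchover
 * capability enabled, management waits for the pre-switchover state
 * and then releases the thread blocked above:
 *
 *   { "execute": "migrate-set-capabilities",
 *     "arguments": { "capabilities": [
 *         { "capability": "pause-before-switchover", "state": true } ] } }
 *   ... wait for a MIGRATION event with status "pre-switchover" ...
 *   { "execute": "migrate-continue",
 *     "arguments": { "state": "pre-switchover" } }
 */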
  2317. /**
  2318. * migration_completion: Used by migration_thread when there's not much left.
  2319. * The caller 'breaks' the loop when this returns.
  2320. *
  2321. * @s: Current migration state
  2322. */
  2323. static void migration_completion(MigrationState *s)
  2324. {
  2325. int ret;
  2326. int current_active_state = s->state;
  2327. if (s->state == MIGRATION_STATUS_ACTIVE) {
  2328. qemu_mutex_lock_iothread();
  2329. s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2330. qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
  2331. s->vm_was_running = runstate_is_running();
  2332. ret = global_state_store();
  2333. if (!ret) {
  2334. bool inactivate = !migrate_colo_enabled();
  2335. ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
  2336. if (ret >= 0) {
  2337. ret = migration_maybe_pause(s, &current_active_state,
  2338. MIGRATION_STATUS_DEVICE);
  2339. }
  2340. if (ret >= 0) {
  2341. qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
  2342. ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
  2343. inactivate);
  2344. }
  2345. if (inactivate && ret >= 0) {
  2346. s->block_inactive = true;
  2347. }
  2348. }
  2349. qemu_mutex_unlock_iothread();
  2350. if (ret < 0) {
  2351. goto fail;
  2352. }
  2353. } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  2354. trace_migration_completion_postcopy_end();
  2355. qemu_savevm_state_complete_postcopy(s->to_dst_file);
  2356. trace_migration_completion_postcopy_end_after_complete();
  2357. }
  2358. /*
  2359. * If rp was opened we must clean up the thread before
  2360. * cleaning everything else up (since if there are no failures
* it will wait for the destination to send its status in
  2362. * a SHUT command).
  2363. */
  2364. if (s->rp_state.from_dst_file) {
  2365. int rp_error;
  2366. trace_migration_return_path_end_before();
  2367. rp_error = await_return_path_close_on_source(s);
  2368. trace_migration_return_path_end_after(rp_error);
  2369. if (rp_error) {
  2370. goto fail_invalidate;
  2371. }
  2372. }
  2373. if (qemu_file_get_error(s->to_dst_file)) {
  2374. trace_migration_completion_file_err();
  2375. goto fail_invalidate;
  2376. }
  2377. if (!migrate_colo_enabled()) {
  2378. migrate_set_state(&s->state, current_active_state,
  2379. MIGRATION_STATUS_COMPLETED);
  2380. }
  2381. return;
  2382. fail_invalidate:
  2383. /* If not doing postcopy, vm_start() will be called: let's regain
* control of the images.
  2385. */
  2386. if (s->state == MIGRATION_STATUS_ACTIVE ||
  2387. s->state == MIGRATION_STATUS_DEVICE) {
  2388. Error *local_err = NULL;
  2389. qemu_mutex_lock_iothread();
  2390. bdrv_invalidate_cache_all(&local_err);
  2391. if (local_err) {
  2392. error_report_err(local_err);
  2393. } else {
  2394. s->block_inactive = false;
  2395. }
  2396. qemu_mutex_unlock_iothread();
  2397. }
  2398. fail:
  2399. migrate_set_state(&s->state, current_active_state,
  2400. MIGRATION_STATUS_FAILED);
  2401. }
  2402. bool migrate_colo_enabled(void)
  2403. {
  2404. MigrationState *s = migrate_get_current();
  2405. return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
  2406. }
  2407. typedef enum MigThrError {
  2408. /* No error detected */
  2409. MIG_THR_ERR_NONE = 0,
  2410. /* Detected error, but resumed successfully */
  2411. MIG_THR_ERR_RECOVERED = 1,
  2412. /* Detected fatal error, need to exit */
  2413. MIG_THR_ERR_FATAL = 2,
  2414. } MigThrError;
  2415. static int postcopy_resume_handshake(MigrationState *s)
  2416. {
  2417. qemu_savevm_send_postcopy_resume(s->to_dst_file);
  2418. while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
  2419. qemu_sem_wait(&s->rp_state.rp_sem);
  2420. }
  2421. if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  2422. return 0;
  2423. }
  2424. return -1;
  2425. }
  2426. /* Return zero if success, or <0 for error */
  2427. static int postcopy_do_resume(MigrationState *s)
  2428. {
  2429. int ret;
  2430. /*
  2431. * Call all the resume_prepare() hooks, so that modules can be
  2432. * ready for the migration resume.
  2433. */
  2434. ret = qemu_savevm_state_resume_prepare(s);
  2435. if (ret) {
  2436. error_report("%s: resume_prepare() failure detected: %d",
  2437. __func__, ret);
  2438. return ret;
  2439. }
  2440. /*
  2441. * Last handshake with destination on the resume (destination will
  2442. * switch to postcopy-active afterwards)
  2443. */
  2444. ret = postcopy_resume_handshake(s);
  2445. if (ret) {
  2446. error_report("%s: handshake failed: %d", __func__, ret);
  2447. return ret;
  2448. }
  2449. return 0;
  2450. }
  2451. /*
* We don't return until we are in a safe state to continue the
* current postcopy migration. Returns MIG_THR_ERR_RECOVERED if
* recovered, or MIG_THR_ERR_FATAL if an unrecoverable failure happened.
  2455. */
  2456. static MigThrError postcopy_pause(MigrationState *s)
  2457. {
  2458. assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
  2459. while (true) {
  2460. QEMUFile *file;
  2461. migrate_set_state(&s->state, s->state,
  2462. MIGRATION_STATUS_POSTCOPY_PAUSED);
  2463. /* Current channel is possibly broken. Release it. */
  2464. assert(s->to_dst_file);
  2465. qemu_mutex_lock(&s->qemu_file_lock);
  2466. file = s->to_dst_file;
  2467. s->to_dst_file = NULL;
  2468. qemu_mutex_unlock(&s->qemu_file_lock);
  2469. qemu_file_shutdown(file);
  2470. qemu_fclose(file);
  2471. error_report("Detected IO failure for postcopy. "
  2472. "Migration paused.");
  2473. /*
* We wait until things are fixed up. Then someone will set the
* status back for us.
  2476. */
  2477. while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
  2478. qemu_sem_wait(&s->postcopy_pause_sem);
  2479. }
  2480. if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
  2481. /* Woken up by a recover procedure. Give it a shot */
  2482. /*
  2483. * Firstly, let's wake up the return path now, with a new
  2484. * return path channel.
  2485. */
  2486. qemu_sem_post(&s->postcopy_pause_rp_sem);
  2487. /* Do the resume logic */
  2488. if (postcopy_do_resume(s) == 0) {
  2489. /* Let's continue! */
  2490. trace_postcopy_pause_continued();
  2491. return MIG_THR_ERR_RECOVERED;
  2492. } else {
  2493. /*
* Something went wrong during the recovery; let's pause
* again. Pausing is always better than throwing data
* away.
  2497. */
  2498. continue;
  2499. }
  2500. } else {
  2501. /* This is not right... Time to quit. */
  2502. return MIG_THR_ERR_FATAL;
  2503. }
  2504. }
  2505. }
  2506. static MigThrError migration_detect_error(MigrationState *s)
  2507. {
  2508. int ret;
  2509. int state = s->state;
  2510. if (state == MIGRATION_STATUS_CANCELLING ||
  2511. state == MIGRATION_STATUS_CANCELLED) {
  2512. /* End the migration, but don't set the state to failed */
  2513. return MIG_THR_ERR_FATAL;
  2514. }
  2515. /* Try to detect any file errors */
  2516. ret = qemu_file_get_error(s->to_dst_file);
  2517. if (!ret) {
  2518. /* Everything is fine */
  2519. return MIG_THR_ERR_NONE;
  2520. }
  2521. if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
  2522. /*
  2523. * For postcopy, we allow the network to be down for a
  2524. * while. After that, it can be continued by a
  2525. * recovery phase.
  2526. */
  2527. return postcopy_pause(s);
  2528. } else {
  2529. /*
* For precopy (or postcopy with an error outside IO), we fail
* immediately.
  2532. */
  2533. migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
  2534. trace_migration_thread_file_err();
  2535. /* Time to stop the migration, now. */
  2536. return MIG_THR_ERR_FATAL;
  2537. }
  2538. }
/* How many bytes have we transferred since the beginning of the migration */
  2540. static uint64_t migration_total_bytes(MigrationState *s)
  2541. {
  2542. return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes;
  2543. }
  2544. static void migration_calculate_complete(MigrationState *s)
  2545. {
  2546. uint64_t bytes = migration_total_bytes(s);
  2547. int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2548. int64_t transfer_time;
  2549. s->total_time = end_time - s->start_time;
  2550. if (!s->downtime) {
  2551. /*
* It's still not set, so this is a precopy migration. For
  2553. * postcopy, downtime is calculated during postcopy_start().
  2554. */
  2555. s->downtime = end_time - s->downtime_start;
  2556. }
  2557. transfer_time = s->total_time - s->setup_time;
  2558. if (transfer_time) {
  2559. s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
  2560. }
  2561. }
  2562. static void migration_update_counters(MigrationState *s,
  2563. int64_t current_time)
  2564. {
  2565. uint64_t transferred, transferred_pages, time_spent;
  2566. uint64_t current_bytes; /* bytes transferred since the beginning */
  2567. double bandwidth;
  2568. if (current_time < s->iteration_start_time + BUFFER_DELAY) {
  2569. return;
  2570. }
  2571. current_bytes = migration_total_bytes(s);
  2572. transferred = current_bytes - s->iteration_initial_bytes;
  2573. time_spent = current_time - s->iteration_start_time;
  2574. bandwidth = (double)transferred / time_spent;
  2575. s->threshold_size = bandwidth * s->parameters.downtime_limit;
  2576. s->mbps = (((double) transferred * 8.0) /
  2577. ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
  2578. transferred_pages = ram_get_total_transferred_pages() -
  2579. s->iteration_initial_pages;
  2580. s->pages_per_second = (double) transferred_pages /
  2581. (((double) time_spent / 1000.0));
  2582. /*
* If we haven't sent anything, we don't want to
* recalculate; 10000 is a small enough number for our purposes.
  2585. */
  2586. if (ram_counters.dirty_pages_rate && transferred > 10000) {
  2587. s->expected_downtime = ram_counters.remaining / bandwidth;
  2588. }
  2589. qemu_file_reset_rate_limit(s->to_dst_file);
  2590. s->iteration_start_time = current_time;
  2591. s->iteration_initial_bytes = current_bytes;
  2592. s->iteration_initial_pages = ram_get_total_transferred_pages();
  2593. trace_migrate_transferred(transferred, time_spent,
  2594. bandwidth, s->threshold_size);
  2595. }
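/*
 * Worked example (assumed numbers): if 64 MiB (67108864 bytes) were
 * transferred during a 100 ms iteration, bandwidth is 67108864 / 100
 * ~= 671089 bytes/ms. With downtime_limit = 300 ms this gives
 * threshold_size = 671089 * 300 ~= 201 MB: completion is attempted
 * once the remaining dirty data fits within that budget.
 */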
  2596. /* Migration thread iteration status */
  2597. typedef enum {
  2598. MIG_ITERATE_RESUME, /* Resume current iteration */
  2599. MIG_ITERATE_SKIP, /* Skip current iteration */
  2600. MIG_ITERATE_BREAK, /* Break the loop */
  2601. } MigIterateState;
  2602. /*
* Return MIG_ITERATE_RESUME to keep iterating, MIG_ITERATE_SKIP to
* skip the current iteration, or MIG_ITERATE_BREAK to leave the loop.
  2605. */
  2606. static MigIterateState migration_iteration_run(MigrationState *s)
  2607. {
  2608. uint64_t pending_size, pend_pre, pend_compat, pend_post;
  2609. bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
  2610. qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
  2611. &pend_compat, &pend_post);
  2612. pending_size = pend_pre + pend_compat + pend_post;
  2613. trace_migrate_pending(pending_size, s->threshold_size,
  2614. pend_pre, pend_compat, pend_post);
  2615. if (pending_size && pending_size >= s->threshold_size) {
  2616. /* Still a significant amount to transfer */
  2617. if (migrate_postcopy() && !in_postcopy &&
  2618. pend_pre <= s->threshold_size &&
  2619. atomic_read(&s->start_postcopy)) {
  2620. if (postcopy_start(s)) {
  2621. error_report("%s: postcopy failed to start", __func__);
  2622. }
  2623. return MIG_ITERATE_SKIP;
  2624. }
  2625. /* Just another iteration step */
  2626. qemu_savevm_state_iterate(s->to_dst_file,
  2627. s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
  2628. } else {
  2629. trace_migration_thread_low_pending(pending_size);
  2630. migration_completion(s);
  2631. return MIG_ITERATE_BREAK;
  2632. }
  2633. return MIG_ITERATE_RESUME;
  2634. }
  2635. static void migration_iteration_finish(MigrationState *s)
  2636. {
  2637. /* If we enabled cpu throttling for auto-converge, turn it off. */
  2638. cpu_throttle_stop();
  2639. qemu_mutex_lock_iothread();
  2640. switch (s->state) {
  2641. case MIGRATION_STATUS_COMPLETED:
  2642. migration_calculate_complete(s);
  2643. runstate_set(RUN_STATE_POSTMIGRATE);
  2644. break;
  2645. case MIGRATION_STATUS_ACTIVE:
  2646. /*
  2647. * We should really assert here, but since it's during
  2648. * migration, let's try to reduce the usage of assertions.
  2649. */
  2650. if (!migrate_colo_enabled()) {
  2651. error_report("%s: critical error: calling COLO code without "
  2652. "COLO enabled", __func__);
  2653. }
  2654. migrate_start_colo_process(s);
  2655. /*
* FIXME: we will run the VM in COLO no matter what its old running
* state was. After exiting COLO, we will keep running.
  2658. */
  2659. s->vm_was_running = true;
  2660. /* Fallthrough */
  2661. case MIGRATION_STATUS_FAILED:
  2662. case MIGRATION_STATUS_CANCELLED:
  2663. case MIGRATION_STATUS_CANCELLING:
  2664. if (s->vm_was_running) {
  2665. vm_start();
  2666. } else {
  2667. if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
  2668. runstate_set(RUN_STATE_POSTMIGRATE);
  2669. }
  2670. }
  2671. break;
  2672. default:
  2673. /* Should not reach here, but if so, forgive the VM. */
  2674. error_report("%s: Unknown ending state %d", __func__, s->state);
  2675. break;
  2676. }
  2677. migrate_fd_cleanup_schedule(s);
  2678. qemu_mutex_unlock_iothread();
  2679. }
  2680. void migration_make_urgent_request(void)
  2681. {
  2682. qemu_sem_post(&migrate_get_current()->rate_limit_sem);
  2683. }
  2684. void migration_consume_urgent_request(void)
  2685. {
  2686. qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
  2687. }
  2688. /*
  2689. * Master migration thread on the source VM.
  2690. * It drives the migration and pumps the data down the outgoing channel.
  2691. */
  2692. static void *migration_thread(void *opaque)
  2693. {
  2694. MigrationState *s = opaque;
  2695. int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
  2696. MigThrError thr_error;
  2697. bool urgent = false;
  2698. rcu_register_thread();
  2699. object_ref(OBJECT(s));
  2700. s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2701. qemu_savevm_state_header(s->to_dst_file);
  2702. /*
  2703. * If we opened the return path, we need to make sure dst has it
  2704. * opened as well.
  2705. */
  2706. if (s->rp_state.from_dst_file) {
  2707. /* Now tell the dest that it should open its end so it can reply */
  2708. qemu_savevm_send_open_return_path(s->to_dst_file);
  2709. /* And do a ping that will make stuff easier to debug */
  2710. qemu_savevm_send_ping(s->to_dst_file, 1);
  2711. }
  2712. if (migrate_postcopy()) {
  2713. /*
  2714. * Tell the destination that we *might* want to do postcopy later;
  2715. * if the other end can't do postcopy it should fail now, nice and
  2716. * early.
  2717. */
  2718. qemu_savevm_send_postcopy_advise(s->to_dst_file);
  2719. }
  2720. if (migrate_colo_enabled()) {
  2721. /* Notify migration destination that we enable COLO */
  2722. qemu_savevm_send_colo_enable(s->to_dst_file);
  2723. }
  2724. qemu_savevm_state_setup(s->to_dst_file);
  2725. s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
  2726. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  2727. MIGRATION_STATUS_ACTIVE);
  2728. trace_migration_thread_setup_complete();
  2729. while (s->state == MIGRATION_STATUS_ACTIVE ||
  2730. s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
  2731. int64_t current_time;
  2732. if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
  2733. MigIterateState iter_state = migration_iteration_run(s);
  2734. if (iter_state == MIG_ITERATE_SKIP) {
  2735. continue;
  2736. } else if (iter_state == MIG_ITERATE_BREAK) {
  2737. break;
  2738. }
  2739. }
  2740. /*
  2741. * Try to detect any kind of failures, and see whether we
  2742. * should stop the migration now.
  2743. */
  2744. thr_error = migration_detect_error(s);
  2745. if (thr_error == MIG_THR_ERR_FATAL) {
  2746. /* Stop migration */
  2747. break;
  2748. } else if (thr_error == MIG_THR_ERR_RECOVERED) {
  2749. /*
* Just recovered from, e.g., a network failure; reset all
* the local variables. This is important to avoid
* breaking the transferred_bytes and bandwidth calculations.
  2753. */
  2754. s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2755. s->iteration_initial_bytes = 0;
  2756. }
  2757. current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
  2758. migration_update_counters(s, current_time);
  2759. urgent = false;
  2760. if (qemu_file_rate_limit(s->to_dst_file)) {
  2761. /* Wait for a delay to do rate limiting OR
  2762. * something urgent to post the semaphore.
  2763. */
  2764. int ms = s->iteration_start_time + BUFFER_DELAY - current_time;
  2765. trace_migration_thread_ratelimit_pre(ms);
  2766. if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
/* We were woken by one or more urgent things but
* the timedwait will have consumed one of them.
* The service routine for the urgent wake will dec
* the semaphore itself for each item it consumes,
* so post back the one we just consumed.
  2772. */
  2773. qemu_sem_post(&s->rate_limit_sem);
  2774. urgent = true;
  2775. }
  2776. trace_migration_thread_ratelimit_post(urgent);
  2777. }
  2778. }
  2779. trace_migration_thread_after_loop();
  2780. migration_iteration_finish(s);
  2781. object_unref(OBJECT(s));
  2782. rcu_unregister_thread();
  2783. return NULL;
  2784. }
  2785. void migrate_fd_connect(MigrationState *s, Error *error_in)
  2786. {
  2787. int64_t rate_limit;
  2788. bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
  2789. s->expected_downtime = s->parameters.downtime_limit;
  2790. s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
  2791. if (error_in) {
  2792. migrate_fd_error(s, error_in);
  2793. migrate_fd_cleanup(s);
  2794. return;
  2795. }
  2796. if (resume) {
  2797. /* This is a resumed migration */
  2798. rate_limit = INT64_MAX;
  2799. } else {
  2800. /* This is a fresh new migration */
  2801. rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
  2802. /* Notify before starting migration thread */
  2803. notifier_list_notify(&migration_state_notifiers, s);
  2804. }
  2805. qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
  2806. qemu_file_set_blocking(s->to_dst_file, true);
  2807. /*
  2808. * Open the return path. For postcopy, it is used exclusively. For
  2809. * precopy, only if user specified "return-path" capability would
  2810. * QEMU uses the return path.
  2811. */
  2812. if (migrate_postcopy_ram() || migrate_use_return_path()) {
  2813. if (open_return_path_on_source(s, !resume)) {
  2814. error_report("Unable to open return-path for postcopy");
  2815. migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
  2816. migrate_fd_cleanup(s);
  2817. return;
  2818. }
  2819. }
  2820. if (resume) {
  2821. /* Wakeup the main migration thread to do the recovery */
  2822. migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
  2823. MIGRATION_STATUS_POSTCOPY_RECOVER);
  2824. qemu_sem_post(&s->postcopy_pause_sem);
  2825. return;
  2826. }
  2827. if (multifd_save_setup() != 0) {
  2828. migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
  2829. MIGRATION_STATUS_FAILED);
  2830. migrate_fd_cleanup(s);
  2831. return;
  2832. }
  2833. qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
  2834. QEMU_THREAD_JOINABLE);
  2835. s->migration_thread_running = true;
  2836. }
  2837. void migration_global_dump(Monitor *mon)
  2838. {
  2839. MigrationState *ms = migrate_get_current();
  2840. monitor_printf(mon, "globals:\n");
  2841. monitor_printf(mon, "store-global-state: %s\n",
  2842. ms->store_global_state ? "on" : "off");
  2843. monitor_printf(mon, "only-migratable: %s\n",
  2844. only_migratable ? "on" : "off");
  2845. monitor_printf(mon, "send-configuration: %s\n",
  2846. ms->send_configuration ? "on" : "off");
  2847. monitor_printf(mon, "send-section-footer: %s\n",
  2848. ms->send_section_footer ? "on" : "off");
  2849. monitor_printf(mon, "decompress-error-check: %s\n",
  2850. ms->decompress_error_check ? "on" : "off");
  2851. }
  2852. #define DEFINE_PROP_MIG_CAP(name, x) \
  2853. DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
  2854. static Property migration_properties[] = {
  2855. DEFINE_PROP_BOOL("store-global-state", MigrationState,
  2856. store_global_state, true),
  2857. DEFINE_PROP_BOOL("send-configuration", MigrationState,
  2858. send_configuration, true),
  2859. DEFINE_PROP_BOOL("send-section-footer", MigrationState,
  2860. send_section_footer, true),
  2861. DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
  2862. decompress_error_check, true),
  2863. /* Migration parameters */
  2864. DEFINE_PROP_UINT8("x-compress-level", MigrationState,
  2865. parameters.compress_level,
  2866. DEFAULT_MIGRATE_COMPRESS_LEVEL),
  2867. DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
  2868. parameters.compress_threads,
  2869. DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
  2870. DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
  2871. parameters.compress_wait_thread, true),
  2872. DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
  2873. parameters.decompress_threads,
  2874. DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
  2875. DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
  2876. parameters.cpu_throttle_initial,
  2877. DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
  2878. DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
  2879. parameters.cpu_throttle_increment,
  2880. DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
  2881. DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
  2882. parameters.max_bandwidth, MAX_THROTTLE),
  2883. DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
  2884. parameters.downtime_limit,
  2885. DEFAULT_MIGRATE_SET_DOWNTIME),
  2886. DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
  2887. parameters.x_checkpoint_delay,
  2888. DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
  2889. DEFINE_PROP_UINT8("multifd-channels", MigrationState,
  2890. parameters.multifd_channels,
  2891. DEFAULT_MIGRATE_MULTIFD_CHANNELS),
  2892. DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
  2893. parameters.xbzrle_cache_size,
  2894. DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
  2895. DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
  2896. parameters.max_postcopy_bandwidth,
  2897. DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
  2898. DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
  2899. parameters.max_cpu_throttle,
  2900. DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
  2901. DEFINE_PROP_SIZE("announce-initial", MigrationState,
  2902. parameters.announce_initial,
  2903. DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
  2904. DEFINE_PROP_SIZE("announce-max", MigrationState,
  2905. parameters.announce_max,
  2906. DEFAULT_MIGRATE_ANNOUNCE_MAX),
  2907. DEFINE_PROP_SIZE("announce-rounds", MigrationState,
  2908. parameters.announce_rounds,
  2909. DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
  2910. DEFINE_PROP_SIZE("announce-step", MigrationState,
  2911. parameters.announce_step,
  2912. DEFAULT_MIGRATE_ANNOUNCE_STEP),
  2913. /* Migration capabilities */
  2914. DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
  2915. DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
  2916. DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
  2917. DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
  2918. DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
  2919. DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
  2920. DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
  2921. DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
  2922. DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
  2923. DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
  2924. DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
  2925. DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
  2926. DEFINE_PROP_END_OF_LIST(),
  2927. };
  2928. static void migration_class_init(ObjectClass *klass, void *data)
  2929. {
  2930. DeviceClass *dc = DEVICE_CLASS(klass);
  2931. dc->user_creatable = false;
  2932. dc->props = migration_properties;
  2933. }
  2934. static void migration_instance_finalize(Object *obj)
  2935. {
  2936. MigrationState *ms = MIGRATION_OBJ(obj);
  2937. MigrationParameters *params = &ms->parameters;
  2938. qemu_mutex_destroy(&ms->error_mutex);
  2939. qemu_mutex_destroy(&ms->qemu_file_lock);
  2940. g_free(params->tls_hostname);
  2941. g_free(params->tls_creds);
  2942. qemu_sem_destroy(&ms->rate_limit_sem);
  2943. qemu_sem_destroy(&ms->pause_sem);
  2944. qemu_sem_destroy(&ms->postcopy_pause_sem);
  2945. qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
  2946. qemu_sem_destroy(&ms->rp_state.rp_sem);
  2947. error_free(ms->error);
  2948. }
  2949. static void migration_instance_init(Object *obj)
  2950. {
  2951. MigrationState *ms = MIGRATION_OBJ(obj);
  2952. MigrationParameters *params = &ms->parameters;
  2953. ms->state = MIGRATION_STATUS_NONE;
  2954. ms->mbps = -1;
  2955. ms->pages_per_second = -1;
  2956. qemu_sem_init(&ms->pause_sem, 0);
  2957. qemu_mutex_init(&ms->error_mutex);
  2958. params->tls_hostname = g_strdup("");
  2959. params->tls_creds = g_strdup("");
  2960. /* Set has_* up only for parameter checks */
  2961. params->has_compress_level = true;
  2962. params->has_compress_threads = true;
  2963. params->has_decompress_threads = true;
  2964. params->has_cpu_throttle_initial = true;
  2965. params->has_cpu_throttle_increment = true;
  2966. params->has_max_bandwidth = true;
  2967. params->has_downtime_limit = true;
  2968. params->has_x_checkpoint_delay = true;
  2969. params->has_block_incremental = true;
  2970. params->has_multifd_channels = true;
  2971. params->has_xbzrle_cache_size = true;
  2972. params->has_max_postcopy_bandwidth = true;
  2973. params->has_max_cpu_throttle = true;
  2974. params->has_announce_initial = true;
  2975. params->has_announce_max = true;
  2976. params->has_announce_rounds = true;
  2977. params->has_announce_step = true;
  2978. qemu_sem_init(&ms->postcopy_pause_sem, 0);
  2979. qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
  2980. qemu_sem_init(&ms->rp_state.rp_sem, 0);
  2981. qemu_sem_init(&ms->rate_limit_sem, 0);
  2982. qemu_mutex_init(&ms->qemu_file_lock);
  2983. }
  2984. /*
* Return true if the check passes, false otherwise. The error will
* be put into errp if provided.
  2987. */
  2988. static bool migration_object_check(MigrationState *ms, Error **errp)
  2989. {
  2990. MigrationCapabilityStatusList *head = NULL;
  2991. /* Assuming all off */
  2992. bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
  2993. int i;
  2994. if (!migrate_params_check(&ms->parameters, errp)) {
  2995. return false;
  2996. }
  2997. for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
  2998. if (ms->enabled_capabilities[i]) {
  2999. head = migrate_cap_add(head, i, true);
  3000. }
  3001. }
  3002. ret = migrate_caps_check(cap_list, head, errp);
  3003. /* It works with head == NULL */
  3004. qapi_free_MigrationCapabilityStatusList(head);
  3005. return ret;
  3006. }
  3007. static const TypeInfo migration_type = {
  3008. .name = TYPE_MIGRATION,
  3009. /*
  3010. * NOTE: TYPE_MIGRATION is not really a device, as the object is
  3011. * not created using qdev_create(), it is not attached to the qdev
  3012. * device tree, and it is never realized.
  3013. *
  3014. * TODO: Make this TYPE_OBJECT once QOM provides something like
  3015. * TYPE_DEVICE's "-global" properties.
  3016. */
  3017. .parent = TYPE_DEVICE,
  3018. .class_init = migration_class_init,
  3019. .class_size = sizeof(MigrationClass),
  3020. .instance_size = sizeof(MigrationState),
  3021. .instance_init = migration_instance_init,
  3022. .instance_finalize = migration_instance_finalize,
  3023. };
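/*
 * Usage sketch (hypothetical command line): because TYPE_MIGRATION
 * derives from TYPE_DEVICE, the properties defined above can be set
 * with -global, e.g.:
 *
 *   qemu-system-x86_64 -global migration.send-section-footer=off ...
 */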
  3024. static void register_migration_types(void)
  3025. {
  3026. type_register_static(&migration_type);
  3027. }
  3028. type_init(register_migration_types);