/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2011 IBM Corp.
 * Copyright (c) 2012 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/trace.h"
#include "sysemu/block-backend.h"
#include "qapi/error.h"
#include "qapi/qapi-events-block-core.h"
#include "qapi/qmp/qerror.h"
#include "qemu/coroutine.h"
#include "qemu/timer.h"

/*
 * The block job API is composed of two categories of functions.
 *
 * The first includes functions used by the monitor.  The monitor is
 * peculiar in that it accesses the block job list with block_job_get, and
 * therefore needs consistency across block_job_get and the actual operation
 * (e.g. block_job_set_speed).  The consistency is achieved with
 * aio_context_acquire/release.  These functions are declared in blockjob.h.
 *
 * The second includes functions used by the block job drivers and sometimes
 * by the core block layer.  These do not care about locking, because the
 * whole coroutine runs under the AioContext lock, and are declared in
 * blockjob_int.h.
 */

static bool is_block_job(Job *job)
{
    return job_type(job) == JOB_TYPE_BACKUP ||
           job_type(job) == JOB_TYPE_COMMIT ||
           job_type(job) == JOB_TYPE_MIRROR ||
           job_type(job) == JOB_TYPE_STREAM;
}
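
/* Return the block job following @bjob in the global job list, or the
 * first block job when @bjob is NULL.  Entries that are not block jobs
 * are skipped; NULL means there are no more block jobs. */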
BlockJob *block_job_next(BlockJob *bjob)
{
    Job *job = bjob ? &bjob->job : NULL;

    do {
        job = job_next(job);
    } while (job && !is_block_job(job));

    return job ? container_of(job, BlockJob, job) : NULL;
}
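
/* Look up a job by @id and return it as a BlockJob, or NULL if no such
 * job exists or the job with that id is not a block job. */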
BlockJob *block_job_get(const char *id)
{
    Job *job = job_get(id);

    if (job && is_block_job(job)) {
        return container_of(job, BlockJob, job);
    } else {
        return NULL;
    }
}

static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque);
static void block_job_detach_aio_context(void *opaque);
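
/* JobDriver.free callback: detach the job from all of its nodes, remove
 * the AioContext notifiers and drop the BlockBackend reference. */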
void block_job_free(Job *job)
{
    BlockJob *bjob = container_of(job, BlockJob, job);
    BlockDriverState *bs = blk_bs(bjob->blk);

    bs->job = NULL;
    block_job_remove_all_bdrv(bjob);
    blk_remove_aio_context_notifier(bjob->blk,
                                    block_job_attached_aio_context,
                                    block_job_detach_aio_context, bjob);
    blk_unref(bjob->blk);
    error_free(bjob->blocker);
}
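
/* AioContext notifier: the BlockBackend has been moved to @new_context.
 * Propagate the new context to the job and the driver, then resume the
 * job that block_job_detach_aio_context() paused. */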
static void block_job_attached_aio_context(AioContext *new_context,
                                           void *opaque)
{
    BlockJob *job = opaque;
    const JobDriver *drv = job->job.driver;
    BlockJobDriver *bjdrv = container_of(drv, BlockJobDriver, job_driver);

    job->job.aio_context = new_context;
    if (bjdrv->attached_aio_context) {
        bjdrv->attached_aio_context(job, new_context);
    }

    job_resume(&job->job);
}
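
/* JobDriver.drain callback: drain the job's BlockBackend and give the
 * block job driver a chance to drain any additional resources. */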
void block_job_drain(Job *job)
{
    BlockJob *bjob = container_of(job, BlockJob, job);
    const JobDriver *drv = job->driver;
    BlockJobDriver *bjdrv = container_of(drv, BlockJobDriver, job_driver);

    blk_drain(bjob->blk);
    if (bjdrv->drain) {
        bjdrv->drain(bjob);
    }
}
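
/* AioContext notifier: the BlockBackend is about to leave its current
 * AioContext, so pause the job and drain it until it has really stopped
 * (or completed). */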
static void block_job_detach_aio_context(void *opaque)
{
    BlockJob *job = opaque;

    /* In case the job terminates during aio_poll()... */
    job_ref(&job->job);

    job_pause(&job->job);

    while (!job->job.paused && !job_is_completed(&job->job)) {
        job_drain(&job->job);
    }

    job->job.aio_context = NULL;
    job_unref(&job->job);
}
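
/* BdrvChildRole callbacks for the nodes a block job is attached to:
 * draining a node pauses the job, and child_job_drained_poll() reports
 * whether the job still has activity in flight. */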
static char *child_job_get_parent_desc(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    return g_strdup_printf("%s job '%s'", job_type_str(&job->job), job->job.id);
}

static void child_job_drained_begin(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    job_pause(&job->job);
}

static bool child_job_drained_poll(BdrvChild *c)
{
    BlockJob *bjob = c->opaque;
    Job *job = &bjob->job;
    const BlockJobDriver *drv = block_job_driver(bjob);

    /* An inactive or completed job doesn't have any pending requests. Jobs
     * with !job->busy are either already paused or have a pause point after
     * being reentered, so no job driver code will run before they pause. */
    if (!job->busy || job_is_completed(job)) {
        return false;
    }

    /* Otherwise, assume that it isn't fully stopped yet, but allow the job to
     * override this assumption. */
    if (drv->drained_poll) {
        return drv->drained_poll(bjob);
    } else {
        return true;
    }
}

static void child_job_drained_end(BdrvChild *c)
{
    BlockJob *job = c->opaque;
    job_resume(&job->job);
}

static const BdrvChildRole child_job = {
    .get_parent_desc    = child_job_get_parent_desc,
    .drained_begin      = child_job_drained_begin,
    .drained_poll       = child_job_drained_poll,
    .drained_end        = child_job_drained_end,
    .stay_at_node       = true,
};
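
/* Undo block_job_add_bdrv() for every node the job was attached to:
 * lift the operation blockers and drop the child references. */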
void block_job_remove_all_bdrv(BlockJob *job)
{
    GSList *l;
    for (l = job->nodes; l; l = l->next) {
        BdrvChild *c = l->data;
        bdrv_op_unblock_all(c->bs, job->blocker);
        bdrv_root_unref_child(c);
    }
    g_slist_free(job->nodes);
    job->nodes = NULL;
}

int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
                       uint64_t perm, uint64_t shared_perm, Error **errp)
{
    BdrvChild *c;

    c = bdrv_root_attach_child(bs, name, &child_job, perm, shared_perm,
                               job, errp);
    if (c == NULL) {
        return -EPERM;
    }

    job->nodes = g_slist_prepend(job->nodes, c);
    bdrv_ref(bs);
    bdrv_op_block_all(bs, job->blocker);

    return 0;
}

static void block_job_on_idle(Notifier *n, void *opaque)
{
    aio_wait_kick();
}

bool block_job_is_internal(BlockJob *job)
{
    return (job->job.id == NULL);
}

const BlockJobDriver *block_job_driver(BlockJob *job)
{
    return container_of(job->job.driver, BlockJobDriver, job_driver);
}

/* Assumes the job_mutex is held */
static bool job_timer_pending(Job *job)
{
    return timer_pending(&job->sleep_timer);
}
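
/* Update the job's rate limit; a @speed of 0 means unlimited.  The job
 * is kicked only when the limit is raised or removed and its sleep timer
 * is pending, so a throttled job re-evaluates its delay. */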
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    int64_t old_speed = job->speed;

    if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp)) {
        return;
    }
    if (speed < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }

    ratelimit_set_speed(&job->limit, speed, BLOCK_JOB_SLICE_TIME);

    job->speed = speed;
    if (speed && speed <= old_speed) {
        return;
    }

    /* kick only if a timer is pending */
    job_enter_cond(&job->job, job_timer_pending);
}
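
/* Return the delay in ns that a job should sleep after processing @n
 * units of work, or 0 if no rate limit is set. */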
int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
{
    if (!job->speed) {
        return 0;
    }

    return ratelimit_calculate_delay(&job->limit, n);
}
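
/* Build a BlockJobInfo for the QMP query-block-jobs command; internal
 * jobs have no id and are never visible to the monitor. */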
BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
{
    BlockJobInfo *info;

    if (block_job_is_internal(job)) {
        error_setg(errp, "Cannot query QEMU internal jobs");
        return NULL;
    }
    info = g_new0(BlockJobInfo, 1);
    info->type      = g_strdup(job_type_str(&job->job));
    info->device    = g_strdup(job->job.id);
    info->busy      = atomic_read(&job->job.busy);
    info->paused    = job->job.pause_count > 0;
    info->offset    = job->job.progress_current;
    info->len       = job->job.progress_total;
    info->speed     = job->speed;
    info->io_status = job->iostatus;
    info->ready     = job_is_ready(&job->job);
    info->status    = job->job.status;
    info->auto_finalize = job->job.auto_finalize;
    info->auto_dismiss  = job->job.auto_dismiss;
    info->has_error = job->job.ret != 0;
    info->error     = job->job.ret ? g_strdup(strerror(-job->job.ret)) : NULL;
    return info;
}

static void block_job_iostatus_set_err(BlockJob *job, int error)
{
    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
    }
}
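
/* Notifier callbacks that emit the QMP BLOCK_JOB_* events; internal
 * jobs never emit events. */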
static void block_job_event_cancelled(Notifier *n, void *opaque)
{
    BlockJob *job = opaque;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_cancelled(job_type(&job->job),
                                        job->job.id,
                                        job->job.progress_total,
                                        job->job.progress_current,
                                        job->speed);
}

static void block_job_event_completed(Notifier *n, void *opaque)
{
    BlockJob *job = opaque;
    const char *msg = NULL;

    if (block_job_is_internal(job)) {
        return;
    }

    if (job->job.ret < 0) {
        msg = strerror(-job->job.ret);
    }

    qapi_event_send_block_job_completed(job_type(&job->job),
                                        job->job.id,
                                        job->job.progress_total,
                                        job->job.progress_current,
                                        job->speed,
                                        !!msg,
                                        msg);
}

static void block_job_event_pending(Notifier *n, void *opaque)
{
    BlockJob *job = opaque;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_pending(job_type(&job->job),
                                      job->job.id);
}

static void block_job_event_ready(Notifier *n, void *opaque)
{
    BlockJob *job = opaque;

    if (block_job_is_internal(job)) {
        return;
    }

    qapi_event_send_block_job_ready(job_type(&job->job),
                                    job->job.id,
                                    job->job.progress_total,
                                    job->job.progress_current,
                                    job->speed);
}

/*
 * API for block job drivers and the block layer.  These functions are
 * declared in blockjob_int.h.
 */
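
/* Create a new block job on @bs: the job takes a BlockBackend reference
 * to the node and blocks most operations on it until the job finishes.
 * On any failure, NULL is returned and @errp is set. */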
void *block_job_create(const char *job_id, const BlockJobDriver *driver,
                       JobTxn *txn, BlockDriverState *bs, uint64_t perm,
                       uint64_t shared_perm, int64_t speed, int flags,
                       BlockCompletionFunc *cb, void *opaque, Error **errp)
{
    BlockBackend *blk;
    BlockJob *job;
    int ret;

    if (bs->job) {
        error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
        return NULL;
    }

    if (job_id == NULL && !(flags & JOB_INTERNAL)) {
        job_id = bdrv_get_device_name(bs);
    }

    blk = blk_new(perm, shared_perm);
    ret = blk_insert_bs(blk, bs, errp);
    if (ret < 0) {
        blk_unref(blk);
        return NULL;
    }

    job = job_create(job_id, &driver->job_driver, txn, blk_get_aio_context(blk),
                     flags, cb, opaque, errp);
    if (job == NULL) {
        blk_unref(blk);
        return NULL;
    }

    assert(is_block_job(&job->job));
    assert(job->job.driver->free == &block_job_free);
    assert(job->job.driver->user_resume == &block_job_user_resume);
    assert(job->job.driver->drain == &block_job_drain);

    job->blk = blk;

    job->finalize_cancelled_notifier.notify = block_job_event_cancelled;
    job->finalize_completed_notifier.notify = block_job_event_completed;
    job->pending_notifier.notify = block_job_event_pending;
    job->ready_notifier.notify = block_job_event_ready;
    job->idle_notifier.notify = block_job_on_idle;

    notifier_list_add(&job->job.on_finalize_cancelled,
                      &job->finalize_cancelled_notifier);
    notifier_list_add(&job->job.on_finalize_completed,
                      &job->finalize_completed_notifier);
    notifier_list_add(&job->job.on_pending, &job->pending_notifier);
    notifier_list_add(&job->job.on_ready, &job->ready_notifier);
    notifier_list_add(&job->job.on_idle, &job->idle_notifier);

    error_setg(&job->blocker, "block device is in use by block job: %s",
               job_type_str(&job->job));
    block_job_add_bdrv(job, "main node", bs, 0, BLK_PERM_ALL, &error_abort);
    bs->job = job;

    bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);

    blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
                                 block_job_detach_aio_context, job);

    /* Only set speed when necessary to avoid NotSupported error */
    if (speed != 0) {
        Error *local_err = NULL;

        block_job_set_speed(job, speed, &local_err);
        if (local_err) {
            job_early_fail(&job->job);
            error_propagate(errp, local_err);
            return NULL;
        }
    }

    return job;
}

void block_job_iostatus_reset(BlockJob *job)
{
    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
        return;
    }
    assert(job->job.user_paused && job->job.pause_count > 0);
    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
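
/* JobDriver.user_resume callback: clear the I/O status when the user
 * resumes a job that was paused by an I/O error. */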
void block_job_user_resume(Job *job)
{
    BlockJob *bjob = container_of(job, BlockJob, job);
    block_job_iostatus_reset(bjob);
}
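
/* Map the configured BlockdevOnError policy and @error to an action,
 * emit the QMP BLOCK_JOB_ERROR event for user-visible jobs, and pause
 * the job with an error iostatus when the action is STOP. */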
BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
                                        int is_read, int error)
{
    BlockErrorAction action;

    switch (on_err) {
    case BLOCKDEV_ON_ERROR_ENOSPC:
    case BLOCKDEV_ON_ERROR_AUTO:
        action = (error == ENOSPC) ?
                 BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_STOP:
        action = BLOCK_ERROR_ACTION_STOP;
        break;
    case BLOCKDEV_ON_ERROR_REPORT:
        action = BLOCK_ERROR_ACTION_REPORT;
        break;
    case BLOCKDEV_ON_ERROR_IGNORE:
        action = BLOCK_ERROR_ACTION_IGNORE;
        break;
    default:
        abort();
    }
    if (!block_job_is_internal(job)) {
        qapi_event_send_block_job_error(job->job.id,
                                        is_read ? IO_OPERATION_TYPE_READ :
                                        IO_OPERATION_TYPE_WRITE,
                                        action);
    }
    if (action == BLOCK_ERROR_ACTION_STOP) {
        if (!job->job.user_paused) {
            job_pause(&job->job);
            /* make the pause user visible, which will be resumed from QMP. */
            job->job.user_paused = true;
        }
        block_job_iostatus_set_err(job, error);
    }
    return action;
}