You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

cpus.c 63KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321
  1. /*
  2. * QEMU System Emulator
  3. *
  4. * Copyright (c) 2003-2008 Fabrice Bellard
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. #include "qemu/osdep.h"
  25. #include "qemu-common.h"
  26. #include "qemu/config-file.h"
  27. #include "migration/vmstate.h"
  28. #include "monitor/monitor.h"
  29. #include "qapi/error.h"
  30. #include "qapi/qapi-commands-misc.h"
  31. #include "qapi/qapi-events-run-state.h"
  32. #include "qapi/qmp/qerror.h"
  33. #include "qemu/error-report.h"
  34. #include "qemu/qemu-print.h"
  35. #include "sysemu/tcg.h"
  36. #include "sysemu/block-backend.h"
  37. #include "exec/gdbstub.h"
  38. #include "sysemu/dma.h"
  39. #include "sysemu/hw_accel.h"
  40. #include "sysemu/kvm.h"
  41. #include "sysemu/hax.h"
  42. #include "sysemu/hvf.h"
  43. #include "sysemu/whpx.h"
  44. #include "exec/exec-all.h"
  45. #include "qemu/thread.h"
  46. #include "sysemu/cpus.h"
  47. #include "sysemu/qtest.h"
  48. #include "qemu/main-loop.h"
  49. #include "qemu/option.h"
  50. #include "qemu/bitmap.h"
  51. #include "qemu/seqlock.h"
  52. #include "qemu/guest-random.h"
  53. #include "tcg.h"
  54. #include "hw/nmi.h"
  55. #include "sysemu/replay.h"
  56. #include "sysemu/runstate.h"
  57. #include "hw/boards.h"
  58. #include "hw/hw.h"
  59. #ifdef CONFIG_LINUX
  60. #include <sys/prctl.h>
  61. #ifndef PR_MCE_KILL
  62. #define PR_MCE_KILL 33
  63. #endif
  64. #ifndef PR_MCE_KILL_SET
  65. #define PR_MCE_KILL_SET 1
  66. #endif
  67. #ifndef PR_MCE_KILL_EARLY
  68. #define PR_MCE_KILL_EARLY 1
  69. #endif
  70. #endif /* CONFIG_LINUX */
  /* File-scope mutex; the throttle code below hands it to qemu_cond_timedwait(). */
  71. static QemuMutex qemu_global_mutex;
  /* NOTE(review): not referenced in the visible part of the file; purpose to be confirmed. */
  72. int64_t max_delay;
  73. int64_t max_advance;
  74. /* vcpu throttling controls */
  75. static QEMUTimer *throttle_timer;
  76. static unsigned int throttle_percentage;
  /* NOTE(review): presumably the accepted range for the throttle percentage - confirm at the setter. */
  77. #define CPU_THROTTLE_PCT_MIN 1
  78. #define CPU_THROTTLE_PCT_MAX 99
  /* Length of one throttle timeslice in ns; cpu_throttle_thread() scales it to get the sleep time. */
  79. #define CPU_THROTTLE_TIMESLICE_NS 10000000
  80. bool cpu_is_stopped(CPUState *cpu)
  81. {
  82. return cpu->stopped || !runstate_is_running();
  83. }
  84. static bool cpu_thread_is_idle(CPUState *cpu)
  85. {
  86. if (cpu->stop || cpu->queued_work_first) {
  87. return false;
  88. }
  89. if (cpu_is_stopped(cpu)) {
  90. return true;
  91. }
  92. if (!cpu->halted || cpu_has_work(cpu) ||
  93. kvm_halt_in_kernel()) {
  94. return false;
  95. }
  96. return true;
  97. }
  98. static bool all_cpu_threads_idle(void)
  99. {
  100. CPUState *cpu;
  101. CPU_FOREACH(cpu) {
  102. if (!cpu_thread_is_idle(cpu)) {
  103. return false;
  104. }
  105. }
  106. return true;
  107. }
  108. /***********************************************************/
  109. /* guest cycle counter */
  /*
   * When false, qemu_start_warp_timer() never arms the warp timer: it adds
   * the next deadline to qemu_icount_bias at once instead of waiting for
   * real time to pass.
   */
  110. /* Protected by TimersState seqlock */
  111. static bool icount_sleep = true;
  112. /* Arbitrarily pick 1MIPS as the minimum allowable speed. */
  /* Upper bound enforced on icount_time_shift by icount_adjust(); at 10, one instruction counts as 2^10 ns. */
  113. #define MAX_ICOUNT_SHIFT 10
  /*
   * Book-keeping for the guest-visible clocks: host tick/clock offsets plus
   * the instruction-counter (icount) state and the timers that adjust it.
   */
  114. typedef struct TimersState {
  115. /* Protected by BQL. */
  /* Last value returned by cpu_get_ticks_locked(); used to keep the tick count from going backwards. */
  116. int64_t cpu_ticks_prev;
  /* Added to the host tick counter (while ticks are enabled) to form the guest tick count. */
  117. int64_t cpu_ticks_offset;
  118. /* Protect fields that can be respectively read outside the
  119. * BQL, and written from multiple threads.
  120. */
  121. QemuSeqLock vm_clock_seqlock;
  122. QemuSpin vm_clock_lock;
  /* Non-zero while the tick and clock sources run; toggled by cpu_enable_ticks()/cpu_disable_ticks(). */
  123. int16_t cpu_ticks_enabled;
  124. /* Conversion factor from emulated instructions to virtual clock ticks. */
  125. int16_t icount_time_shift;
  126. /* Compensate for varying guest execution speed. */
  127. int64_t qemu_icount_bias;
  /* QEMU_CLOCK_VIRTUAL_RT time at which the pending warp started, or -1 when none is pending. */
  128. int64_t vm_clock_warp_start;
  /* Added to get_clock() (while ticks are enabled) to form the value of cpu_get_clock(). */
  129. int64_t cpu_clock_offset;
  130. /* Only written by TCG thread */
  131. int64_t qemu_icount;
  132. /* for adjusting icount */
  133. QEMUTimer *icount_rt_timer;
  134. QEMUTimer *icount_vm_timer;
  /* Timer armed by qemu_start_warp_timer() for the end of a pending warp. */
  135. QEMUTimer *icount_warp_timer;
  136. } TimersState;
  /* The single file-scope instance operated on by the functions below. */
  137. static TimersState timers_state;
  /* Whether multi-threaded TCG is in use; assigned by qemu_tcg_configure(). */
  138. bool mttcg_enabled;
  139. /*
  140. * We default to false if we know other options have been enabled
  141. * which are currently incompatible with MTTCG. Otherwise when each
  142. * guest (target) has been updated to support:
  143. * - atomic instructions
  144. * - memory ordering primitives (barriers)
  145. * they can set the appropriate CONFIG flags in ${target}-softmmu.mak
  146. *
  147. * Once a guest architecture has been converted to the new primitives
  148. * there are two remaining limitations to check.
  149. *
  150. * - The guest can't be oversized (e.g. 64 bit guest on 32 bit host)
  151. * - The host must have a stronger memory order than the guest
  152. *
  153. * It may be possible in future to support strong guests on weak hosts
  154. * but that will require tagging all load/stores in a guest with their
  155. * implicit memory order requirements which would likely slow things
  156. * down a lot.
  157. */
  /*
   * Return true when every memory-ordering bit in TCG_GUEST_DEFAULT_MO is
   * also present in TCG_TARGET_DEFAULT_MO.  If either macro is undefined
   * the answer is conservatively false.
   */
  static bool check_tcg_memory_orders_compatible(void)
  {
      bool compatible = false;

  #if defined(TCG_GUEST_DEFAULT_MO) && defined(TCG_TARGET_DEFAULT_MO)
      compatible = (TCG_GUEST_DEFAULT_MO & ~TCG_TARGET_DEFAULT_MO) == 0;
  #endif
      return compatible;
  }
  166. static bool default_mttcg_enabled(void)
  167. {
  168. if (use_icount || TCG_OVERSIZED_GUEST) {
  169. return false;
  170. } else {
  171. #ifdef TARGET_SUPPORTS_MTTCG
  172. return check_tcg_memory_orders_compatible();
  173. #else
  174. return false;
  175. #endif
  176. }
  177. }
  178. void qemu_tcg_configure(QemuOpts *opts, Error **errp)
  179. {
  180. const char *t = qemu_opt_get(opts, "thread");
  181. if (t) {
  182. if (strcmp(t, "multi") == 0) {
  183. if (TCG_OVERSIZED_GUEST) {
  184. error_setg(errp, "No MTTCG when guest word size > hosts");
  185. } else if (use_icount) {
  186. error_setg(errp, "No MTTCG when icount is enabled");
  187. } else {
  188. #ifndef TARGET_SUPPORTS_MTTCG
  189. warn_report("Guest not yet converted to MTTCG - "
  190. "you may get unexpected results");
  191. #endif
  192. if (!check_tcg_memory_orders_compatible()) {
  193. warn_report("Guest expects a stronger memory ordering "
  194. "than the host provides");
  195. error_printf("This may cause strange/hard to debug errors\n");
  196. }
  197. mttcg_enabled = true;
  198. }
  199. } else if (strcmp(t, "single") == 0) {
  200. mttcg_enabled = false;
  201. } else {
  202. error_setg(errp, "Invalid 'thread' setting %s", t);
  203. }
  204. } else {
  205. mttcg_enabled = default_mttcg_enabled();
  206. }
  207. }
  208. /* The current number of executed instructions is based on what we
  209. * originally budgeted minus the current state of the decrementing
  210. * icount counters in extra/u16.low.
  211. */
  212. static int64_t cpu_get_icount_executed(CPUState *cpu)
  213. {
  214. return (cpu->icount_budget -
  215. (cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra));
  216. }
  217. /*
  218. * Update the global shared timer_state.qemu_icount to take into
  219. * account executed instructions. This is done by the TCG vCPU
  220. * thread so the main-loop can see time has moved forward.
  221. */
  222. static void cpu_update_icount_locked(CPUState *cpu)
  223. {
  224. int64_t executed = cpu_get_icount_executed(cpu);
  225. cpu->icount_budget -= executed;
  226. atomic_set_i64(&timers_state.qemu_icount,
  227. timers_state.qemu_icount + executed);
  228. }
  229. /*
  230. * Update the global shared timer_state.qemu_icount to take into
  231. * account executed instructions. This is done by the TCG vCPU
  232. * thread so the main-loop can see time has moved forward.
  233. */
  234. void cpu_update_icount(CPUState *cpu)
  235. {
  236. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  237. &timers_state.vm_clock_lock);
  238. cpu_update_icount_locked(cpu);
  239. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  240. &timers_state.vm_clock_lock);
  241. }
  242. static int64_t cpu_get_icount_raw_locked(void)
  243. {
  244. CPUState *cpu = current_cpu;
  245. if (cpu && cpu->running) {
  246. if (!cpu->can_do_io) {
  247. error_report("Bad icount read");
  248. exit(1);
  249. }
  250. /* Take into account what has run */
  251. cpu_update_icount_locked(cpu);
  252. }
  253. /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
  254. return atomic_read_i64(&timers_state.qemu_icount);
  255. }
  256. static int64_t cpu_get_icount_locked(void)
  257. {
  258. int64_t icount = cpu_get_icount_raw_locked();
  259. return atomic_read_i64(&timers_state.qemu_icount_bias) +
  260. cpu_icount_to_ns(icount);
  261. }
  262. int64_t cpu_get_icount_raw(void)
  263. {
  264. int64_t icount;
  265. unsigned start;
  266. do {
  267. start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
  268. icount = cpu_get_icount_raw_locked();
  269. } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
  270. return icount;
  271. }
  272. /* Return the virtual CPU time, based on the instruction counter. */
  273. int64_t cpu_get_icount(void)
  274. {
  275. int64_t icount;
  276. unsigned start;
  277. do {
  278. start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
  279. icount = cpu_get_icount_locked();
  280. } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
  281. return icount;
  282. }
  283. int64_t cpu_icount_to_ns(int64_t icount)
  284. {
  285. return icount << atomic_read(&timers_state.icount_time_shift);
  286. }
  287. static int64_t cpu_get_ticks_locked(void)
  288. {
  289. int64_t ticks = timers_state.cpu_ticks_offset;
  290. if (timers_state.cpu_ticks_enabled) {
  291. ticks += cpu_get_host_ticks();
  292. }
  293. if (timers_state.cpu_ticks_prev > ticks) {
  294. /* Non increasing ticks may happen if the host uses software suspend. */
  295. timers_state.cpu_ticks_offset += timers_state.cpu_ticks_prev - ticks;
  296. ticks = timers_state.cpu_ticks_prev;
  297. }
  298. timers_state.cpu_ticks_prev = ticks;
  299. return ticks;
  300. }
  301. /* return the time elapsed in VM between vm_start and vm_stop. Unless
  302. * icount is active, cpu_get_ticks() uses units of the host CPU cycle
  303. * counter.
  304. */
  305. int64_t cpu_get_ticks(void)
  306. {
  307. int64_t ticks;
  308. if (use_icount) {
  309. return cpu_get_icount();
  310. }
  311. qemu_spin_lock(&timers_state.vm_clock_lock);
  312. ticks = cpu_get_ticks_locked();
  313. qemu_spin_unlock(&timers_state.vm_clock_lock);
  314. return ticks;
  315. }
  316. static int64_t cpu_get_clock_locked(void)
  317. {
  318. int64_t time;
  319. time = timers_state.cpu_clock_offset;
  320. if (timers_state.cpu_ticks_enabled) {
  321. time += get_clock();
  322. }
  323. return time;
  324. }
  325. /* Return the monotonic time elapsed in VM, i.e.,
  326. * the time between vm_start and vm_stop
  327. */
  328. int64_t cpu_get_clock(void)
  329. {
  330. int64_t ti;
  331. unsigned start;
  332. do {
  333. start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
  334. ti = cpu_get_clock_locked();
  335. } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
  336. return ti;
  337. }
  338. /* enable cpu_get_ticks()
  339. * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
  340. */
  341. void cpu_enable_ticks(void)
  342. {
  343. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  344. &timers_state.vm_clock_lock);
  345. if (!timers_state.cpu_ticks_enabled) {
  346. timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
  347. timers_state.cpu_clock_offset -= get_clock();
  348. timers_state.cpu_ticks_enabled = 1;
  349. }
  350. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  351. &timers_state.vm_clock_lock);
  352. }
  353. /* disable cpu_get_ticks() : the clock is stopped. You must not call
  354. * cpu_get_ticks() after that.
  355. * Caller must hold BQL which serves as mutex for vm_clock_seqlock.
  356. */
  357. void cpu_disable_ticks(void)
  358. {
  359. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  360. &timers_state.vm_clock_lock);
  361. if (timers_state.cpu_ticks_enabled) {
  362. timers_state.cpu_ticks_offset += cpu_get_host_ticks();
  363. timers_state.cpu_clock_offset = cpu_get_clock_locked();
  364. timers_state.cpu_ticks_enabled = 0;
  365. }
  366. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  367. &timers_state.vm_clock_lock);
  368. }
  369. /* Correlation between real and virtual time is always going to be
  370. fairly approximate, so ignore small variation.
  371. When the guest is idle real and virtual time will be aligned in
  372. the IO wait loop. */
  373. #define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
  374. static void icount_adjust(void)
  375. {
  376. int64_t cur_time;
  377. int64_t cur_icount;
  378. int64_t delta;
  379. /* Protected by TimersState mutex. */
  380. static int64_t last_delta;
  381. /* If the VM is not running, then do nothing. */
  382. if (!runstate_is_running()) {
  383. return;
  384. }
  385. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  386. &timers_state.vm_clock_lock);
  387. cur_time = cpu_get_clock_locked();
  388. cur_icount = cpu_get_icount_locked();
  389. delta = cur_icount - cur_time;
  390. /* FIXME: This is a very crude algorithm, somewhat prone to oscillation. */
  391. if (delta > 0
  392. && last_delta + ICOUNT_WOBBLE < delta * 2
  393. && timers_state.icount_time_shift > 0) {
  394. /* The guest is getting too far ahead. Slow time down. */
  395. atomic_set(&timers_state.icount_time_shift,
  396. timers_state.icount_time_shift - 1);
  397. }
  398. if (delta < 0
  399. && last_delta - ICOUNT_WOBBLE > delta * 2
  400. && timers_state.icount_time_shift < MAX_ICOUNT_SHIFT) {
  401. /* The guest is getting too far behind. Speed time up. */
  402. atomic_set(&timers_state.icount_time_shift,
  403. timers_state.icount_time_shift + 1);
  404. }
  405. last_delta = delta;
  406. atomic_set_i64(&timers_state.qemu_icount_bias,
  407. cur_icount - (timers_state.qemu_icount
  408. << timers_state.icount_time_shift));
  409. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  410. &timers_state.vm_clock_lock);
  411. }
  412. static void icount_adjust_rt(void *opaque)
  413. {
  414. timer_mod(timers_state.icount_rt_timer,
  415. qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
  416. icount_adjust();
  417. }
  418. static void icount_adjust_vm(void *opaque)
  419. {
  420. timer_mod(timers_state.icount_vm_timer,
  421. qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
  422. NANOSECONDS_PER_SECOND / 10);
  423. icount_adjust();
  424. }
  425. static int64_t qemu_icount_round(int64_t count)
  426. {
  427. int shift = atomic_read(&timers_state.icount_time_shift);
  428. return (count + (1 << shift) - 1) >> shift;
  429. }
  430. static void icount_warp_rt(void)
  431. {
  432. unsigned seq;
  433. int64_t warp_start;
  434. /* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
  435. * changes from -1 to another value, so the race here is okay.
  436. */
  437. do {
  438. seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
  439. warp_start = timers_state.vm_clock_warp_start;
  440. } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
  441. if (warp_start == -1) {
  442. return;
  443. }
  444. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  445. &timers_state.vm_clock_lock);
  446. if (runstate_is_running()) {
  447. int64_t clock = REPLAY_CLOCK_LOCKED(REPLAY_CLOCK_VIRTUAL_RT,
  448. cpu_get_clock_locked());
  449. int64_t warp_delta;
  450. warp_delta = clock - timers_state.vm_clock_warp_start;
  451. if (use_icount == 2) {
  452. /*
  453. * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
  454. * far ahead of real time.
  455. */
  456. int64_t cur_icount = cpu_get_icount_locked();
  457. int64_t delta = clock - cur_icount;
  458. warp_delta = MIN(warp_delta, delta);
  459. }
  460. atomic_set_i64(&timers_state.qemu_icount_bias,
  461. timers_state.qemu_icount_bias + warp_delta);
  462. }
  463. timers_state.vm_clock_warp_start = -1;
  464. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  465. &timers_state.vm_clock_lock);
  466. if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
  467. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  468. }
  469. }
  /*
   * Timer callback that simply completes the pending warp; @opaque is
   * unused.  No replay checkpoint is needed here because the timer
   * already synchronizes with CHECKPOINT_CLOCK_VIRTUAL_RT.
   */
  static void icount_timer_cb(void *opaque)
  {
      icount_warp_rt();
  }
  477. void qtest_clock_warp(int64_t dest)
  478. {
  479. int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
  480. AioContext *aio_context;
  481. assert(qtest_enabled());
  482. aio_context = qemu_get_aio_context();
  483. while (clock < dest) {
  484. int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
  485. QEMU_TIMER_ATTR_ALL);
  486. int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
  487. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  488. &timers_state.vm_clock_lock);
  489. atomic_set_i64(&timers_state.qemu_icount_bias,
  490. timers_state.qemu_icount_bias + warp);
  491. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  492. &timers_state.vm_clock_lock);
  493. qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
  494. timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
  495. clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
  496. }
  497. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  498. }
  499. void qemu_start_warp_timer(void)
  500. {
  501. int64_t clock;
  502. int64_t deadline;
  503. if (!use_icount) {
  504. return;
  505. }
  506. /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
  507. * do not fire, so computing the deadline does not make sense.
  508. */
  509. if (!runstate_is_running()) {
  510. return;
  511. }
  512. if (replay_mode != REPLAY_MODE_PLAY) {
  513. if (!all_cpu_threads_idle()) {
  514. return;
  515. }
  516. if (qtest_enabled()) {
  517. /* When testing, qtest commands advance icount. */
  518. return;
  519. }
  520. replay_checkpoint(CHECKPOINT_CLOCK_WARP_START);
  521. } else {
  522. /* warp clock deterministically in record/replay mode */
  523. if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
  524. /* vCPU is sleeping and warp can't be started.
  525. It is probably a race condition: notification sent
  526. to vCPU was processed in advance and vCPU went to sleep.
  527. Therefore we have to wake it up for doing someting. */
  528. if (replay_has_checkpoint()) {
  529. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  530. }
  531. return;
  532. }
  533. }
  534. /* We want to use the earliest deadline from ALL vm_clocks */
  535. clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
  536. deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
  537. ~QEMU_TIMER_ATTR_EXTERNAL);
  538. if (deadline < 0) {
  539. static bool notified;
  540. if (!icount_sleep && !notified) {
  541. warn_report("icount sleep disabled and no active timers");
  542. notified = true;
  543. }
  544. return;
  545. }
  546. if (deadline > 0) {
  547. /*
  548. * Ensure QEMU_CLOCK_VIRTUAL proceeds even when the virtual CPU goes to
  549. * sleep. Otherwise, the CPU might be waiting for a future timer
  550. * interrupt to wake it up, but the interrupt never comes because
  551. * the vCPU isn't running any insns and thus doesn't advance the
  552. * QEMU_CLOCK_VIRTUAL.
  553. */
  554. if (!icount_sleep) {
  555. /*
  556. * We never let VCPUs sleep in no sleep icount mode.
  557. * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
  558. * to the next QEMU_CLOCK_VIRTUAL event and notify it.
  559. * It is useful when we want a deterministic execution time,
  560. * isolated from host latencies.
  561. */
  562. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  563. &timers_state.vm_clock_lock);
  564. atomic_set_i64(&timers_state.qemu_icount_bias,
  565. timers_state.qemu_icount_bias + deadline);
  566. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  567. &timers_state.vm_clock_lock);
  568. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  569. } else {
  570. /*
  571. * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
  572. * "real" time, (related to the time left until the next event) has
  573. * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
  574. * This avoids that the warps are visible externally; for example,
  575. * you will not be sending network packets continuously instead of
  576. * every 100ms.
  577. */
  578. seqlock_write_lock(&timers_state.vm_clock_seqlock,
  579. &timers_state.vm_clock_lock);
  580. if (timers_state.vm_clock_warp_start == -1
  581. || timers_state.vm_clock_warp_start > clock) {
  582. timers_state.vm_clock_warp_start = clock;
  583. }
  584. seqlock_write_unlock(&timers_state.vm_clock_seqlock,
  585. &timers_state.vm_clock_lock);
  586. timer_mod_anticipate(timers_state.icount_warp_timer,
  587. clock + deadline);
  588. }
  589. } else if (deadline == 0) {
  590. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  591. }
  592. }
  593. static void qemu_account_warp_timer(void)
  594. {
  595. if (!use_icount || !icount_sleep) {
  596. return;
  597. }
  598. /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
  599. * do not fire, so computing the deadline does not make sense.
  600. */
  601. if (!runstate_is_running()) {
  602. return;
  603. }
  604. /* warp clock deterministically in record/replay mode */
  605. if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
  606. return;
  607. }
  608. timer_del(timers_state.icount_warp_timer);
  609. icount_warp_rt();
  610. }
  611. static bool icount_state_needed(void *opaque)
  612. {
  613. return use_icount;
  614. }
  615. static bool warp_timer_state_needed(void *opaque)
  616. {
  617. TimersState *s = opaque;
  618. return s->icount_warp_timer != NULL;
  619. }
  620. static bool adjust_timers_state_needed(void *opaque)
  621. {
  622. TimersState *s = opaque;
  623. return s->icount_rt_timer != NULL;
  624. }
  625. /*
  626. * The subsection for warp timer migration is optional, because the timer may not have been created
  627. */
  /* Carries the pending-warp start time and the warp timer; gated by warp_timer_state_needed(). */
  628. static const VMStateDescription icount_vmstate_warp_timer = {
  629. .name = "timer/icount/warp_timer",
  630. .version_id = 1,
  631. .minimum_version_id = 1,
  632. .needed = warp_timer_state_needed,
  633. .fields = (VMStateField[]) {
  634. VMSTATE_INT64(vm_clock_warp_start, TimersState),
  635. VMSTATE_TIMER_PTR(icount_warp_timer, TimersState),
  636. VMSTATE_END_OF_LIST()
  637. }
  638. };
  /*
   * Optional subsection carrying the two icount adjustment timers; gated by
   * adjust_timers_state_needed(), i.e. sent only when icount_rt_timer exists.
   */
  639. static const VMStateDescription icount_vmstate_adjust_timers = {
  640. .name = "timer/icount/timers",
  641. .version_id = 1,
  642. .minimum_version_id = 1,
  643. .needed = adjust_timers_state_needed,
  644. .fields = (VMStateField[]) {
  645. VMSTATE_TIMER_PTR(icount_rt_timer, TimersState),
  646. VMSTATE_TIMER_PTR(icount_vm_timer, TimersState),
  647. VMSTATE_END_OF_LIST()
  648. }
  649. };
  650. /*
  651. * This is a subsection for icount migration.
  652. */
  653. static const VMStateDescription icount_vmstate_timers = {
  654. .name = "timer/icount",
  655. .version_id = 1,
  656. .minimum_version_id = 1,
  657. .needed = icount_state_needed,
  658. .fields = (VMStateField[]) {
  659. VMSTATE_INT64(qemu_icount_bias, TimersState),
  660. VMSTATE_INT64(qemu_icount, TimersState),
  661. VMSTATE_END_OF_LIST()
  662. },
  663. .subsections = (const VMStateDescription*[]) {
  664. &icount_vmstate_warp_timer,
  665. &icount_vmstate_adjust_timers,
  666. NULL
  667. }
  668. };
  669. static const VMStateDescription vmstate_timers = {
  670. .name = "timer",
  671. .version_id = 2,
  672. .minimum_version_id = 1,
  673. .fields = (VMStateField[]) {
  674. VMSTATE_INT64(cpu_ticks_offset, TimersState),
  675. VMSTATE_UNUSED(8),
  676. VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
  677. VMSTATE_END_OF_LIST()
  678. },
  679. .subsections = (const VMStateDescription*[]) {
  680. &icount_vmstate_timers,
  681. NULL
  682. }
  683. };
  684. static void cpu_throttle_thread(CPUState *cpu, run_on_cpu_data opaque)
  685. {
  686. double pct;
  687. double throttle_ratio;
  688. int64_t sleeptime_ns, endtime_ns;
  689. if (!cpu_throttle_get_percentage()) {
  690. return;
  691. }
  692. pct = (double)cpu_throttle_get_percentage()/100;
  693. throttle_ratio = pct / (1 - pct);
  694. /* Add 1ns to fix double's rounding error (like 0.9999999...) */
  695. sleeptime_ns = (int64_t)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS + 1);
  696. endtime_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + sleeptime_ns;
  697. while (sleeptime_ns > 0 && !cpu->stop) {
  698. if (sleeptime_ns > SCALE_MS) {
  699. qemu_cond_timedwait(cpu->halt_cond, &qemu_global_mutex,
  700. sleeptime_ns / SCALE_MS);
  701. } else {
  702. qemu_mutex_unlock_iothread();
  703. g_usleep(sleeptime_ns / SCALE_US);
  704. qemu_mutex_lock_iothread();
  705. }
  706. sleeptime_ns = endtime_ns - qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
  707. }
  708. atomic_set(&cpu->throttle_thread_scheduled, 0);
  709. }
  710. static void cpu_throttle_timer_tick(void *opaque)
  711. {
  712. CPUState *cpu;
  713. double pct;
  714. /* Stop the timer if needed */
  715. if (!cpu_throttle_get_percentage()) {
  716. return;
  717. }
  718. CPU_FOREACH(cpu) {
  719. if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
  720. async_run_on_cpu(cpu, cpu_throttle_thread,
  721. RUN_ON_CPU_NULL);
  722. }
  723. }
  724. pct = (double)cpu_throttle_get_percentage()/100;
  725. timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
  726. CPU_THROTTLE_TIMESLICE_NS / (1-pct));
  727. }
  728. void cpu_throttle_set(int new_throttle_pct)
  729. {
  730. /* Ensure throttle percentage is within valid range */
  731. new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
  732. new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
  733. atomic_set(&throttle_percentage, new_throttle_pct);
  734. timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
  735. CPU_THROTTLE_TIMESLICE_NS);
  736. }
  737. void cpu_throttle_stop(void)
  738. {
  739. atomic_set(&throttle_percentage, 0);
  740. }
  741. bool cpu_throttle_active(void)
  742. {
  743. return (cpu_throttle_get_percentage() != 0);
  744. }
  745. int cpu_throttle_get_percentage(void)
  746. {
  747. return atomic_read(&throttle_percentage);
  748. }
  749. void cpu_ticks_init(void)
  750. {
  751. seqlock_init(&timers_state.vm_clock_seqlock);
  752. qemu_spin_init(&timers_state.vm_clock_lock);
  753. vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
  754. throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
  755. cpu_throttle_timer_tick, NULL);
  756. }
  757. void configure_icount(QemuOpts *opts, Error **errp)
  758. {
  759. const char *option;
  760. char *rem_str = NULL;
  761. option = qemu_opt_get(opts, "shift");
  762. if (!option) {
  763. if (qemu_opt_get(opts, "align") != NULL) {
  764. error_setg(errp, "Please specify shift option when using align");
  765. }
  766. return;
  767. }
  768. icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
  769. if (icount_sleep) {
  770. timers_state.icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
  771. icount_timer_cb, NULL);
  772. }
  773. icount_align_option = qemu_opt_get_bool(opts, "align", false);
  774. if (icount_align_option && !icount_sleep) {
  775. error_setg(errp, "align=on and sleep=off are incompatible");
  776. }
  777. if (strcmp(option, "auto") != 0) {
  778. errno = 0;
  779. timers_state.icount_time_shift = strtol(option, &rem_str, 0);
  780. if (errno != 0 || *rem_str != '\0' || !strlen(option)) {
  781. error_setg(errp, "icount: Invalid shift value");
  782. }
  783. use_icount = 1;
  784. return;
  785. } else if (icount_align_option) {
  786. error_setg(errp, "shift=auto and align=on are incompatible");
  787. } else if (!icount_sleep) {
  788. error_setg(errp, "shift=auto and sleep=off are incompatible");
  789. }
  790. use_icount = 2;
  791. /* 125MIPS seems a reasonable initial guess at the guest speed.
  792. It will be corrected fairly quickly anyway. */
  793. timers_state.icount_time_shift = 3;
  794. /* Have both realtime and virtual time triggers for speed adjustment.
  795. The realtime trigger catches emulated time passing too slowly,
  796. the virtual time trigger catches emulated time passing too fast.
  797. Realtime triggers occur even when idle, so use them less frequently
  798. than VM triggers. */
  799. timers_state.vm_clock_warp_start = -1;
  800. timers_state.icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
  801. icount_adjust_rt, NULL);
  802. timer_mod(timers_state.icount_rt_timer,
  803. qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
  804. timers_state.icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
  805. icount_adjust_vm, NULL);
  806. timer_mod(timers_state.icount_vm_timer,
  807. qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
  808. NANOSECONDS_PER_SECOND / 10);
  809. }
/***********************************************************/
/* TCG vCPU kick timer
 *
 * The kick timer is responsible for moving single threaded vCPU
 * emulation on to the next vCPU. If more than one vCPU is running, a
 * timer event will force a cpu->exit so the next vCPU can get
 * scheduled.
 *
 * The timer is removed if all vCPUs are idle and restarted again once
 * idleness is complete.
 */
  821. static QEMUTimer *tcg_kick_vcpu_timer;
  822. static CPUState *tcg_current_rr_cpu;
  823. #define TCG_KICK_PERIOD (NANOSECONDS_PER_SECOND / 10)
  824. static inline int64_t qemu_tcg_next_kick(void)
  825. {
  826. return qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + TCG_KICK_PERIOD;
  827. }
  828. /* Kick the currently round-robin scheduled vCPU */
  829. static void qemu_cpu_kick_rr_cpu(void)
  830. {
  831. CPUState *cpu;
  832. do {
  833. cpu = atomic_mb_read(&tcg_current_rr_cpu);
  834. if (cpu) {
  835. cpu_exit(cpu);
  836. }
  837. } while (cpu != atomic_mb_read(&tcg_current_rr_cpu));
  838. }
  839. static void do_nothing(CPUState *cpu, run_on_cpu_data unused)
  840. {
  841. }
  842. void qemu_timer_notify_cb(void *opaque, QEMUClockType type)
  843. {
  844. if (!use_icount || type != QEMU_CLOCK_VIRTUAL) {
  845. qemu_notify_event();
  846. return;
  847. }
  848. if (qemu_in_vcpu_thread()) {
  849. /* A CPU is currently running; kick it back out to the
  850. * tcg_cpu_exec() loop so it will recalculate its
  851. * icount deadline immediately.
  852. */
  853. qemu_cpu_kick(current_cpu);
  854. } else if (first_cpu) {
  855. /* qemu_cpu_kick is not enough to kick a halted CPU out of
  856. * qemu_tcg_wait_io_event. async_run_on_cpu, instead,
  857. * causes cpu_thread_is_idle to return false. This way,
  858. * handle_icount_deadline can run.
  859. * If we have no CPUs at all for some reason, we don't
  860. * need to do anything.
  861. */
  862. async_run_on_cpu(first_cpu, do_nothing, RUN_ON_CPU_NULL);
  863. }
  864. }
  865. static void kick_tcg_thread(void *opaque)
  866. {
  867. timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
  868. qemu_cpu_kick_rr_cpu();
  869. }
  870. static void start_tcg_kick_timer(void)
  871. {
  872. assert(!mttcg_enabled);
  873. if (!tcg_kick_vcpu_timer && CPU_NEXT(first_cpu)) {
  874. tcg_kick_vcpu_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
  875. kick_tcg_thread, NULL);
  876. }
  877. if (tcg_kick_vcpu_timer && !timer_pending(tcg_kick_vcpu_timer)) {
  878. timer_mod(tcg_kick_vcpu_timer, qemu_tcg_next_kick());
  879. }
  880. }
  881. static void stop_tcg_kick_timer(void)
  882. {
  883. assert(!mttcg_enabled);
  884. if (tcg_kick_vcpu_timer && timer_pending(tcg_kick_vcpu_timer)) {
  885. timer_del(tcg_kick_vcpu_timer);
  886. }
  887. }
  888. /***********************************************************/
  889. void hw_error(const char *fmt, ...)
  890. {
  891. va_list ap;
  892. CPUState *cpu;
  893. va_start(ap, fmt);
  894. fprintf(stderr, "qemu: hardware error: ");
  895. vfprintf(stderr, fmt, ap);
  896. fprintf(stderr, "\n");
  897. CPU_FOREACH(cpu) {
  898. fprintf(stderr, "CPU #%d:\n", cpu->cpu_index);
  899. cpu_dump_state(cpu, stderr, CPU_DUMP_FPU);
  900. }
  901. va_end(ap);
  902. abort();
  903. }
  904. void cpu_synchronize_all_states(void)
  905. {
  906. CPUState *cpu;
  907. CPU_FOREACH(cpu) {
  908. cpu_synchronize_state(cpu);
  909. /* TODO: move to cpu_synchronize_state() */
  910. if (hvf_enabled()) {
  911. hvf_cpu_synchronize_state(cpu);
  912. }
  913. }
  914. }
  915. void cpu_synchronize_all_post_reset(void)
  916. {
  917. CPUState *cpu;
  918. CPU_FOREACH(cpu) {
  919. cpu_synchronize_post_reset(cpu);
  920. /* TODO: move to cpu_synchronize_post_reset() */
  921. if (hvf_enabled()) {
  922. hvf_cpu_synchronize_post_reset(cpu);
  923. }
  924. }
  925. }
  926. void cpu_synchronize_all_post_init(void)
  927. {
  928. CPUState *cpu;
  929. CPU_FOREACH(cpu) {
  930. cpu_synchronize_post_init(cpu);
  931. /* TODO: move to cpu_synchronize_post_init() */
  932. if (hvf_enabled()) {
  933. hvf_cpu_synchronize_post_init(cpu);
  934. }
  935. }
  936. }
  937. void cpu_synchronize_all_pre_loadvm(void)
  938. {
  939. CPUState *cpu;
  940. CPU_FOREACH(cpu) {
  941. cpu_synchronize_pre_loadvm(cpu);
  942. }
  943. }
  944. static int do_vm_stop(RunState state, bool send_stop)
  945. {
  946. int ret = 0;
  947. if (runstate_is_running()) {
  948. cpu_disable_ticks();
  949. pause_all_vcpus();
  950. runstate_set(state);
  951. vm_state_notify(0, state);
  952. if (send_stop) {
  953. qapi_event_send_stop();
  954. }
  955. }
  956. bdrv_drain_all();
  957. replay_disable_events();
  958. ret = bdrv_flush_all();
  959. return ret;
  960. }
  961. /* Special vm_stop() variant for terminating the process. Historically clients
  962. * did not expect a QMP STOP event and so we need to retain compatibility.
  963. */
  964. int vm_shutdown(void)
  965. {
  966. return do_vm_stop(RUN_STATE_SHUTDOWN, false);
  967. }
  968. static bool cpu_can_run(CPUState *cpu)
  969. {
  970. if (cpu->stop) {
  971. return false;
  972. }
  973. if (cpu_is_stopped(cpu)) {
  974. return false;
  975. }
  976. return true;
  977. }
  978. static void cpu_handle_guest_debug(CPUState *cpu)
  979. {
  980. gdb_set_stop_cpu(cpu);
  981. qemu_system_debug_request();
  982. cpu->stopped = true;
  983. }
  984. #ifdef CONFIG_LINUX
  985. static void sigbus_reraise(void)
  986. {
  987. sigset_t set;
  988. struct sigaction action;
  989. memset(&action, 0, sizeof(action));
  990. action.sa_handler = SIG_DFL;
  991. if (!sigaction(SIGBUS, &action, NULL)) {
  992. raise(SIGBUS);
  993. sigemptyset(&set);
  994. sigaddset(&set, SIGBUS);
  995. pthread_sigmask(SIG_UNBLOCK, &set, NULL);
  996. }
  997. perror("Failed to re-raise SIGBUS!\n");
  998. abort();
  999. }
  1000. static void sigbus_handler(int n, siginfo_t *siginfo, void *ctx)
  1001. {
  1002. if (siginfo->si_code != BUS_MCEERR_AO && siginfo->si_code != BUS_MCEERR_AR) {
  1003. sigbus_reraise();
  1004. }
  1005. if (current_cpu) {
  1006. /* Called asynchronously in VCPU thread. */
  1007. if (kvm_on_sigbus_vcpu(current_cpu, siginfo->si_code, siginfo->si_addr)) {
  1008. sigbus_reraise();
  1009. }
  1010. } else {
  1011. /* Called synchronously (via signalfd) in main thread. */
  1012. if (kvm_on_sigbus(siginfo->si_code, siginfo->si_addr)) {
  1013. sigbus_reraise();
  1014. }
  1015. }
  1016. }
  1017. static void qemu_init_sigbus(void)
  1018. {
  1019. struct sigaction action;
  1020. memset(&action, 0, sizeof(action));
  1021. action.sa_flags = SA_SIGINFO;
  1022. action.sa_sigaction = sigbus_handler;
  1023. sigaction(SIGBUS, &action, NULL);
  1024. prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
  1025. }
  1026. #else /* !CONFIG_LINUX */
  1027. static void qemu_init_sigbus(void)
  1028. {
  1029. }
  1030. #endif /* !CONFIG_LINUX */
  1031. static QemuThread io_thread;
  1032. /* cpu creation */
  1033. static QemuCond qemu_cpu_cond;
  1034. /* system init */
  1035. static QemuCond qemu_pause_cond;
  1036. void qemu_init_cpu_loop(void)
  1037. {
  1038. qemu_init_sigbus();
  1039. qemu_cond_init(&qemu_cpu_cond);
  1040. qemu_cond_init(&qemu_pause_cond);
  1041. qemu_mutex_init(&qemu_global_mutex);
  1042. qemu_thread_get_self(&io_thread);
  1043. }
  1044. void run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
  1045. {
  1046. do_run_on_cpu(cpu, func, data, &qemu_global_mutex);
  1047. }
  1048. static void qemu_kvm_destroy_vcpu(CPUState *cpu)
  1049. {
  1050. if (kvm_destroy_vcpu(cpu) < 0) {
  1051. error_report("kvm_destroy_vcpu failed");
  1052. exit(EXIT_FAILURE);
  1053. }
  1054. }
  1055. static void qemu_tcg_destroy_vcpu(CPUState *cpu)
  1056. {
  1057. }
  1058. static void qemu_cpu_stop(CPUState *cpu, bool exit)
  1059. {
  1060. g_assert(qemu_cpu_is_self(cpu));
  1061. cpu->stop = false;
  1062. cpu->stopped = true;
  1063. if (exit) {
  1064. cpu_exit(cpu);
  1065. }
  1066. qemu_cond_broadcast(&qemu_pause_cond);
  1067. }
  1068. static void qemu_wait_io_event_common(CPUState *cpu)
  1069. {
  1070. atomic_mb_set(&cpu->thread_kicked, false);
  1071. if (cpu->stop) {
  1072. qemu_cpu_stop(cpu, false);
  1073. }
  1074. process_queued_cpu_work(cpu);
  1075. }
  1076. static void qemu_tcg_rr_wait_io_event(void)
  1077. {
  1078. CPUState *cpu;
  1079. while (all_cpu_threads_idle()) {
  1080. stop_tcg_kick_timer();
  1081. qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
  1082. }
  1083. start_tcg_kick_timer();
  1084. CPU_FOREACH(cpu) {
  1085. qemu_wait_io_event_common(cpu);
  1086. }
  1087. }
  1088. static void qemu_wait_io_event(CPUState *cpu)
  1089. {
  1090. while (cpu_thread_is_idle(cpu)) {
  1091. qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
  1092. }
  1093. #ifdef _WIN32
  1094. /* Eat dummy APC queued by qemu_cpu_kick_thread. */
  1095. if (!tcg_enabled()) {
  1096. SleepEx(0, TRUE);
  1097. }
  1098. #endif
  1099. qemu_wait_io_event_common(cpu);
  1100. }
  1101. static void *qemu_kvm_cpu_thread_fn(void *arg)
  1102. {
  1103. CPUState *cpu = arg;
  1104. int r;
  1105. rcu_register_thread();
  1106. qemu_mutex_lock_iothread();
  1107. qemu_thread_get_self(cpu->thread);
  1108. cpu->thread_id = qemu_get_thread_id();
  1109. cpu->can_do_io = 1;
  1110. current_cpu = cpu;
  1111. r = kvm_init_vcpu(cpu);
  1112. if (r < 0) {
  1113. error_report("kvm_init_vcpu failed: %s", strerror(-r));
  1114. exit(1);
  1115. }
  1116. kvm_init_cpu_signals(cpu);
  1117. /* signal CPU creation */
  1118. cpu->created = true;
  1119. qemu_cond_signal(&qemu_cpu_cond);
  1120. qemu_guest_random_seed_thread_part2(cpu->random_seed);
  1121. do {
  1122. if (cpu_can_run(cpu)) {
  1123. r = kvm_cpu_exec(cpu);
  1124. if (r == EXCP_DEBUG) {
  1125. cpu_handle_guest_debug(cpu);
  1126. }
  1127. }
  1128. qemu_wait_io_event(cpu);
  1129. } while (!cpu->unplug || cpu_can_run(cpu));
  1130. qemu_kvm_destroy_vcpu(cpu);
  1131. cpu->created = false;
  1132. qemu_cond_signal(&qemu_cpu_cond);
  1133. qemu_mutex_unlock_iothread();
  1134. rcu_unregister_thread();
  1135. return NULL;
  1136. }
  1137. static void *qemu_dummy_cpu_thread_fn(void *arg)
  1138. {
  1139. #ifdef _WIN32
  1140. error_report("qtest is not supported under Windows");
  1141. exit(1);
  1142. #else
  1143. CPUState *cpu = arg;
  1144. sigset_t waitset;
  1145. int r;
  1146. rcu_register_thread();
  1147. qemu_mutex_lock_iothread();
  1148. qemu_thread_get_self(cpu->thread);
  1149. cpu->thread_id = qemu_get_thread_id();
  1150. cpu->can_do_io = 1;
  1151. current_cpu = cpu;
  1152. sigemptyset(&waitset);
  1153. sigaddset(&waitset, SIG_IPI);
  1154. /* signal CPU creation */
  1155. cpu->created = true;
  1156. qemu_cond_signal(&qemu_cpu_cond);
  1157. qemu_guest_random_seed_thread_part2(cpu->random_seed);
  1158. do {
  1159. qemu_mutex_unlock_iothread();
  1160. do {
  1161. int sig;
  1162. r = sigwait(&waitset, &sig);
  1163. } while (r == -1 && (errno == EAGAIN || errno == EINTR));
  1164. if (r == -1) {
  1165. perror("sigwait");
  1166. exit(1);
  1167. }
  1168. qemu_mutex_lock_iothread();
  1169. qemu_wait_io_event(cpu);
  1170. } while (!cpu->unplug);
  1171. qemu_mutex_unlock_iothread();
  1172. rcu_unregister_thread();
  1173. return NULL;
  1174. #endif
  1175. }
  1176. static int64_t tcg_get_icount_limit(void)
  1177. {
  1178. int64_t deadline;
  1179. if (replay_mode != REPLAY_MODE_PLAY) {
  1180. /*
  1181. * Include all the timers, because they may need an attention.
  1182. * Too long CPU execution may create unnecessary delay in UI.
  1183. */
  1184. deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
  1185. QEMU_TIMER_ATTR_ALL);
  1186. /* Maintain prior (possibly buggy) behaviour where if no deadline
  1187. * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
  1188. * INT32_MAX nanoseconds ahead, we still use INT32_MAX
  1189. * nanoseconds.
  1190. */
  1191. if ((deadline < 0) || (deadline > INT32_MAX)) {
  1192. deadline = INT32_MAX;
  1193. }
  1194. return qemu_icount_round(deadline);
  1195. } else {
  1196. return replay_get_instructions();
  1197. }
  1198. }
  1199. static void handle_icount_deadline(void)
  1200. {
  1201. assert(qemu_in_vcpu_thread());
  1202. if (use_icount) {
  1203. int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
  1204. QEMU_TIMER_ATTR_ALL);
  1205. if (deadline == 0) {
  1206. /* Wake up other AioContexts. */
  1207. qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
  1208. qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
  1209. }
  1210. }
  1211. }
  1212. static void prepare_icount_for_run(CPUState *cpu)
  1213. {
  1214. if (use_icount) {
  1215. int insns_left;
  1216. /* These should always be cleared by process_icount_data after
  1217. * each vCPU execution. However u16.high can be raised
  1218. * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt
  1219. */
  1220. g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
  1221. g_assert(cpu->icount_extra == 0);
  1222. cpu->icount_budget = tcg_get_icount_limit();
  1223. insns_left = MIN(0xffff, cpu->icount_budget);
  1224. cpu_neg(cpu)->icount_decr.u16.low = insns_left;
  1225. cpu->icount_extra = cpu->icount_budget - insns_left;
  1226. replay_mutex_lock();
  1227. }
  1228. }
  1229. static void process_icount_data(CPUState *cpu)
  1230. {
  1231. if (use_icount) {
  1232. /* Account for executed instructions */
  1233. cpu_update_icount(cpu);
  1234. /* Reset the counters */
  1235. cpu_neg(cpu)->icount_decr.u16.low = 0;
  1236. cpu->icount_extra = 0;
  1237. cpu->icount_budget = 0;
  1238. replay_account_executed_instructions();
  1239. replay_mutex_unlock();
  1240. }
  1241. }
  1242. static int tcg_cpu_exec(CPUState *cpu)
  1243. {
  1244. int ret;
  1245. #ifdef CONFIG_PROFILER
  1246. int64_t ti;
  1247. #endif
  1248. assert(tcg_enabled());
  1249. #ifdef CONFIG_PROFILER
  1250. ti = profile_getclock();
  1251. #endif
  1252. cpu_exec_start(cpu);
  1253. ret = cpu_exec(cpu);
  1254. cpu_exec_end(cpu);
  1255. #ifdef CONFIG_PROFILER
  1256. atomic_set(&tcg_ctx->prof.cpu_exec_time,
  1257. tcg_ctx->prof.cpu_exec_time + profile_getclock() - ti);
  1258. #endif
  1259. return ret;
  1260. }
  1261. /* Destroy any remaining vCPUs which have been unplugged and have
  1262. * finished running
  1263. */
  1264. static void deal_with_unplugged_cpus(void)
  1265. {
  1266. CPUState *cpu;
  1267. CPU_FOREACH(cpu) {
  1268. if (cpu->unplug && !cpu_can_run(cpu)) {
  1269. qemu_tcg_destroy_vcpu(cpu);
  1270. cpu->created = false;
  1271. qemu_cond_signal(&qemu_cpu_cond);
  1272. break;
  1273. }
  1274. }
  1275. }
  1276. /* Single-threaded TCG
  1277. *
  1278. * In the single-threaded case each vCPU is simulated in turn. If
  1279. * there is more than a single vCPU we create a simple timer to kick
  1280. * the vCPU and ensure we don't get stuck in a tight loop in one vCPU.
  1281. * This is done explicitly rather than relying on side-effects
  1282. * elsewhere.
  1283. */
  1284. static void *qemu_tcg_rr_cpu_thread_fn(void *arg)
  1285. {
  1286. CPUState *cpu = arg;
  1287. assert(tcg_enabled());
  1288. rcu_register_thread();
  1289. tcg_register_thread();
  1290. qemu_mutex_lock_iothread();
  1291. qemu_thread_get_self(cpu->thread);
  1292. cpu->thread_id = qemu_get_thread_id();
  1293. cpu->created = true;
  1294. cpu->can_do_io = 1;
  1295. qemu_cond_signal(&qemu_cpu_cond);
  1296. qemu_guest_random_seed_thread_part2(cpu->random_seed);
  1297. /* wait for initial kick-off after machine start */
  1298. while (first_cpu->stopped) {
  1299. qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
  1300. /* process any pending work */
  1301. CPU_FOREACH(cpu) {
  1302. current_cpu = cpu;
  1303. qemu_wait_io_event_common(cpu);
  1304. }
  1305. }
  1306. start_tcg_kick_timer();
  1307. cpu = first_cpu;
  1308. /* process any pending work */
  1309. cpu->exit_request = 1;
  1310. while (1) {
  1311. qemu_mutex_unlock_iothread();
  1312. replay_mutex_lock();
  1313. qemu_mutex_lock_iothread();
  1314. /* Account partial waits to QEMU_CLOCK_VIRTUAL. */
  1315. qemu_account_warp_timer();
  1316. /* Run the timers here. This is much more efficient than
  1317. * waking up the I/O thread and waiting for completion.
  1318. */
  1319. handle_icount_deadline();
  1320. replay_mutex_unlock();
  1321. if (!cpu) {
  1322. cpu = first_cpu;
  1323. }
  1324. while (cpu && !cpu->queued_work_first && !cpu->exit_request) {
  1325. atomic_mb_set(&tcg_current_rr_cpu, cpu);
  1326. current_cpu = cpu;
  1327. qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
  1328. (cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
  1329. if (cpu_can_run(cpu)) {
  1330. int r;
  1331. qemu_mutex_unlock_iothread();
  1332. prepare_icount_for_run(cpu);
  1333. r = tcg_cpu_exec(cpu);
  1334. process_icount_data(cpu);
  1335. qemu_mutex_lock_iothread();
  1336. if (r == EXCP_DEBUG) {
  1337. cpu_handle_guest_debug(cpu);
  1338. break;
  1339. } else if (r == EXCP_ATOMIC) {
  1340. qemu_mutex_unlock_iothread();
  1341. cpu_exec_step_atomic(cpu);
  1342. qemu_mutex_lock_iothread();
  1343. break;
  1344. }
  1345. } else if (cpu->stop) {
  1346. if (cpu->unplug) {
  1347. cpu = CPU_NEXT(cpu);
  1348. }
  1349. break;
  1350. }
  1351. cpu = CPU_NEXT(cpu);
  1352. } /* while (cpu && !cpu->exit_request).. */
  1353. /* Does not need atomic_mb_set because a spurious wakeup is okay. */
  1354. atomic_set(&tcg_current_rr_cpu, NULL);
  1355. if (cpu && cpu->exit_request) {
  1356. atomic_mb_set(&cpu->exit_request, 0);
  1357. }
  1358. if (use_icount && all_cpu_threads_idle()) {
  1359. /*
  1360. * When all cpus are sleeping (e.g in WFI), to avoid a deadlock
  1361. * in the main_loop, wake it up in order to start the warp timer.
  1362. */
  1363. qemu_notify_event();
  1364. }
  1365. qemu_tcg_rr_wait_io_event();
  1366. deal_with_unplugged_cpus();
  1367. }
  1368. rcu_unregister_thread();
  1369. return NULL;
  1370. }
  1371. static void *qemu_hax_cpu_thread_fn(void *arg)
  1372. {
  1373. CPUState *cpu = arg;
  1374. int r;
  1375. rcu_register_thread();
  1376. qemu_mutex_lock_iothread();
  1377. qemu_thread_get_self(cpu->thread);
  1378. cpu->thread_id = qemu_get_thread_id();
  1379. cpu->created = true;
  1380. current_cpu = cpu;
  1381. hax_init_vcpu(cpu);
  1382. qemu_cond_signal(&qemu_cpu_cond);
  1383. qemu_guest_random_seed_thread_part2(cpu->random_seed);
  1384. do {
  1385. if (cpu_can_run(cpu)) {
  1386. r = hax_smp_cpu_exec(cpu);
  1387. if (r == EXCP_DEBUG) {
  1388. cpu_handle_guest_debug(cpu);
  1389. }
  1390. }
  1391. qemu_wait_io_event(cpu);
  1392. } while (!cpu->unplug || cpu_can_run(cpu));
  1393. rcu_unregister_thread();
  1394. return NULL;
  1395. }
  1396. /* The HVF-specific vCPU thread function. This one should only run when the host
  1397. * CPU supports the VMX "unrestricted guest" feature. */
  1398. static void *qemu_hvf_cpu_thread_fn(void *arg)
  1399. {
  1400. CPUState *cpu = arg;
  1401. int r;
  1402. assert(hvf_enabled());
  1403. rcu_register_thread();
  1404. qemu_mutex_lock_iothread();
  1405. qemu_thread_get_self(cpu->thread);
  1406. cpu->thread_id = qemu_get_thread_id();
  1407. cpu->can_do_io = 1;
  1408. current_cpu = cpu;
  1409. hvf_init_vcpu(cpu);
  1410. /* signal CPU creation */
  1411. cpu->created = true;
  1412. qemu_cond_signal(&qemu_cpu_cond);
  1413. qemu_guest_random_seed_thread_part2(cpu->random_seed);
  1414. do {
  1415. if (cpu_can_run(cpu)) {
  1416. r = hvf_vcpu_exec(cpu);
  1417. if (r == EXCP_DEBUG) {
  1418. cpu_handle_guest_debug(cpu);
  1419. }
  1420. }
  1421. qemu_wait_io_event(cpu);
  1422. } while (!cpu->unplug || cpu_can_run(cpu));
  1423. hvf_vcpu_destroy(cpu);
  1424. cpu->created = false;
  1425. qemu_cond_signal(&qemu_cpu_cond);
  1426. qemu_mutex_unlock_iothread();
  1427. rcu_unregister_thread();
  1428. return NULL;
  1429. }
  1430. static void *qemu_whpx_cpu_thread_fn(void *arg)
  1431. {
  1432. CPUState *cpu = arg;
  1433. int r;
  1434. rcu_register_thread();
  1435. qemu_mutex_lock_iothread();
  1436. qemu_thread_get_self(cpu->thread);
  1437. cpu->thread_id = qemu_get_thread_id();
  1438. current_cpu = cpu;
  1439. r = whpx_init_vcpu(cpu);
  1440. if (r < 0) {
  1441. fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r));
  1442. exit(1);
  1443. }
  1444. /* signal CPU creation */
  1445. cpu->created = true;
  1446. qemu_cond_signal(&qemu_cpu_cond);
  1447. qemu_guest_random_seed_thread_part2(cpu->random_seed);
  1448. do {
  1449. if (cpu_can_run(cpu)) {
  1450. r = whpx_vcpu_exec(cpu);
  1451. if (r == EXCP_DEBUG) {
  1452. cpu_handle_guest_debug(cpu);
  1453. }
  1454. }
  1455. while (cpu_thread_is_idle(cpu)) {
  1456. qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
  1457. }
  1458. qemu_wait_io_event_common(cpu);
  1459. } while (!cpu->unplug || cpu_can_run(cpu));
  1460. whpx_destroy_vcpu(cpu);
  1461. cpu->created = false;
  1462. qemu_cond_signal(&qemu_cpu_cond);
  1463. qemu_mutex_unlock_iothread();
  1464. rcu_unregister_thread();
  1465. return NULL;
  1466. }
  1467. #ifdef _WIN32
  1468. static void CALLBACK dummy_apc_func(ULONG_PTR unused)
  1469. {
  1470. }
  1471. #endif
  1472. /* Multi-threaded TCG
  1473. *
  1474. * In the multi-threaded case each vCPU has its own thread. The TLS
  1475. * variable current_cpu can be used deep in the code to find the
  1476. * current CPUState for a given thread.
  1477. */
  1478. static void *qemu_tcg_cpu_thread_fn(void *arg)
  1479. {
  1480. CPUState *cpu = arg;
  1481. assert(tcg_enabled());
  1482. g_assert(!use_icount);
  1483. rcu_register_thread();
  1484. tcg_register_thread();
  1485. qemu_mutex_lock_iothread();
  1486. qemu_thread_get_self(cpu->thread);
  1487. cpu->thread_id = qemu_get_thread_id();
  1488. cpu->created = true;
  1489. cpu->can_do_io = 1;
  1490. current_cpu = cpu;
  1491. qemu_cond_signal(&qemu_cpu_cond);
  1492. qemu_guest_random_seed_thread_part2(cpu->random_seed);
  1493. /* process any pending work */
  1494. cpu->exit_request = 1;
  1495. do {
  1496. if (cpu_can_run(cpu)) {
  1497. int r;
  1498. qemu_mutex_unlock_iothread();
  1499. r = tcg_cpu_exec(cpu);
  1500. qemu_mutex_lock_iothread();
  1501. switch (r) {
  1502. case EXCP_DEBUG:
  1503. cpu_handle_guest_debug(cpu);
  1504. break;
  1505. case EXCP_HALTED:
  1506. /* during start-up the vCPU is reset and the thread is
  1507. * kicked several times. If we don't ensure we go back
  1508. * to sleep in the halted state we won't cleanly
  1509. * start-up when the vCPU is enabled.
  1510. *
  1511. * cpu->halted should ensure we sleep in wait_io_event
  1512. */
  1513. g_assert(cpu->halted);
  1514. break;
  1515. case EXCP_ATOMIC:
  1516. qemu_mutex_unlock_iothread();
  1517. cpu_exec_step_atomic(cpu);
  1518. qemu_mutex_lock_iothread();
  1519. default:
  1520. /* Ignore everything else? */
  1521. break;
  1522. }
  1523. }
  1524. atomic_mb_set(&cpu->exit_request, 0);
  1525. qemu_wait_io_event(cpu);
  1526. } while (!cpu->unplug || cpu_can_run(cpu));
  1527. qemu_tcg_destroy_vcpu(cpu);
  1528. cpu->created = false;
  1529. qemu_cond_signal(&qemu_cpu_cond);
  1530. qemu_mutex_unlock_iothread();
  1531. rcu_unregister_thread();
  1532. return NULL;
  1533. }
  1534. static void qemu_cpu_kick_thread(CPUState *cpu)
  1535. {
  1536. #ifndef _WIN32
  1537. int err;
  1538. if (cpu->thread_kicked) {
  1539. return;
  1540. }
  1541. cpu->thread_kicked = true;
  1542. err = pthread_kill(cpu->thread->thread, SIG_IPI);
  1543. if (err && err != ESRCH) {
  1544. fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
  1545. exit(1);
  1546. }
  1547. #else /* _WIN32 */
  1548. if (!qemu_cpu_is_self(cpu)) {
  1549. if (whpx_enabled()) {
  1550. whpx_vcpu_kick(cpu);
  1551. } else if (!QueueUserAPC(dummy_apc_func, cpu->hThread, 0)) {
  1552. fprintf(stderr, "%s: QueueUserAPC failed with error %lu\n",
  1553. __func__, GetLastError());
  1554. exit(1);
  1555. }
  1556. }
  1557. #endif
  1558. }
  1559. void qemu_cpu_kick(CPUState *cpu)
  1560. {
  1561. qemu_cond_broadcast(cpu->halt_cond);
  1562. if (tcg_enabled()) {
  1563. cpu_exit(cpu);
  1564. /* NOP unless doing single-thread RR */
  1565. qemu_cpu_kick_rr_cpu();
  1566. } else {
  1567. if (hax_enabled()) {
  1568. /*
  1569. * FIXME: race condition with the exit_request check in
  1570. * hax_vcpu_hax_exec
  1571. */
  1572. cpu->exit_request = 1;
  1573. }
  1574. qemu_cpu_kick_thread(cpu);
  1575. }
  1576. }
  1577. void qemu_cpu_kick_self(void)
  1578. {
  1579. assert(current_cpu);
  1580. qemu_cpu_kick_thread(current_cpu);
  1581. }
  1582. bool qemu_cpu_is_self(CPUState *cpu)
  1583. {
  1584. return qemu_thread_is_self(cpu->thread);
  1585. }
  1586. bool qemu_in_vcpu_thread(void)
  1587. {
  1588. return current_cpu && qemu_cpu_is_self(current_cpu);
  1589. }
  1590. static __thread bool iothread_locked = false;
  1591. bool qemu_mutex_iothread_locked(void)
  1592. {
  1593. return iothread_locked;
  1594. }
  1595. /*
  1596. * The BQL is taken from so many places that it is worth profiling the
  1597. * callers directly, instead of funneling them all through a single function.
  1598. */
  1599. void qemu_mutex_lock_iothread_impl(const char *file, int line)
  1600. {
  1601. QemuMutexLockFunc bql_lock = atomic_read(&qemu_bql_mutex_lock_func);
  1602. g_assert(!qemu_mutex_iothread_locked());
  1603. bql_lock(&qemu_global_mutex, file, line);
  1604. iothread_locked = true;
  1605. }
  1606. void qemu_mutex_unlock_iothread(void)
  1607. {
  1608. g_assert(qemu_mutex_iothread_locked());
  1609. iothread_locked = false;
  1610. qemu_mutex_unlock(&qemu_global_mutex);
  1611. }
  1612. static bool all_vcpus_paused(void)
  1613. {
  1614. CPUState *cpu;
  1615. CPU_FOREACH(cpu) {
  1616. if (!cpu->stopped) {
  1617. return false;
  1618. }
  1619. }
  1620. return true;
  1621. }
  1622. void pause_all_vcpus(void)
  1623. {
  1624. CPUState *cpu;
  1625. qemu_clock_enable(QEMU_CLOCK_VIRTUAL, false);
  1626. CPU_FOREACH(cpu) {
  1627. if (qemu_cpu_is_self(cpu)) {
  1628. qemu_cpu_stop(cpu, true);
  1629. } else {
  1630. cpu->stop = true;
  1631. qemu_cpu_kick(cpu);
  1632. }
  1633. }
  1634. /* We need to drop the replay_lock so any vCPU threads woken up
  1635. * can finish their replay tasks
  1636. */
  1637. replay_mutex_unlock();
  1638. while (!all_vcpus_paused()) {
  1639. qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
  1640. CPU_FOREACH(cpu) {
  1641. qemu_cpu_kick(cpu);
  1642. }
  1643. }
  1644. qemu_mutex_unlock_iothread();
  1645. replay_mutex_lock();
  1646. qemu_mutex_lock_iothread();
  1647. }
  1648. void cpu_resume(CPUState *cpu)
  1649. {
  1650. cpu->stop = false;
  1651. cpu->stopped = false;
  1652. qemu_cpu_kick(cpu);
  1653. }
  1654. void resume_all_vcpus(void)
  1655. {
  1656. CPUState *cpu;
  1657. qemu_clock_enable(QEMU_CLOCK_VIRTUAL, true);
  1658. CPU_FOREACH(cpu) {
  1659. cpu_resume(cpu);
  1660. }
  1661. }
  1662. void cpu_remove_sync(CPUState *cpu)
  1663. {
  1664. cpu->stop = true;
  1665. cpu->unplug = true;
  1666. qemu_cpu_kick(cpu);
  1667. qemu_mutex_unlock_iothread();
  1668. qemu_thread_join(cpu->thread);
  1669. qemu_mutex_lock_iothread();
  1670. }
  1671. /* For temporary buffers for forming a name */
  1672. #define VCPU_THREAD_NAME_SIZE 16
  1673. static void qemu_tcg_init_vcpu(CPUState *cpu)
  1674. {
  1675. char thread_name[VCPU_THREAD_NAME_SIZE];
  1676. static QemuCond *single_tcg_halt_cond;
  1677. static QemuThread *single_tcg_cpu_thread;
  1678. static int tcg_region_inited;
  1679. assert(tcg_enabled());
  1680. /*
  1681. * Initialize TCG regions--once. Now is a good time, because:
  1682. * (1) TCG's init context, prologue and target globals have been set up.
  1683. * (2) qemu_tcg_mttcg_enabled() works now (TCG init code runs before the
  1684. * -accel flag is processed, so the check doesn't work then).
  1685. */
  1686. if (!tcg_region_inited) {
  1687. tcg_region_inited = 1;
  1688. tcg_region_init();
  1689. }
  1690. if (qemu_tcg_mttcg_enabled() || !single_tcg_cpu_thread) {
  1691. cpu->thread = g_malloc0(sizeof(QemuThread));
  1692. cpu->halt_cond = g_malloc0(sizeof(QemuCond));
  1693. qemu_cond_init(cpu->halt_cond);
  1694. if (qemu_tcg_mttcg_enabled()) {
  1695. /* create a thread per vCPU with TCG (MTTCG) */
  1696. parallel_cpus = true;
  1697. snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/TCG",
  1698. cpu->cpu_index);
  1699. qemu_thread_create(cpu->thread, thread_name, qemu_tcg_cpu_thread_fn,
  1700. cpu, QEMU_THREAD_JOINABLE);
  1701. } else {
  1702. /* share a single thread for all cpus with TCG */
  1703. snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "ALL CPUs/TCG");
  1704. qemu_thread_create(cpu->thread, thread_name,
  1705. qemu_tcg_rr_cpu_thread_fn,
  1706. cpu, QEMU_THREAD_JOINABLE);
  1707. single_tcg_halt_cond = cpu->halt_cond;
  1708. single_tcg_cpu_thread = cpu->thread;
  1709. }
  1710. #ifdef _WIN32
  1711. cpu->hThread = qemu_thread_get_handle(cpu->thread);
  1712. #endif
  1713. } else {
  1714. /* For non-MTTCG cases we share the thread */
  1715. cpu->thread = single_tcg_cpu_thread;
  1716. cpu->halt_cond = single_tcg_halt_cond;
  1717. cpu->thread_id = first_cpu->thread_id;
  1718. cpu->can_do_io = 1;
  1719. cpu->created = true;
  1720. }
  1721. }
  1722. static void qemu_hax_start_vcpu(CPUState *cpu)
  1723. {
  1724. char thread_name[VCPU_THREAD_NAME_SIZE];
  1725. cpu->thread = g_malloc0(sizeof(QemuThread));
  1726. cpu->halt_cond = g_malloc0(sizeof(QemuCond));
  1727. qemu_cond_init(cpu->halt_cond);
  1728. snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HAX",
  1729. cpu->cpu_index);
  1730. qemu_thread_create(cpu->thread, thread_name, qemu_hax_cpu_thread_fn,
  1731. cpu, QEMU_THREAD_JOINABLE);
  1732. #ifdef _WIN32
  1733. cpu->hThread = qemu_thread_get_handle(cpu->thread);
  1734. #endif
  1735. }
  1736. static void qemu_kvm_start_vcpu(CPUState *cpu)
  1737. {
  1738. char thread_name[VCPU_THREAD_NAME_SIZE];
  1739. cpu->thread = g_malloc0(sizeof(QemuThread));
  1740. cpu->halt_cond = g_malloc0(sizeof(QemuCond));
  1741. qemu_cond_init(cpu->halt_cond);
  1742. snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/KVM",
  1743. cpu->cpu_index);
  1744. qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
  1745. cpu, QEMU_THREAD_JOINABLE);
  1746. }
  1747. static void qemu_hvf_start_vcpu(CPUState *cpu)
  1748. {
  1749. char thread_name[VCPU_THREAD_NAME_SIZE];
  1750. /* HVF currently does not support TCG, and only runs in
  1751. * unrestricted-guest mode. */
  1752. assert(hvf_enabled());
  1753. cpu->thread = g_malloc0(sizeof(QemuThread));
  1754. cpu->halt_cond = g_malloc0(sizeof(QemuCond));
  1755. qemu_cond_init(cpu->halt_cond);
  1756. snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/HVF",
  1757. cpu->cpu_index);
  1758. qemu_thread_create(cpu->thread, thread_name, qemu_hvf_cpu_thread_fn,
  1759. cpu, QEMU_THREAD_JOINABLE);
  1760. }
  1761. static void qemu_whpx_start_vcpu(CPUState *cpu)
  1762. {
  1763. char thread_name[VCPU_THREAD_NAME_SIZE];
  1764. cpu->thread = g_malloc0(sizeof(QemuThread));
  1765. cpu->halt_cond = g_malloc0(sizeof(QemuCond));
  1766. qemu_cond_init(cpu->halt_cond);
  1767. snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX",
  1768. cpu->cpu_index);
  1769. qemu_thread_create(cpu->thread, thread_name, qemu_whpx_cpu_thread_fn,
  1770. cpu, QEMU_THREAD_JOINABLE);
  1771. #ifdef _WIN32
  1772. cpu->hThread = qemu_thread_get_handle(cpu->thread);
  1773. #endif
  1774. }
  1775. static void qemu_dummy_start_vcpu(CPUState *cpu)
  1776. {
  1777. char thread_name[VCPU_THREAD_NAME_SIZE];
  1778. cpu->thread = g_malloc0(sizeof(QemuThread));
  1779. cpu->halt_cond = g_malloc0(sizeof(QemuCond));
  1780. qemu_cond_init(cpu->halt_cond);
  1781. snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/DUMMY",
  1782. cpu->cpu_index);
  1783. qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
  1784. QEMU_THREAD_JOINABLE);
  1785. }
  1786. void qemu_init_vcpu(CPUState *cpu)
  1787. {
  1788. MachineState *ms = MACHINE(qdev_get_machine());
  1789. cpu->nr_cores = ms->smp.cores;
  1790. cpu->nr_threads = ms->smp.threads;
  1791. cpu->stopped = true;
  1792. cpu->random_seed = qemu_guest_random_seed_thread_part1();
  1793. if (!cpu->as) {
  1794. /* If the target cpu hasn't set up any address spaces itself,
  1795. * give it the default one.
  1796. */
  1797. cpu->num_ases = 1;
  1798. cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
  1799. }
  1800. if (kvm_enabled()) {
  1801. qemu_kvm_start_vcpu(cpu);
  1802. } else if (hax_enabled()) {
  1803. qemu_hax_start_vcpu(cpu);
  1804. } else if (hvf_enabled()) {
  1805. qemu_hvf_start_vcpu(cpu);
  1806. } else if (tcg_enabled()) {
  1807. qemu_tcg_init_vcpu(cpu);
  1808. } else if (whpx_enabled()) {
  1809. qemu_whpx_start_vcpu(cpu);
  1810. } else {
  1811. qemu_dummy_start_vcpu(cpu);
  1812. }
  1813. while (!cpu->created) {
  1814. qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
  1815. }
  1816. }
  1817. void cpu_stop_current(void)
  1818. {
  1819. if (current_cpu) {
  1820. current_cpu->stop = true;
  1821. cpu_exit(current_cpu);
  1822. }
  1823. }
  1824. int vm_stop(RunState state)
  1825. {
  1826. if (qemu_in_vcpu_thread()) {
  1827. qemu_system_vmstop_request_prepare();
  1828. qemu_system_vmstop_request(state);
  1829. /*
  1830. * FIXME: should not return to device code in case
  1831. * vm_stop() has been requested.
  1832. */
  1833. cpu_stop_current();
  1834. return 0;
  1835. }
  1836. return do_vm_stop(state, true);
  1837. }
  1838. /**
  1839. * Prepare for (re)starting the VM.
  1840. * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already
  1841. * running or in case of an error condition), 0 otherwise.
  1842. */
  1843. int vm_prepare_start(void)
  1844. {
  1845. RunState requested;
  1846. qemu_vmstop_requested(&requested);
  1847. if (runstate_is_running() && requested == RUN_STATE__MAX) {
  1848. return -1;
  1849. }
  1850. /* Ensure that a STOP/RESUME pair of events is emitted if a
  1851. * vmstop request was pending. The BLOCK_IO_ERROR event, for
  1852. * example, according to documentation is always followed by
  1853. * the STOP event.
  1854. */
  1855. if (runstate_is_running()) {
  1856. qapi_event_send_stop();
  1857. qapi_event_send_resume();
  1858. return -1;
  1859. }
  1860. /* We are sending this now, but the CPUs will be resumed shortly later */
  1861. qapi_event_send_resume();
  1862. replay_enable_events();
  1863. cpu_enable_ticks();
  1864. runstate_set(RUN_STATE_RUNNING);
  1865. vm_state_notify(1, RUN_STATE_RUNNING);
  1866. return 0;
  1867. }
  1868. void vm_start(void)
  1869. {
  1870. if (!vm_prepare_start()) {
  1871. resume_all_vcpus();
  1872. }
  1873. }
  1874. /* does a state transition even if the VM is already stopped,
  1875. current state is forgotten forever */
  1876. int vm_stop_force_state(RunState state)
  1877. {
  1878. if (runstate_is_running()) {
  1879. return vm_stop(state);
  1880. } else {
  1881. runstate_set(state);
  1882. bdrv_drain_all();
  1883. /* Make sure to return an error if the flush in a previous vm_stop()
  1884. * failed. */
  1885. return bdrv_flush_all();
  1886. }
  1887. }
  1888. void list_cpus(const char *optarg)
  1889. {
  1890. /* XXX: implement xxx_cpu_list for targets that still miss it */
  1891. #if defined(cpu_list)
  1892. cpu_list();
  1893. #endif
  1894. }
  1895. void qmp_memsave(int64_t addr, int64_t size, const char *filename,
  1896. bool has_cpu, int64_t cpu_index, Error **errp)
  1897. {
  1898. FILE *f;
  1899. uint32_t l;
  1900. CPUState *cpu;
  1901. uint8_t buf[1024];
  1902. int64_t orig_addr = addr, orig_size = size;
  1903. if (!has_cpu) {
  1904. cpu_index = 0;
  1905. }
  1906. cpu = qemu_get_cpu(cpu_index);
  1907. if (cpu == NULL) {
  1908. error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
  1909. "a CPU number");
  1910. return;
  1911. }
  1912. f = fopen(filename, "wb");
  1913. if (!f) {
  1914. error_setg_file_open(errp, errno, filename);
  1915. return;
  1916. }
  1917. while (size != 0) {
  1918. l = sizeof(buf);
  1919. if (l > size)
  1920. l = size;
  1921. if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
  1922. error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
  1923. " specified", orig_addr, orig_size);
  1924. goto exit;
  1925. }
  1926. if (fwrite(buf, 1, l, f) != l) {
  1927. error_setg(errp, QERR_IO_ERROR);
  1928. goto exit;
  1929. }
  1930. addr += l;
  1931. size -= l;
  1932. }
  1933. exit:
  1934. fclose(f);
  1935. }
  1936. void qmp_pmemsave(int64_t addr, int64_t size, const char *filename,
  1937. Error **errp)
  1938. {
  1939. FILE *f;
  1940. uint32_t l;
  1941. uint8_t buf[1024];
  1942. f = fopen(filename, "wb");
  1943. if (!f) {
  1944. error_setg_file_open(errp, errno, filename);
  1945. return;
  1946. }
  1947. while (size != 0) {
  1948. l = sizeof(buf);
  1949. if (l > size)
  1950. l = size;
  1951. cpu_physical_memory_read(addr, buf, l);
  1952. if (fwrite(buf, 1, l, f) != l) {
  1953. error_setg(errp, QERR_IO_ERROR);
  1954. goto exit;
  1955. }
  1956. addr += l;
  1957. size -= l;
  1958. }
  1959. exit:
  1960. fclose(f);
  1961. }
  1962. void qmp_inject_nmi(Error **errp)
  1963. {
  1964. nmi_monitor_handle(monitor_get_cpu_index(), errp);
  1965. }
  1966. void dump_drift_info(void)
  1967. {
  1968. if (!use_icount) {
  1969. return;
  1970. }
  1971. qemu_printf("Host - Guest clock %"PRIi64" ms\n",
  1972. (cpu_get_clock() - cpu_get_icount())/SCALE_MS);
  1973. if (icount_align_option) {
  1974. qemu_printf("Max guest delay %"PRIi64" ms\n",
  1975. -max_delay / SCALE_MS);
  1976. qemu_printf("Max guest advance %"PRIi64" ms\n",
  1977. max_advance / SCALE_MS);
  1978. } else {
  1979. qemu_printf("Max guest delay NA\n");
  1980. qemu_printf("Max guest advance NA\n");
  1981. }
  1982. }