#[cfg(target_arch = "aarch64")]
use std::arch::asm;
use std::collections::BTreeMap;
use std::mem::replace;
use std::mem::size_of;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::time::Duration;

use anyhow::anyhow;
use anyhow::bail;
use anyhow::Context;
use anyhow::Result;
use base::error;
use base::info;
use base::warn;
use base::AsRawDescriptor;
#[cfg(windows)]
use base::CloseNotifier;
use base::Error;
use base::Event;
use base::EventToken;
use base::RawDescriptor;
use base::ReadNotifier;
use base::Tube;
use base::WaitContext;
use base::WorkerThread;
use chrono::DateTime;
use chrono::Utc;
use data_model::Le32;
use data_model::Le64;
use serde::Deserialize;
use serde::Serialize;
use snapshot::AnySnapshot;
use vm_control::PvClockCommand;
use vm_control::PvClockCommandResponse;
use vm_memory::GuestAddress;
use vm_memory::GuestMemory;
use vm_memory::GuestMemoryError;
use zerocopy::FromBytes;
use zerocopy::Immutable;
use zerocopy::IntoBytes;
use zerocopy::KnownLayout;

use super::copy_config;
use super::DeviceType;
use super::Interrupt;
use super::Queue;
use super::VirtioDevice;

const QUEUE_SIZE: u16 = 1;
const QUEUE_SIZES: &[u16] = &[QUEUE_SIZE];

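// Flag bits for the `flags` field of the pvclock shared memory structure,
// matching the Linux pvclock ABI.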
const PVCLOCK_TSC_STABLE_BIT: u8 = 1;
const PVCLOCK_GUEST_STOPPED: u8 = 2;

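// Virtio-pvclock feature bits and request status values.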
const VIRTIO_PVCLOCK_F_TSC_STABLE: u64 = 0;
const VIRTIO_PVCLOCK_F_INJECT_SLEEP: u64 = 1;
const VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING: u64 = 2;

const VIRTIO_PVCLOCK_S_OK: u8 = 0;
const VIRTIO_PVCLOCK_S_IOERR: u8 = 1;

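// Clocksource rating advertised to the guest; presumably chosen to outrank
// the guest's other clocksources (e.g. kvm-clock, rated 400).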
const VIRTIO_PVCLOCK_CLOCKSOURCE_RATING: u32 = 450;

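/// Reads the platform's raw clock counter: TSC on x86_64, CNTVCT_EL0 on
/// aarch64.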
#[cfg(target_arch = "x86_64")]
fn read_clock_counter() -> u64 {
    // SAFETY: `_rdtsc` is unprivileged and has no memory safety requirements.
    unsafe { std::arch::x86_64::_rdtsc() }
}

#[cfg(target_arch = "aarch64")]
fn read_clock_counter() -> u64 {
    let mut x: u64;
    // SAFETY: reading the virtual counter-timer register has no side effects.
    unsafe {
        asm!("mrs {x}, cntvct_el0",
            x = out(reg) x,
        );
    }
    x
}

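/// Computes a (multiplier, shift) pair that scales a counter running at
/// `base_hz` into `scaled_hz` units, using pvclock's scheme: shift the
/// counter (left if `shift` is positive, right if negative), then multiply by
/// `mult / 2^32`.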
fn freq_scale_shift(scaled_hz: u64, base_hz: u64) -> (u32, i8) {
    assert!(scaled_hz > 0 && base_hz > 0);
    let mut shift = 0;
    let mut scaled_hz = scaled_hz as u128;
    let mut base_hz = base_hz as u128;
    if scaled_hz >= base_hz {
        while scaled_hz >= base_hz {
            base_hz <<= 1;
            shift += 1;
        }
    } else {
        while base_hz > 2 * scaled_hz {
            scaled_hz <<= 1;
            shift -= 1;
        }
    }
    assert!(base_hz < (1u128 << 65) && scaled_hz < (1u128 << 65));
    let mult: u32 = ((scaled_hz << 32) / base_hz)
        .try_into()
        .expect("should not overflow");
    (mult, shift)
}

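/// Device configuration space: cumulative time the VM has been suspended (in
/// ns) and the clocksource rating to advertise, plus explicit padding.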
#[derive(Debug, Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[allow(non_camel_case_types)]
#[repr(C)]
struct virtio_pvclock_config {
    suspend_time_ns: Le64,
    clocksource_rating: Le32,
    padding: u32,
}

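/// Layout of the request read from (and status written back to) the
/// set_pvclock_page queue.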
#[derive(Debug, Clone, Copy, Default, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[allow(non_camel_case_types)]
#[repr(C)]
struct virtio_pvclock_set_pvclock_page_req {
    pvclock_page_pa: Le64,
    system_time: Le64,
    tsc_timestamp: Le64,
    status: u8,
    padding: [u8; 7],
}

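/// Device-side handle to the pvclock shared memory page supplied by the
/// guest.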
struct PvclockSharedData {
    mem: GuestMemory,
    seqlock_addr: GuestAddress,
    tsc_suspended_delta_addr: GuestAddress,
    tsc_frequency_multiplier_addr: GuestAddress,
    tsc_frequency_shift_addr: GuestAddress,
    flags_addr: GuestAddress,
}

impl PvclockSharedData {
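    // The field addresses below are offsets from the page base, mirroring the
    // guest's `pvclock_vcpu_time_info` layout: version/seqlock at 0,
    // tsc_timestamp at 8, tsc_to_system_mul at 24, tsc_shift at 28, and
    // flags at 29.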
    pub fn new(mem: GuestMemory, addr: GuestAddress) -> Self {
        PvclockSharedData {
            mem,
            seqlock_addr: addr,
            tsc_suspended_delta_addr: addr.unchecked_add(8),
            tsc_frequency_multiplier_addr: addr.unchecked_add(24),
            tsc_frequency_shift_addr: addr.unchecked_add(28),
            flags_addr: addr.unchecked_add(29),
        }
    }

    fn snapshot(&self) -> GuestAddress {
        self.seqlock_addr
    }

    pub fn zero_fill(&mut self) -> Result<()> {
        self.mem
            .write_all_at_addr(&[0u8; 32], self.seqlock_addr)
            .context("failed to zero fill the pvclock shared data")
    }

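    /// The seqlock is incremented once before and once after an update, so
    /// the guest can detect (and retry) reads that race with a device-side
    /// write.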
    pub fn increment_seqlock(&mut self) -> Result<()> {
        let value = self
            .mem
            .read_obj_from_addr::<u32>(self.seqlock_addr)
            .context("failed to read seqlock value")?;
        self.mem
            .write_obj_at_addr(value.wrapping_add(1), self.seqlock_addr)
            .context("failed to write seqlock value")
    }

    pub fn set_tsc_suspended_delta(&mut self, delta: u64) -> Result<()> {
        self.mem
            .write_obj_at_addr(delta, self.tsc_suspended_delta_addr)
            .context("failed to write tsc suspended delta")
    }

    pub fn set_tsc_frequency(&mut self, frequency: u64) -> Result<()> {
        // Derive the multiplier/shift pair that scales a counter running at
        // `frequency` Hz into nanoseconds (1 GHz).
        let (multiplier, shift): (u32, i8) = freq_scale_shift(1_000_000_000, frequency);

        self.mem
            .write_obj_at_addr(multiplier, self.tsc_frequency_multiplier_addr)
            .context("failed to write tsc frequency multiplier")?;
        self.mem
            .write_obj_at_addr(shift, self.tsc_frequency_shift_addr)
            .context("failed to write tsc frequency shift")
    }

    pub fn enable_pvclock_flags(&mut self, flags: u8) -> Result<()> {
        let value = self
            .mem
            .read_obj_from_addr::<u8>(self.flags_addr)
            .context("failed to read flags")?;
        self.mem
            .write_obj_at_addr(value | flags, self.flags_addr)
            .context("failed to write flags")
    }
}

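/// Serializable device state, preserved across snapshot/restore and
/// sleep/wake.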
#[derive(Serialize, Deserialize)]
struct PvClockState {
    tsc_frequency: u64,
    paused_main_worker: Option<PvClockWorkerSnapshot>,
    total_suspend_ns: Arc<AtomicU64>,
    features: u64,
    acked_features: u64,
}

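/// The current worker. `Idle` holds the suspend tube while no worker thread
/// is running; `None` exists only transiently during state transitions.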
enum PvClockWorkerState {
    Idle(Tube),
    Stub(WorkerThread<StubWorkerReturn>),
    Main(WorkerThread<MainWorkerReturn>),
    None,
}

pub struct PvClock {
    state: PvClockState,
    worker_state: PvClockWorkerState,
}

impl PvClock {
    pub fn new(base_features: u64, tsc_frequency: u64, suspend_tube: Tube) -> Self {
        let state = PvClockState {
            tsc_frequency,
            paused_main_worker: None,
            total_suspend_ns: Arc::new(AtomicU64::new(0)),
            features: base_features
                | 1 << VIRTIO_PVCLOCK_F_TSC_STABLE
                | 1 << VIRTIO_PVCLOCK_F_INJECT_SLEEP
                | 1 << VIRTIO_PVCLOCK_F_CLOCKSOURCE_RATING,
            acked_features: 0,
        };
        PvClock {
            state,
            worker_state: PvClockWorkerState::Idle(suspend_tube),
        }
    }

    fn get_config(&self) -> virtio_pvclock_config {
        virtio_pvclock_config {
            suspend_time_ns: self.state.total_suspend_ns.load(Ordering::SeqCst).into(),
            clocksource_rating: VIRTIO_PVCLOCK_CLOCKSOURCE_RATING.into(),
            padding: 0,
        }
    }

    fn start_main_worker(
        &mut self,
        interrupt: Interrupt,
        pvclock_worker: PvClockWorker,
        mut queues: BTreeMap<usize, Queue>,
    ) -> anyhow::Result<()> {
        let last_state = replace(&mut self.worker_state, PvClockWorkerState::None);
        if let PvClockWorkerState::Idle(suspend_tube) = last_state {
            if queues.len() != QUEUE_SIZES.len() {
                self.worker_state = PvClockWorkerState::Idle(suspend_tube);
                return Err(anyhow!(
                    "expected {} queues, got {}",
                    QUEUE_SIZES.len(),
                    queues.len()
                ));
            }
            let set_pvclock_page_queue = queues.remove(&0).unwrap();
            self.worker_state = PvClockWorkerState::Main(WorkerThread::start(
                "virtio_pvclock".to_string(),
                move |kill_evt| {
                    run_main_worker(
                        pvclock_worker,
                        set_pvclock_page_queue,
                        suspend_tube,
                        interrupt,
                        kill_evt,
                    )
                },
            ));
        } else {
            panic!("Invalid state transition");
        }
        Ok(())
    }

    fn start_stub_worker(&mut self) {
        let last_state = replace(&mut self.worker_state, PvClockWorkerState::None);
        self.worker_state = if let PvClockWorkerState::Idle(suspend_tube) = last_state {
            PvClockWorkerState::Stub(WorkerThread::start(
                "virtio_pvclock_stub".to_string(),
                move |kill_evt| run_stub_worker(suspend_tube, kill_evt),
            ))
        } else {
            panic!("Invalid state transition");
        };
    }

    fn stop_stub_worker(&mut self) {
        let last_state = replace(&mut self.worker_state, PvClockWorkerState::None);
        self.worker_state = if let PvClockWorkerState::Stub(stub_worker_thread) = last_state {
            let stub_worker_ret = stub_worker_thread.stop();
            PvClockWorkerState::Idle(stub_worker_ret.suspend_tube)
        } else {
            panic!("Invalid state transition");
        };
    }

    fn stop_main_worker(&mut self) {
        let last_state = replace(&mut self.worker_state, PvClockWorkerState::None);
        if let PvClockWorkerState::Main(main_worker_thread) = last_state {
            let main_worker_ret = main_worker_thread.stop();
            self.worker_state = PvClockWorkerState::Idle(main_worker_ret.suspend_tube);
            // The queue is intentionally dropped here; only a snapshot of the
            // worker is kept so it can be rebuilt on the next activation.
            drop(main_worker_ret.set_pvclock_page_queue);
            self.state.paused_main_worker = Some(main_worker_ret.worker.into());
        } else {
            panic!("Invalid state transition");
        }
    }

    fn switch_to_stub_worker(&mut self) {
        self.stop_main_worker();
        self.start_stub_worker();
    }

    fn switch_to_main_worker(
        &mut self,
        interrupt: Interrupt,
        pvclock_worker: PvClockWorker,
        queues: BTreeMap<usize, Queue>,
    ) -> anyhow::Result<()> {
        self.stop_stub_worker();
        self.start_main_worker(interrupt, pvclock_worker, queues)
    }
}

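/// Matched readings of the wall clock and the raw counter, captured at
/// suspend time.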
#[derive(Serialize, Deserialize, Clone)]
struct PvclockInstant {
    time: DateTime<Utc>,
    tsc_value: u64,
}

#[derive(Serialize, Deserialize, Clone)]
struct PvClockWorkerSnapshot {
    suspend_time: Option<PvclockInstant>,
    total_suspend_tsc_delta: u64,
    pvclock_shared_data_base_address: Option<GuestAddress>,
}

impl From<PvClockWorker> for PvClockWorkerSnapshot {
    fn from(worker: PvClockWorker) -> Self {
        PvClockWorkerSnapshot {
            suspend_time: worker.suspend_time,
            total_suspend_tsc_delta: worker.total_suspend_tsc_delta,
            pvclock_shared_data_base_address: worker
                .pvclock_shared_data
                .map(|pvclock| pvclock.snapshot()),
        }
    }
}

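/// Runtime state of the device: the guest's TSC frequency, the pending
/// suspend instant, the cumulative suspended counter delta, and the shared
/// page (once the guest has registered one).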
struct PvClockWorker {
    tsc_frequency: u64,
    suspend_time: Option<PvclockInstant>,
    total_injected_ns: Arc<AtomicU64>,
    total_suspend_tsc_delta: u64,
    pvclock_shared_data: Option<PvclockSharedData>,
    mem: GuestMemory,
}

impl PvClockWorker {
    pub fn new(tsc_frequency: u64, total_injected_ns: Arc<AtomicU64>, mem: GuestMemory) -> Self {
        PvClockWorker {
            tsc_frequency,
            suspend_time: None,
            total_injected_ns,
            total_suspend_tsc_delta: 0,
            pvclock_shared_data: None,
            mem,
        }
    }

    fn from_snapshot(
        tsc_frequency: u64,
        total_injected_ns: Arc<AtomicU64>,
        snap: PvClockWorkerSnapshot,
        mem: GuestMemory,
    ) -> Self {
        PvClockWorker {
            tsc_frequency,
            suspend_time: snap.suspend_time,
            total_injected_ns,
            total_suspend_tsc_delta: snap.total_suspend_tsc_delta,
            pvclock_shared_data: snap
                .pvclock_shared_data_base_address
                .map(|addr| PvclockSharedData::new(mem.clone(), addr)),
            mem,
        }
    }

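    /// Registers the guest-supplied shared page, zeroing it and writing the
    /// initial frequency scaling parameters and flags.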
    fn set_pvclock_page(&mut self, addr: u64) -> Result<()> {
        if self.pvclock_shared_data.is_some() {
            return Err(Error::new(libc::EALREADY)).context("pvclock page already set");
        }

        let mut shared_data = PvclockSharedData::new(self.mem.clone(), GuestAddress(addr));

        shared_data.zero_fill()?;

        shared_data.set_tsc_frequency(self.tsc_frequency)?;
        shared_data.enable_pvclock_flags(PVCLOCK_TSC_STABLE_BIT)?;

        self.pvclock_shared_data = Some(shared_data);
        Ok(())
    }

    pub fn suspend(&mut self) {
        if self.suspend_time.is_some() {
            warn!("Suspend time already set, ignoring new suspend time");
            return;
        }
        self.suspend_time = Some(PvclockInstant {
            time: Utc::now(),
            tsc_value: read_clock_counter(),
        });
    }

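    /// Publishes the accumulated suspend delta to the guest. The update is
    /// bracketed by seqlock increments and fences so the guest never observes
    /// a half-written record.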
    pub fn resume(&mut self) -> Result<u64> {
        self.increment_pvclock_seqlock()?;

        std::sync::atomic::fence(Ordering::SeqCst);

        let result = self
            .set_guest_stopped_bit()
            .and_then(|_| self.set_suspended_time());

        std::sync::atomic::fence(Ordering::SeqCst);

        self.increment_pvclock_seqlock()?;

        result
    }

    fn get_suspended_duration(suspend_time: &PvclockInstant) -> Duration {
        match Utc::now().signed_duration_since(suspend_time.time).to_std() {
            Ok(duration) => duration,
            Err(e) => {
                error!(
                    "pvclock found suspend time in the future (was the host \
                     clock adjusted?). Guest boot/realtime clock may now be \
                     incorrect. Details: {}",
                    e
                );
                Duration::ZERO
            }
        }
    }

    fn set_suspended_time(&mut self) -> Result<u64> {
        let (this_suspend_duration, this_suspend_tsc_delta) =
            if let Some(suspend_time) = self.suspend_time.take() {
                (
                    Self::get_suspended_duration(&suspend_time),
                    read_clock_counter().wrapping_sub(suspend_time.tsc_value),
                )
            } else {
                return Err(Error::new(libc::ENOTSUP))
                    .context("Cannot set suspend time because suspend was never called");
            };

        self.total_suspend_tsc_delta = self
            .total_suspend_tsc_delta
            .wrapping_add(this_suspend_tsc_delta);

        self.pvclock_shared_data
            .as_mut()
            .ok_or(
                anyhow::Error::new(Error::new(libc::ENODATA)).context("pvclock page is not set"),
            )?
            .set_tsc_suspended_delta(self.total_suspend_tsc_delta)?;

        info!(
            "set total suspend tsc delta to {}",
            self.total_suspend_tsc_delta
        );

        self.total_injected_ns
            .fetch_add(this_suspend_duration.as_nanos() as u64, Ordering::SeqCst);

        Ok(self.total_suspend_tsc_delta)
    }

    fn increment_pvclock_seqlock(&mut self) -> Result<()> {
        self.pvclock_shared_data
            .as_mut()
            .ok_or(
                anyhow::Error::new(Error::new(libc::ENODATA)).context("pvclock page is not set"),
            )?
            .increment_seqlock()
    }

    fn set_guest_stopped_bit(&mut self) -> Result<()> {
        self.pvclock_shared_data
            .as_mut()
            .ok_or(
                anyhow::Error::new(Error::new(libc::ENODATA)).context("pvclock page is not set"),
            )?
            .enable_pvclock_flags(PVCLOCK_GUEST_STOPPED)
    }
}

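/// Extracts a raw OS error from an anyhow error chain for the command
/// response, falling back to EFAULT when none is found.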
fn pvclock_response_error_from_anyhow(error: anyhow::Error) -> base::Error {
    for cause in error.chain() {
        if let Some(e) = cause.downcast_ref::<base::Error>() {
            return *e;
        }

        if let Some(e) = cause.downcast_ref::<GuestMemoryError>() {
            return match e {
                GuestMemoryError::MemoryAddSealsFailed(e) => *e,
                GuestMemoryError::MemoryCreationFailed(e) => *e,
                _ => Error::new(libc::EINVAL),
            };
        }
    }
    Error::new(libc::EFAULT)
}

struct StubWorkerReturn {
    suspend_tube: Tube,
}

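/// Worker that runs while the device is inactive: it drains commands from the
/// suspend tube and answers each with `DeviceInactive`.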
fn run_stub_worker(suspend_tube: Tube, kill_evt: Event) -> StubWorkerReturn {
    #[derive(EventToken, Debug)]
    enum Token {
        SomePvClockRequest,
        Kill,
    }
    let wait_ctx: WaitContext<Token> = match WaitContext::build_with(&[
        (suspend_tube.get_read_notifier(), Token::SomePvClockRequest),
        #[cfg(windows)]
        (suspend_tube.get_close_notifier(), Token::Kill),
        (&kill_evt, Token::Kill),
    ]) {
        Ok(wait_ctx) => wait_ctx,
        Err(e) => {
            error!("failed creating WaitContext: {}", e);
            return StubWorkerReturn { suspend_tube };
        }
    };
    'wait: loop {
        let events = match wait_ctx.wait() {
            Ok(v) => v,
            Err(e) => {
                error!("failed polling for events: {}", e);
                break;
            }
        };
        for event in events.iter().filter(|e| e.is_readable) {
            match event.token {
                Token::SomePvClockRequest => {
                    match suspend_tube.recv::<PvClockCommand>() {
                        Ok(req) => req,
                        Err(e) => {
                            error!("failed to receive request: {}", e);
                            continue;
                        }
                    };
                    if let Err(e) = suspend_tube.send(&PvClockCommandResponse::DeviceInactive) {
                        error!("error sending PvClockCommandResponse: {}", e);
                    }
                }
                Token::Kill => {
                    break 'wait;
                }
            }
        }
    }
    StubWorkerReturn { suspend_tube }
}

struct MainWorkerReturn {
    worker: PvClockWorker,
    set_pvclock_page_queue: Queue,
    suspend_tube: Tube,
}

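/// Main worker: services set_pvclock_page requests from the queue and
/// suspend/resume commands from the suspend tube.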
fn run_main_worker(
    mut worker: PvClockWorker,
    mut set_pvclock_page_queue: Queue,
    suspend_tube: Tube,
    interrupt: Interrupt,
    kill_evt: Event,
) -> MainWorkerReturn {
    #[derive(EventToken)]
    enum Token {
        SetPvClockPageQueue,
        SuspendResume,
        Kill,
    }

    let wait_ctx: WaitContext<Token> = match WaitContext::build_with(&[
        (set_pvclock_page_queue.event(), Token::SetPvClockPageQueue),
        (suspend_tube.get_read_notifier(), Token::SuspendResume),
        #[cfg(windows)]
        (suspend_tube.get_close_notifier(), Token::Kill),
        (&kill_evt, Token::Kill),
    ]) {
        Ok(pc) => pc,
        Err(e) => {
            error!("failed creating WaitContext: {}", e);
            return MainWorkerReturn {
                suspend_tube,
                set_pvclock_page_queue,
                worker,
            };
        }
    };

    'wait: loop {
        let events = match wait_ctx.wait() {
            Ok(v) => v,
            Err(e) => {
                error!("failed polling for events: {}", e);
                break;
            }
        };

        for event in events.iter().filter(|e| e.is_readable) {
            match event.token {
                Token::SetPvClockPageQueue => {
                    let _ = set_pvclock_page_queue.event().wait();
                    let desc_chain = match set_pvclock_page_queue.pop() {
                        Some(desc_chain) => desc_chain,
                        None => {
                            continue;
                        }
                    };

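                    // The request is read from, and the status written back
                    // to, the first descriptor region of the chain.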
                    let desc = desc_chain
                        .reader
                        .get_remaining_regions()
                        .chain(desc_chain.writer.get_remaining_regions())
                        .next()
                        .unwrap();

                    let len = if desc.len < size_of::<virtio_pvclock_set_pvclock_page_req>() {
                        error!("pvclock descriptor too short");
                        0
                    } else {
                        let addr = GuestAddress(desc.offset);
                        let mut req: virtio_pvclock_set_pvclock_page_req =
                            match worker.mem.read_obj_from_addr(addr) {
                                Ok(req) => req,
                                Err(e) => {
                                    error!(
                                        "failed to read request from set_pvclock_page queue: {}",
                                        e
                                    );
                                    continue;
                                }
                            };

                        req.status = match worker.set_pvclock_page(req.pvclock_page_pa.into()) {
                            Err(e) => {
                                error!("failed to set pvclock page: {:#}", e);
                                VIRTIO_PVCLOCK_S_IOERR
                            }
                            Ok(_) => VIRTIO_PVCLOCK_S_OK,
                        };

                        if let Err(e) = worker.mem.write_obj_at_addr(req, addr) {
                            error!("failed to write set_pvclock_page status: {}", e);
                            continue;
                        }

                        desc.len as u32
                    };

                    set_pvclock_page_queue.add_used_with_bytes_written(desc_chain, len);
                    set_pvclock_page_queue.trigger_interrupt();
                }
                Token::SuspendResume => {
                    let req = match suspend_tube.recv::<PvClockCommand>() {
                        Ok(req) => req,
                        Err(e) => {
                            error!("failed to receive request: {}", e);
                            continue;
                        }
                    };

                    let resp = match req {
                        PvClockCommand::Suspend => {
                            worker.suspend();
                            PvClockCommandResponse::Ok
                        }
                        PvClockCommand::Resume => match worker.resume() {
                            Ok(total_suspended_ticks) => {
                                interrupt.signal_config_changed();
                                PvClockCommandResponse::Resumed {
                                    total_suspended_ticks,
                                }
                            }
                            Err(e) => {
                                error!("Failed to resume pvclock: {:#}", e);
                                PvClockCommandResponse::Err(pvclock_response_error_from_anyhow(e))
                            }
                        },
                    };

                    if let Err(e) = suspend_tube.send(&resp) {
                        error!("error sending PvClockCommandResponse: {}", e);
                    }
                }
                Token::Kill => {
                    break 'wait;
                }
            }
        }
    }

    MainWorkerReturn {
        suspend_tube,
        set_pvclock_page_queue,
        worker,
    }
}

impl VirtioDevice for PvClock {
    fn keep_rds(&self) -> Vec<RawDescriptor> {
        if let PvClockWorkerState::Idle(suspend_tube) = &self.worker_state {
            vec![suspend_tube.as_raw_descriptor()]
        } else {
            Vec::new()
        }
    }

    fn device_type(&self) -> DeviceType {
        DeviceType::Pvclock
    }

    fn queue_max_sizes(&self) -> &[u16] {
        QUEUE_SIZES
    }

    fn features(&self) -> u64 {
        self.state.features
    }

    fn ack_features(&mut self, mut value: u64) {
        if value & !self.features() != 0 {
            warn!("virtio-pvclock got unknown feature ack {:x}", value);
            value &= self.features();
        }
        self.state.acked_features |= value;
    }

    fn read_config(&self, offset: u64, data: &mut [u8]) {
        copy_config(data, 0, self.get_config().as_bytes(), offset);
    }

    fn write_config(&mut self, offset: u64, data: &[u8]) {
        warn!(
            "Unexpected write to virtio-pvclock config at offset {}: {:?}",
            offset, data
        );
    }

    fn activate(
        &mut self,
        mem: GuestMemory,
        interrupt: Interrupt,
        queues: BTreeMap<usize, Queue>,
    ) -> anyhow::Result<()> {
        let tsc_frequency = self.state.tsc_frequency;
        let total_suspend_ns = self.state.total_suspend_ns.clone();
        let worker = PvClockWorker::new(tsc_frequency, total_suspend_ns, mem);
        self.switch_to_main_worker(interrupt, worker, queues)
    }

    fn reset(&mut self) -> Result<()> {
        self.switch_to_stub_worker();
        Ok(())
    }

    fn virtio_sleep(&mut self) -> anyhow::Result<Option<BTreeMap<usize, Queue>>> {
        let last_state = replace(&mut self.worker_state, PvClockWorkerState::None);
        match last_state {
            PvClockWorkerState::Main(main_worker_thread) => {
                let main_worker_ret = main_worker_thread.stop();
                let mut queues = BTreeMap::new();
                queues.insert(0, main_worker_ret.set_pvclock_page_queue);
                self.worker_state = PvClockWorkerState::Idle(main_worker_ret.suspend_tube);
                self.state.paused_main_worker = Some(main_worker_ret.worker.into());
                Ok(Some(queues))
            }
            PvClockWorkerState::Stub(stub_worker_thread) => {
                let stub_ret = stub_worker_thread.stop();
                self.worker_state = PvClockWorkerState::Idle(stub_ret.suspend_tube);
                Ok(None)
            }
            PvClockWorkerState::Idle(suspend_tube) => {
                self.worker_state = PvClockWorkerState::Idle(suspend_tube);
                Ok(None)
            }
            PvClockWorkerState::None => panic!("invalid state transition"),
        }
    }

    fn virtio_wake(
        &mut self,
        queues_state: Option<(GuestMemory, Interrupt, BTreeMap<usize, Queue>)>,
    ) -> anyhow::Result<()> {
        if let Some((mem, interrupt, queues)) = queues_state {
            let worker_snap = self
                .state
                .paused_main_worker
                .take()
                .ok_or(anyhow!("a sleeping pvclock must have a paused worker"))?;
            let worker = PvClockWorker::from_snapshot(
                self.state.tsc_frequency,
                self.state.total_suspend_ns.clone(),
                worker_snap,
                mem,
            );
            self.start_main_worker(interrupt, worker, queues)?;
        } else {
            self.start_stub_worker();
        }
        Ok(())
    }

    fn virtio_snapshot(&mut self) -> anyhow::Result<AnySnapshot> {
        AnySnapshot::to_any(&self.state).context("failed to serialize PvClockState")
    }

    fn virtio_restore(&mut self, data: AnySnapshot) -> anyhow::Result<()> {
        let state: PvClockState = AnySnapshot::from_any(data).context("error deserializing")?;
        if state.features != self.features() {
            bail!(
                "expected virtio_features to match, but they did not. Live: {:?}, snapshot {:?}",
                self.features(),
                state.features,
            );
        }
        self.state = state;
        Ok(())
    }

    fn on_device_sandboxed(&mut self) {
        self.start_stub_worker();
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::virtio::QueueConfig;

    const TEST_QUEUE_SIZE: u16 = 2048;

    fn make_interrupt() -> Interrupt {
        Interrupt::new_for_test()
    }

    fn create_pvclock_device() -> (Tube, PvClock) {
        let (host_tube, device_tube) = Tube::pair().unwrap();
        let mut pvclock_device = PvClock::new(0, 1e9 as u64, device_tube);

        pvclock_device.on_device_sandboxed();

        (host_tube, pvclock_device)
    }

1057
1058 fn create_sleeping_device() -> (PvClock, GuestMemory, Tube) {
1059 let (_host_tube, mut pvclock_device) = create_pvclock_device();
1060
1061 let mut fake_queue = QueueConfig::new(TEST_QUEUE_SIZE, 0);
1064 fake_queue.set_ready(true);
1065 let mem = GuestMemory::new(&[(GuestAddress(0), 0x10000)]).unwrap();
1066 let interrupt = make_interrupt();
1067 pvclock_device
1068 .activate(
1069 mem.clone(),
1070 interrupt.clone(),
1071 BTreeMap::from([(
1072 0,
1073 fake_queue
1074 .activate(&mem, Event::new().unwrap(), interrupt)
1075 .unwrap(),
1076 )]),
1077 )
1078 .expect("activate should succeed");
1079 let queues = pvclock_device
1080 .virtio_sleep()
1081 .expect("sleep should succeed")
1082 .expect("sleep should yield queues");
1083 assert_eq!(queues.len(), 1);
1084 assert_eq!(
1085 queues.get(&0).expect("queue must be present").size(),
1086 TEST_QUEUE_SIZE
1087 );
1088 assert!(pvclock_device.state.paused_main_worker.is_some());
1089 (pvclock_device, mem, _host_tube)
1090 }
1091
    fn assert_wake_successful(pvclock_device: &mut PvClock, mem: &GuestMemory) {
        let mut wake_queues = BTreeMap::new();
        let mut fake_queue = QueueConfig::new(TEST_QUEUE_SIZE, 0);
        let interrupt = make_interrupt();
        fake_queue.set_ready(true);
        wake_queues.insert(
            0,
            fake_queue
                .activate(mem, Event::new().unwrap(), interrupt.clone())
                .unwrap(),
        );
        let queues_state = (mem.clone(), interrupt, wake_queues);
        pvclock_device
            .virtio_wake(Some(queues_state))
            .expect("wake should succeed");
        assert!(pvclock_device.state.paused_main_worker.is_none());
    }

    #[test]
    fn test_command_response_when_inactive() {
        let (host_tube, _pvclock_device) = create_pvclock_device();
        assert!(host_tube.send(&PvClockCommand::Suspend).is_ok());
        let res = host_tube.recv::<PvClockCommandResponse>();
        assert!(matches!(res, Ok(PvClockCommandResponse::DeviceInactive)));
    }

    #[test]
    fn test_sleep_wake_smoke() {
        let (mut pvclock_device, mem, _tube) = create_sleeping_device();
        assert_wake_successful(&mut pvclock_device, &mem);
    }

    #[test]
    fn test_save_restore() {
        let (mut pvclock_device, mem, _tube) = create_sleeping_device();
        let test_suspend_ns = 9999;

        pvclock_device
            .state
            .total_suspend_ns
            .store(test_suspend_ns, Ordering::SeqCst);

        let snap = pvclock_device.virtio_snapshot().unwrap();
        pvclock_device
            .state
            .total_suspend_ns
            .store(0, Ordering::SeqCst);
        pvclock_device.virtio_restore(snap).unwrap();
        assert_eq!(
            pvclock_device.state.total_suspend_ns.load(Ordering::SeqCst),
            test_suspend_ns
        );

        assert_wake_successful(&mut pvclock_device, &mem);
    }

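    /// Mirrors the guest kernel's pvclock scaling: shift the raw counter,
    /// then multiply by the 32.32 fixed-point multiplier and drop the
    /// fractional 32 bits.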
    fn pvclock_scale_tsc(mult: u32, shift: i8, tsc: u64) -> u64 {
        let shifted = if shift < 0 {
            tsc >> -shift
        } else {
            tsc << shift
        };
        let product = shifted as u128 * mult as u128;
        (product >> 32).try_into().expect("should not overflow")
    }

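    /// Asserts that scaling `input` with the (mult, shift) pair computed for
    /// frequency `f` lands within roughly +/-10ppm of the exact result.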
    fn check_freq_scale(f: u64, input: u64) {
        let (mult, shift) = freq_scale_shift(1_000_000_000, f);

        let scaled = pvclock_scale_tsc(mult, shift, input);

        let expected: u64 = (input as u128 * 1_000_000_000u128 / f as u128) as u64;
        let expected_lo: u64 = (input as u128 * 999_999_990u128 / f as u128) as u64;
        let expected_hi: u64 = (input as u128 * 1_000_000_010u128 / f as u128) as u64;
        assert!(
            (expected_lo..=expected_hi).contains(&scaled),
            "{scaled} should be close to {expected} (base_hz={f}, mult={mult}, shift={shift})"
        );
    }

    #[test]
    fn test_freq_scale_shift_accuracy() {
        for f in (1..=50).map(|n| n * 100_000_000) {
            check_freq_scale(f, f);
        }
    }

    #[test]
    fn test_freq_scale_shift_overflow_high_freq() {
        for f in (11..=50).map(|n| n * 100_000_000) {
            check_freq_scale(f, u64::MAX);
        }
    }

    #[test]
    fn test_freq_scale_shift_overflow_low_freq() {
        fn prev_power_of_two(n: u64) -> u64 {
            assert_ne!(n, 0);
            let highest_bit_set = 63 - n.leading_zeros();
            1 << highest_bit_set
        }
        for f in (1..=10).map(|n| n * 100_000_000) {
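            // For these low frequencies the computed shift left-shifts the
            // counter by log2(prev_power_of_two(factor) << 1) bits, so this
            // is the largest input that scales without overflowing u64.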
            let factor = 1_000_000_000 / f;
            let target = u64::MAX / (prev_power_of_two(factor) << 1);
            check_freq_scale(f, target);
        }
    }
}
1220}