VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp

Last change on this file was 109140, checked in by vboxsync, 8 days ago

SUP/SUPDrvGip.cpp: warning cleanups. jiraref:VBP-1653

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 189.1 KB
Line 
1/* $Id: SUPDrvGip.cpp 109140 2025-05-02 20:41:05Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.215389.xyz.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#define LOG_GROUP LOG_GROUP_SUP_DRV
42#define SUPDRV_AGNOSTIC
43#include "SUPDrvInternal.h"
44#ifndef PAGE_SHIFT
45# include <iprt/param.h>
46#endif
47#include <iprt/asm.h>
48#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
49# include <iprt/asm-amd64-x86.h>
50#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
51# include <iprt/asm-arm.h>
52#else
53# error "Port me!"
54#endif
55#include <iprt/asm-math.h>
56#include <iprt/cpuset.h>
57#include <iprt/handletable.h>
58#include <iprt/mem.h>
59#include <iprt/mp.h>
60#include <iprt/power.h>
61#include <iprt/process.h>
62#include <iprt/semaphore.h>
63#include <iprt/spinlock.h>
64#include <iprt/thread.h>
65#include <iprt/uuid.h>
66#include <iprt/net.h>
67#include <iprt/crc.h>
68#include <iprt/string.h>
69#include <iprt/timer.h>
70#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
71# include <iprt/rand.h>
72# include <iprt/path.h>
73#endif
74#include <iprt/uint128.h>
75#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
76# include <iprt/x86.h>
77#elif defined(RT_ARCH_ARM64)
78# include <iprt/armv8.h>
79#endif
80
81#include <VBox/param.h>
82#include <VBox/log.h>
83#include <VBox/err.h>
84
85#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
86# include "dtrace/SUPDrv.h"
87#else
88/* ... */
89#endif
90
91
92/*********************************************************************************************************************************
93* Defined Constants And Macros *
94*********************************************************************************************************************************/
95/** The frequency by which we recalculate the u32UpdateHz and
96 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
97 *
98 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
99 */
100#define GIP_UPDATEHZ_RECALC_FREQ 0x800
101
102/** A reserved TSC value used for synchronization as well as measurement of
103 * TSC deltas. */
104#define GIP_TSC_DELTA_RSVD UINT64_MAX
105/** The number of TSC delta measurement loops in total (includes primer and
106 * read-time loops). */
107#define GIP_TSC_DELTA_LOOPS 96
108/** The number of cache primer loops. */
109#define GIP_TSC_DELTA_PRIMER_LOOPS 4
110/** The number of loops until we keep computing the minimum read time. */
111#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
112
113/** The TSC frequency refinement period in seconds.
114 * The timer fires after 200ms, then every second, this value just says when
115 * to stop it after that. */
116#define GIP_TSC_REFINE_PERIOD_IN_SECS 12
117/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
118#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
119/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
120#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
121/** The TSC delta value for the initial GIP master - 0 in regular builds.
122 * To test the delta code this can be set to a non-zero value. */
123#if 0
124# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
125#else
126# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
127#endif
128
129AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
130AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
131
132/** @def VBOX_SVN_REV
133 * The makefile should define this if it can. */
134#ifndef VBOX_SVN_REV
135# define VBOX_SVN_REV 0
136#endif
137
138#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
139# define DO_NOT_START_GIP
140#endif
141
142
143/*********************************************************************************************************************************
144* Internal Functions *
145*********************************************************************************************************************************/
146static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
147static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
148static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask);
149static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
150static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
151#ifdef SUPDRV_USE_TSC_DELTA_THREAD
152static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
153static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
154static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
155#else
156static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt);
157static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
158#endif
159
160
161/*********************************************************************************************************************************
162* Global Variables *
163*********************************************************************************************************************************/
164DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
165SUPR0_EXPORT_SYMBOL(g_pSUPGlobalInfoPage);
166
167
168
169/*
170 *
171 * Misc Common GIP Code
172 * Misc Common GIP Code
173 * Misc Common GIP Code
174 *
175 *
176 */
177
178
179/**
180 * Finds the GIP CPU index corresponding to @a idCpu.
181 *
182 * @returns GIP CPU array index, UINT32_MAX if not found.
183 * @param pGip The GIP.
184 * @param idCpu The CPU ID.
185 */
186static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
187{
188 uint32_t i;
189 for (i = 0; i < pGip->cCpus; i++)
190 if (pGip->aCPUs[i].idCpu == idCpu)
191 return i;
192 return UINT32_MAX;
193}
194
195
/**
 * Gets the APIC ID using the best available method.
 *
 * @returns APIC ID.
 * @param   pGip    The GIP, for the SUPGIPGETCPU_XXX method flags in fGetGipCpu.
 *
 * @note    APIC ID == CPU ID on non-x86 platforms.
 */
DECLINLINE(uint32_t) supdrvGipGetApicId(PSUPGLOBALINFOPAGE pGip)
{
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    /* Prefer the extended 32-bit APIC ID leaves detected at init time (see
       supdrvGipDetectGetGipCpuCallback), falling back on the classic 8-bit
       CPUID(1) APIC ID. */
    if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_0B)
        return ASMGetApicIdExt0B();
    if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_8000001E)
        return ASMGetApicIdExt8000001E();
    return ASMGetApicId();

#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
    /* Windows/arm64: use the read-only thread ID register (TPIDRRO_EL0). */
    RT_NOREF(pGip);
    return (uint32_t)ASMGetThreadIdRoEL0();

#elif defined(RT_ARCH_ARM64) && (defined(RT_OS_LINUX) || defined(RT_OS_DARWIN))
    RT_NOREF(pGip);
    return (uint32_t)RTMpCurSetIndex(); /* the easy way out for now */

#else
# error "port me"
#endif
}
225
226
/**
 * Gets the APIC ID using the best available method, slow version.
 *
 * Probes the CPUID leaves directly rather than relying on pGip->fGetGipCpu,
 * cross-checking the extended (32-bit) APIC ID leaves against the classic
 * 8-bit one.
 *
 * @returns APIC ID (the widest one that checks out, otherwise the 8-bit one).
 *
 * @note    APIC ID == CPU ID on non-x86 platforms.
 */
static uint32_t supdrvGipGetApicIdSlow(void)
{
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    uint32_t const idApic = ASMGetApicId(); /* Classic 8-bit APIC ID from CPUID(1).EBX[31:24]. */

    /* The Intel CPU topology leaf: */
    uint32_t uOther = ASMCpuId_EAX(0);
    if (uOther >= UINT32_C(0xb) && RTX86IsValidStdRange(uOther))
    {
        uint32_t uEax = 0;
        uint32_t uEbx = 0;
        uint32_t uEcx = 0;
        uint32_t uEdx = 0;
# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
        ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
# else
        ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
# endif
        if ((uEcx >> 8) != 0) /* level type != invalid */
        {
            /* EDX holds the x2APIC ID; its low byte must agree with the classic ID. */
            if ((uEdx & 0xff) == idApic)
                return uEdx;
            AssertMsgFailed(("ASMGetApicIdExt0B=>%#x idApic=%#x\n", uEdx, idApic));
        }
    }

    /* The AMD leaf: */
    uOther = ASMCpuId_EAX(UINT32_C(0x80000000));
    if (uOther >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uOther))
    {
        uOther = ASMGetApicIdExt8000001E();
        if ((uOther & 0xff) == idApic)
            return uOther;
        AssertMsgFailed(("ASMGetApicIdExt8000001E=>%#x idApic=%#x\n", uOther, idApic));
    }
    return idApic;

#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
    return (uint32_t)ASMGetThreadIdRoEL0();

#elif defined(RT_ARCH_ARM64) && (defined(RT_OS_LINUX) || defined(RT_OS_DARWIN))
    return (uint32_t)RTMpCurSetIndex(); /* the easy way out for now */

#else
# error "port me"
#endif
}
279
280
281
282/*
283 *
284 * GIP Mapping and Unmapping Related Code.
285 * GIP Mapping and Unmapping Related Code.
286 * GIP Mapping and Unmapping Related Code.
287 *
288 *
289 */
290
291
292/**
293 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
294 * updating.
295 *
296 * @param pGipCpu The per CPU structure for this CPU.
297 * @param u64NanoTS The current time.
298 */
299static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
300{
301 /*
302 * Here we don't really care about applying the TSC delta. The re-initialization of this
303 * value is not relevant especially while (re)starting the GIP as the first few ones will
304 * be ignored anyway, see supdrvGipDoUpdateCpu().
305 */
306 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
307 pGipCpu->u64NanoTS = u64NanoTS;
308}
309
310
311/**
312 * Set the current TSC and NanoTS value for the CPU.
313 *
314 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
315 * @param pvUser1 Pointer to the ring-0 GIP mapping.
316 * @param pvUser2 Pointer to the variable holding the current time.
317 */
318static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
319{
320 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
321 uint32_t const idApic = supdrvGipGetApicId(pGip);
322 if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
323 {
324 unsigned const iCpu = pGip->aiCpuFromApicId[idApic];
325
326 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
327 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
328 else
329 LogRelMax(64, ("supdrvGipReInitCpuCallback: iCpu=%#x out of bounds (%#zx, idApic=%#x)\n",
330 iCpu, RT_ELEMENTS(pGip->aiCpuFromApicId), idApic));
331 }
332 else
333 LogRelMax(64, ("supdrvGipReInitCpuCallback: idApic=%#x out of bounds (%#zx)\n",
334 idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)));
335
336 NOREF(pvUser2);
337}
338
339
/**
 * State structure for supdrvGipDetectGetGipCpuCallback.
 *
 * One instance is shared (RTMpOnAll) by all CPUs; all fields are therefore
 * volatile and updated with atomic operations by the callback.
 */
typedef struct SUPDRVGIPDETECTGETCPU
{
    /** Bitmap of APIC IDs that has been seen (initialized to zero).
     * Used to detect duplicate APIC IDs (paranoia).
     * Sized to match RT_ELEMENTS(SUPGLOBALINFOPAGE::aiCpuFromApicId); there is
     * an AssertCompile enforcing this in the callback. */
    uint8_t volatile bmApicId[4096 / 8];
    /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
     * initially). The callback clears the methods not detected. */
    uint32_t volatile fSupported;
    /** The first callback detecting any kind of range issues (initialized to
     * NIL_RTCPUID). */
    RTCPUID volatile idCpuProblem;
} SUPDRVGIPDETECTGETCPU;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
357
358
359/**
360 * Checks for alternative ways of getting the CPU ID.
361 *
362 * This also checks the APIC ID, CPU ID and CPU set index values against the
363 * GIP tables.
364 *
365 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
366 * @param pvUser1 Pointer to the state structure.
367 * @param pvUser2 Pointer to the GIP.
368 */
369static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
370{
371 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
372 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
373 int const iCpuSet = RTMpCpuIdToSetIndex(idCpu);
374 uint32_t fSupported = 0;
375 uint32_t idApic;
376#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
377 uint32_t uEax, uEbx, uEcx, uEdx;
378#else
379 uint32_t const uEax = 0; /* Dummy for LogRel. */
380#endif
381 NOREF(pGip);
382
383 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
384
385#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
386 /*
387 * Check that the CPU ID and CPU set index are interchangable.
388 */
389 if ((RTCPUID)iCpuSet == idCpu)
390 {
391 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
392 if ( iCpuSet >= 0
393 && iCpuSet < RTCPUSET_MAX_CPUS
394 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
395 {
396 PSUPGIPCPU pGipCpu = SUPGetGipCpuBySetIndex(pGip, iCpuSet);
397
398 /*
399 * Check whether the IDTR.LIMIT contains a CPU number.
400 */
401# ifdef RT_ARCH_X86
402 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
403# else
404 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
405# endif
406 RTIDTR Idtr;
407 ASMGetIDTR(&Idtr);
408 if (Idtr.cbIdt >= cbIdt)
409 {
410 uint32_t uTmp = Idtr.cbIdt - cbIdt;
411 uTmp &= RTCPUSET_MAX_CPUS - 1;
412 if (uTmp == idCpu)
413 {
414 RTIDTR Idtr2;
415 ASMGetIDTR(&Idtr2);
416 if (Idtr2.cbIdt == Idtr.cbIdt)
417 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
418 }
419 }
420
421 /*
422 * Check whether RDTSCP is an option.
423 */
424 if (ASMHasCpuId())
425 {
426 if ( RTX86IsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
427 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
428 {
429 uint32_t uAux;
430 ASMReadTscWithAux(&uAux);
431 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
432 {
433 ASMNopPause();
434 ASMReadTscWithAux(&uAux);
435 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
436 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
437 }
438
439 if (pGipCpu)
440 {
441 uint32_t const uGroupedAux = (uint8_t)pGipCpu->iCpuGroupMember | ((uint32_t)pGipCpu->iCpuGroup << 8);
442 if ( (uAux & UINT16_MAX) == uGroupedAux
443 && pGipCpu->iCpuGroupMember <= UINT8_MAX)
444 {
445 ASMNopPause();
446 ASMReadTscWithAux(&uAux);
447 if ((uAux & UINT16_MAX) == uGroupedAux)
448 fSupported |= SUPGIPGETCPU_RDTSCP_GROUP_IN_CH_NUMBER_IN_CL;
449 }
450 }
451 }
452 }
453 }
454 }
455
456 /*
457 * Check for extended APIC ID methods.
458 */
459 idApic = UINT32_MAX;
460 uEax = ASMCpuId_EAX(0);
461 if (uEax >= UINT32_C(0xb) && RTX86IsValidStdRange(uEax))
462 {
463# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
464 ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
465# else
466 ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
467# endif
468 if ((uEcx >> 8) != 0) /* level type != invalid */
469 {
470 if (RT_LIKELY( uEdx < RT_ELEMENTS(pGip->aiCpuFromApicId)
471 && !ASMBitTest(pState->bmApicId, uEdx)))
472 {
473 if (uEdx == ASMGetApicIdExt0B())
474 {
475 idApic = uEdx;
476 fSupported |= SUPGIPGETCPU_APIC_ID_EXT_0B;
477 }
478 else
479 AssertMsgFailed(("%#x vs %#x\n", uEdx, ASMGetApicIdExt0B()));
480 }
481 }
482 }
483
484 uEax = ASMCpuId_EAX(UINT32_C(0x80000000));
485 if (uEax >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uEax))
486 {
487# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
488 ASMCpuId_Idx_ECX(UINT32_C(0x8000001e), 0, &uEax, &uEbx, &uEcx, &uEdx);
489# else
490 ASMCpuIdExSlow(UINT32_C(0x8000001e), 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
491# endif
492 if (uEax || uEbx || uEcx || uEdx)
493 {
494 if (RT_LIKELY( uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
495 && ( idApic == UINT32_MAX
496 || idApic == uEax)
497 && !ASMBitTest(pState->bmApicId, uEax)))
498 {
499 if (uEax == ASMGetApicIdExt8000001E())
500 {
501 idApic = uEax;
502 fSupported |= SUPGIPGETCPU_APIC_ID_EXT_8000001E;
503 }
504 else
505 AssertMsgFailed(("%#x vs %#x\n", uEax, ASMGetApicIdExt8000001E()));
506 }
507 }
508 }
509
510#else /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */
511 fSupported |= SUPGIPGETCPU_TPIDRRO_EL0;
512 idApic = supdrvGipGetApicIdSlow();
513#endif /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */
514
515 /*
516 * Check that the APIC ID is unique.
517 */
518#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
519 uEax = ASMGetApicId();
520 if (RT_LIKELY( uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
521 && ( idApic == UINT32_MAX
522 || idApic == uEax)
523 && !ASMAtomicBitTestAndSet(pState->bmApicId, uEax)))
524 {
525 idApic = uEax;
526 fSupported |= SUPGIPGETCPU_APIC_ID;
527 }
528 else
529#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
530 if ( idApic == UINT32_MAX
531 || idApic >= RT_ELEMENTS(pGip->aiCpuFromApicId) /* parnaoia */
532 || ASMAtomicBitTestAndSet(pState->bmApicId, idApic))
533 {
534 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
535 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
536 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x/%#x - duplicate APIC ID.\n",
537 idCpu, iCpuSet, uEax, idApic));
538 }
539
540 /*
541 * Check that the iCpuSet is within the expected range.
542 */
543 if (RT_UNLIKELY( iCpuSet < 0
544 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
545 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
546 {
547 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
548 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
549 idCpu, iCpuSet, idApic));
550 }
551 else
552 {
553 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
554 if (RT_UNLIKELY(idCpu2 != idCpu))
555 {
556 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
557 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
558 idCpu, iCpuSet, idApic, idCpu2));
559 }
560 }
561
562 /*
563 * Update the supported feature mask before we return.
564 */
565 ASMAtomicAndU32(&pState->fSupported, fSupported);
566
567 NOREF(pvUser2);
568}
569
570
571/**
572 * Increase the timer freqency on hosts where this is possible (NT).
573 *
574 * The idea is that more interrupts is better for us... Also, it's better than
575 * we increase the timer frequence, because we might end up getting inaccurate
576 * callbacks if someone else does it.
577 *
578 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
579 */
580static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
581{
582 if (pDevExt->u32SystemTimerGranularityGrant == 0)
583 {
584 uint32_t u32SystemResolution;
585 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
586 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
587 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
588 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
589 )
590 {
591#if 0 /* def VBOX_STRICT - this is somehow triggers bogus assertions on windows 10 */
592 uint32_t u32After = RTTimerGetSystemGranularity();
593 AssertMsg(u32After <= u32SystemResolution, ("u32After=%u u32SystemResolution=%u\n", u32After, u32SystemResolution));
594#endif
595 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
596 }
597 }
598}
599
600
601/**
602 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
603 *
604 * @param pDevExt Clears u32SystemTimerGranularityGrant.
605 */
606static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
607{
608 if (pDevExt->u32SystemTimerGranularityGrant)
609 {
610 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
611 AssertRC(rc2);
612 pDevExt->u32SystemTimerGranularityGrant = 0;
613 }
614}
615
616
/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 * @param   ppGipR3     Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip  Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this function
 *          counts globally as one reference. One call to SUPR0GipUnmap() will unmap GIP
 *          and remove the session as a GIP user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, NIL_RTR0PROCESS);
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;

                /*
                 * GIP starts/resumes updating again. On windows we bump the
                 * host timer frequency to make sure we don't get stuck in guest
                 * mode and to get better timer (and possibly clock) accuracy.
                 */
                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);

                /*
                 * Unless this is the very first time the GIP is used, round
                 * every CPU's transaction ID up to the next
                 * GIP_UPDATEHZ_RECALC_FREQ * 2 boundary and zero the
                 * update-frequency timestamp, forcing a recalculation.
                 * (Presumably this prevents stale interval data from before
                 * the updating was suspended from being used - TODO confirm.)
                 */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    unsigned i;
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /*
                 * Re-prime the per-CPU TSC and NanoTS values, backdated one
                 * update interval.  For invariant/sync TSC modes (and the
                 * single CPU case) only aCPUs[0] is reinitialized; in async
                 * mode every online CPU must do it on itself.
                 */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
                    || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

                /*
                 * Detect alternative ways to figure the CPU ID in ring-3 and
                 * raw-mode context.  Check the sanity of the APIC IDs, CPU IDs,
                 * and CPU set indexes while we're at it.
                 */
                if (RT_SUCCESS(rc))
                {
                    PSUPDRVGIPDETECTGETCPU pDetectState = (PSUPDRVGIPDETECTGETCPU)RTMemTmpAllocZ(sizeof(*pDetectState));
                    if (pDetectState)
                    {
                        pDetectState->fSupported   = UINT32_MAX;
                        pDetectState->idCpuProblem = NIL_RTCPUID;
                        rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, pDetectState, pGipR0);
                        if (pDetectState->idCpuProblem == NIL_RTCPUID)
                        {
                            if (   pDetectState->fSupported != UINT32_MAX
                                && pDetectState->fSupported != 0)
                            {
                                /* Publish the (possibly downgraded) method mask. */
                                if (pGipR0->fGetGipCpu != pDetectState->fSupported)
                                {
                                    pGipR0->fGetGipCpu = pDetectState->fSupported;
                                    LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", pDetectState->fSupported));
                                }
                            }
                            else
                            {
                                LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
                                        pDetectState->fSupported));
                                rc = VERR_UNSUPPORTED_CPU;
                            }
                        }
                        else
                        {
                            LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
                                    pDetectState->idCpuProblem, pDetectState->idCpuProblem));
                            rc = VERR_INVALID_CPU_ID;
                        }
                        RTMemTmpFree(pDetectState);
                    }
                    else
                        rc = VERR_NO_TMP_MEMORY;
                }

                /*
                 * Start the GIP timer if all is well..
                 */
                if (RT_SUCCESS(rc))
                {
#ifndef DO_NOT_START_GIP
                    rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
#endif
                    rc = VINF_SUCCESS;
                }

                /*
                 * Bail out on error.
                 */
                if (RT_FAILURE(rc))
                {
                    LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
                    pDevExt->cGipUsers = 0;
                    pSession->fGipReferenced = 0;
                    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
                        if (RT_SUCCESS(rc2))
                            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
                    }
                    HCPhys = NIL_RTHCPHYS;
                    pGipR3 = NIL_RTR3PTR;
                }
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
SUPR0_EXPORT_SYMBOL(SUPR0GipMap);
816
817
/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int                     rc = VINF_SUCCESS;
    PSUPDRVDEVEXT           pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * GIP test-mode session?  Then clear the testing flag first.
     */
    if (   pSession->fGipTestMode
        && pDevExt->pGip)
    {
        supdrvGipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
        Assert(!pSession->fGipTestMode);
    }

    /*
     * Unmap anything?
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  (Only done when the unmapping succeeded.)
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            /* Last user gone: stop the GIP timer and give back the elevated
               system timer resolution. */
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif
            supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
SUPR0_EXPORT_SYMBOL(SUPR0GipUnmap);
892
893
/**
 * Gets the GIP pointer.
 *
 * @returns Pointer to the GIP or NULL.
 */
SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
{
    /* Simply hand out the exported global (see g_pSUPGlobalInfoPage above). */
    return g_pSUPGlobalInfoPage;
}
903
904
905
906
907
908/*
909 *
910 *
911 * GIP Initialization, Termination and CPU Offline / Online Related Code.
912 * GIP Initialization, Termination and CPU Offline / Online Related Code.
913 * GIP Initialization, Termination and CPU Offline / Online Related Code.
914 *
915 *
916 */
917
918/**
919 * Used by supdrvGipInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
920 * to update the TSC frequency related GIP variables.
921 *
922 * @param pGip The GIP.
923 * @param nsElapsed The number of nanoseconds elapsed.
924 * @param cElapsedTscTicks The corresponding number of TSC ticks.
925 * @param iTick The tick number for debugging.
926 */
927static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
928{
929 /*
930 * Calculate the frequency.
931 */
932 uint64_t uCpuHz;
933 if ( cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
934 && nsElapsed < UINT32_MAX)
935 uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
936 else
937 {
938 RTUINT128U CpuHz, Tmp, Divisor;
939 CpuHz.s.Lo = CpuHz.s.Hi = 0;
940 RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
941 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
942 uCpuHz = CpuHz.s.Lo;
943 }
944
945 /*
946 * Update the GIP.
947 */
948 ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
949 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
950 {
951 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);
952
953 /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
954 if (iTick + 1 < pGip->cCpus)
955 ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
956 }
957}
958
959
960/**
961 * Timer callback function for TSC frequency refinement in invariant GIP mode.
962 *
963 * This is started during driver init and fires once
964 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
965 *
966 * @param pTimer The timer.
967 * @param pvUser Opaque pointer to the device instance data.
968 * @param iTick The timer tick.
969 */
970static DECLCALLBACK(void) supdrvGipInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
971{
972 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
973 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
974 RTCPUID idCpu;
975 uint64_t cNsElapsed;
976 uint64_t cTscTicksElapsed;
977 uint64_t nsNow;
978 uint64_t uTsc;
979 RTCCUINTREG fEFlags;
980
981 /* Paranoia. */
982 AssertReturnVoid(pGip);
983 AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
984
985 /*
986 * If we got a power event, stop the refinement process.
987 */
988 if (pDevExt->fInvTscRefinePowerEvent)
989 {
990 int rc = RTTimerStop(pTimer); AssertRC(rc);
991 return;
992 }
993
994 /*
995 * Read the TSC and time, noting which CPU we are on.
996 *
997 * Don't bother spinning until RTTimeSystemNanoTS changes, since on
998 * systems where it matters we're in a context where we cannot waste that
999 * much time (DPC watchdog, called from clock interrupt).
1000 */
1001 fEFlags = ASMIntDisableFlags();
1002 uTsc = ASMReadTSC();
1003 nsNow = RTTimeSystemNanoTS();
1004 idCpu = RTMpCpuId();
1005 ASMSetFlags(fEFlags);
1006
1007 cNsElapsed = nsNow - pDevExt->nsStartInvarTscRefine;
1008 cTscTicksElapsed = uTsc - pDevExt->uTscStartInvarTscRefine;
1009
1010 /*
1011 * If the above measurement was taken on a different CPU than the one we
1012 * started the process on, cTscTicksElapsed will need to be adjusted with
1013 * the TSC deltas of both the CPUs.
1014 *
1015 * We ASSUME that the delta calculation process takes less time than the
1016 * TSC frequency refinement timer. If it doesn't, we'll complain and
1017 * drop the frequency refinement.
1018 *
1019 * Note! We cannot entirely trust enmUseTscDelta here because it's
1020 * downgraded after each delta calculation.
1021 */
1022 if ( idCpu != pDevExt->idCpuInvarTscRefine
1023 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1024 {
1025 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
1026 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpu);
1027 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1028 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
1029 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1030 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
1031 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
1032 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
1033 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
1034 {
1035 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
1036 {
1037 /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
1038 cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
1039 }
1040 }
1041 /*
1042 * Allow 5 times the refinement period to elapse before we give up on the TSC delta
1043 * calculations.
1044 */
1045 else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
1046 {
1047 SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
1048 (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
1049 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
1050 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
1051 int rc = RTTimerStop(pTimer); AssertRC(rc);
1052 return;
1053 }
1054 }
1055
1056 /*
1057 * Calculate and update the CPU frequency variables in GIP.
1058 *
1059 * If there is a GIP user already and we've already refined the frequency
1060 * a couple of times, don't update it as we want a stable frequency value
1061 * for all VMs.
1062 */
1063 if ( pDevExt->cGipUsers == 0
1064 || cNsElapsed < RT_NS_1SEC * 2)
1065 {
1066 supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);
1067
1068 /*
1069 * Stop the timer once we've reached the defined refinement period.
1070 */
1071 if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
1072 {
1073 int rc = RTTimerStop(pTimer);
1074 AssertRC(rc);
1075 }
1076 }
1077 else
1078 {
1079 int rc = RTTimerStop(pTimer);
1080 AssertRC(rc);
1081 }
1082}
1083
1084
1085/**
1086 * @callback_method_impl{FNRTPOWERNOTIFICATION}
1087 */
1088static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
1089{
1090 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1091 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1092
1093 /*
1094 * If the TSC frequency refinement timer is running, we need to cancel it so it
1095 * doesn't screw up the frequency after a long suspend.
1096 *
1097 * Recalculate all TSC-deltas on host resume as it may have changed, seen
1098 * on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
1099 */
1100 if (enmEvent == RTPOWEREVENT_RESUME)
1101 {
1102 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
1103 if ( RT_LIKELY(pGip)
1104 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
1105 && !supdrvOSAreCpusOfflinedOnSuspend())
1106 {
1107#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1108 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
1109#else
1110 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
1111 supdrvTscMeasureInitialDeltas(pDevExt);
1112#endif
1113 }
1114 }
1115 else if (enmEvent == RTPOWEREVENT_SUSPEND)
1116 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
1117}
1118
1119
1120/**
1121 * Start the TSC-frequency refinment timer for the invariant TSC GIP mode.
1122 *
1123 * We cannot use this in the synchronous and asynchronous tsc GIP modes because
1124 * the CPU may change the TSC frequence between now and when the timer fires
1125 * (supdrvInitAsyncRefineTscTimer).
1126 *
1127 * @param pDevExt Pointer to the device instance data.
1128 */
1129static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt)
1130{
1131 uint64_t u64NanoTS;
1132 RTCCUINTREG fEFlags;
1133 int rc;
1134
1135 /*
1136 * Register a power management callback.
1137 */
1138 pDevExt->fInvTscRefinePowerEvent = false;
1139 rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
1140 AssertRC(rc); /* ignore */
1141
1142 /*
1143 * Record the TSC and NanoTS as the starting anchor point for refinement
1144 * of the TSC. We try get as close to a clock tick as possible on systems
1145 * which does not provide high resolution time.
1146 */
1147 u64NanoTS = RTTimeSystemNanoTS();
1148 while (RTTimeSystemNanoTS() == u64NanoTS)
1149 ASMNopPause();
1150
1151 fEFlags = ASMIntDisableFlags();
1152 pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
1153 pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
1154 pDevExt->idCpuInvarTscRefine = RTMpCpuId();
1155 ASMSetFlags(fEFlags);
1156
1157 /*
1158 * Create a timer that runs on the same CPU so we won't have a depencency
1159 * on the TSC-delta and can run in parallel to it. On systems that does not
1160 * implement CPU specific timers we'll apply deltas in the timer callback,
1161 * just like we do for CPUs going offline.
1162 *
1163 * The longer the refinement interval the better the accuracy, at least in
1164 * theory. If it's too long though, ring-3 may already be starting its
1165 * first VMs before we're done. On most systems we will be loading the
1166 * support driver during boot and VMs won't be started for a while yet,
1167 * it is really only a problem during development (especially with
1168 * on-demand driver starting on windows).
1169 *
1170 * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
1171 * to calculate the frequency during driver loading, the timer is set
1172 * to fire after 200 ms the first time. It will then reschedule itself
1173 * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
1174 * reached or it notices that there is a user land client with GIP
1175 * mapped (we want a stable frequency for all VMs).
1176 */
1177 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
1178 RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
1179 supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
1180 if (RT_SUCCESS(rc))
1181 {
1182 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
1183 if (RT_SUCCESS(rc))
1184 return;
1185 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
1186 }
1187
1188 if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
1189 {
1190 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
1191 supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
1192 if (RT_SUCCESS(rc))
1193 {
1194 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
1195 if (RT_SUCCESS(rc))
1196 return;
1197 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
1198 }
1199 }
1200
1201 pDevExt->pInvarTscRefineTimer = NULL;
1202 OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
1203}
1204
1205
1206/**
1207 * @callback_method_impl{PFNRTMPWORKER,
1208 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
1209 * the measurements on.}
1210 */
1211static DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1212{
1213 RTCCUINTREG fEFlags = ASMIntDisableFlags();
1214 uint64_t *puTscStop = (uint64_t *)pvUser1;
1215 uint64_t *pnsStop = (uint64_t *)pvUser2;
1216 RT_NOREF1(idCpu);
1217
1218 *puTscStop = ASMReadTSC();
1219 *pnsStop = RTTimeSystemNanoTS();
1220
1221 ASMSetFlags(fEFlags);
1222}
1223
1224
1225/**
1226 * Measures the TSC frequency of the system.
1227 *
1228 * The TSC frequency can vary on systems which are not reported as invariant.
1229 * On such systems the object of this function is to find out what the nominal,
1230 * maximum TSC frequency under 'normal' CPU operation.
1231 *
1232 * @returns VBox status code.
1233 * @param pGip Pointer to the GIP.
1234 * @param fRough Set if we're doing the rough calculation that the
1235 * TSC measuring code needs, where accuracy isn't all
1236 * that important (too high is better than too low).
1237 * When clear we try for best accuracy that we can
1238 * achieve in reasonably short time.
1239 */
1240static int supdrvGipInitMeasureTscFreq(PSUPGLOBALINFOPAGE pGip, bool fRough)
1241{
1242 uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
1243 int cTriesLeft = fRough ? 4 : 2;
1244 while (cTriesLeft-- > 0)
1245 {
1246 RTCCUINTREG fEFlags;
1247 uint64_t nsStart;
1248 uint64_t nsStop;
1249 uint64_t uTscStart;
1250 uint64_t uTscStop;
1251 RTCPUID idCpuStart;
1252 RTCPUID idCpuStop;
1253
1254 /*
1255 * Synchronize with the host OS clock tick on systems without high
1256 * resolution time API (older Windows version for example).
1257 */
1258 nsStart = RTTimeSystemNanoTS();
1259 while (RTTimeSystemNanoTS() == nsStart)
1260 ASMNopPause();
1261
1262 /*
1263 * Read the TSC and current time, noting which CPU we're on.
1264 */
1265 fEFlags = ASMIntDisableFlags();
1266 uTscStart = ASMReadTSC();
1267 nsStart = RTTimeSystemNanoTS();
1268 idCpuStart = RTMpCpuId();
1269 ASMSetFlags(fEFlags);
1270
1271 /*
1272 * Delay for a while.
1273 */
1274 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1275 {
1276 /*
1277 * Sleep-wait since the TSC frequency is constant, it eases host load.
1278 * Shorter interval produces more variance in the frequency (esp. Windows).
1279 */
1280 uint64_t msElapsed = 0;
1281 uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
1282 / RT_NS_1MS;
1283 do
1284 {
1285 RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
1286 nsStop = RTTimeSystemNanoTS();
1287 msElapsed = (nsStop - nsStart) / RT_NS_1MS;
1288 } while (msElapsed < msDelay);
1289
1290 while (RTTimeSystemNanoTS() == nsStop)
1291 ASMNopPause();
1292 }
1293 else
1294 {
1295 /*
1296 * Busy-wait keeping the frequency up.
1297 */
1298 do
1299 {
1300 ASMNopPause();
1301 nsStop = RTTimeSystemNanoTS();
1302 } while (nsStop - nsStart < RT_NS_100MS);
1303 }
1304
1305 /*
1306 * Read the TSC and time again.
1307 */
1308 fEFlags = ASMIntDisableFlags();
1309 uTscStop = ASMReadTSC();
1310 nsStop = RTTimeSystemNanoTS();
1311 idCpuStop = RTMpCpuId();
1312 ASMSetFlags(fEFlags);
1313
1314 /*
1315 * If the CPU changes, things get a bit complicated and what we
1316 * can get away with depends on the GIP mode / TSC reliability.
1317 */
1318 if (idCpuStop != idCpuStart)
1319 {
1320 bool fDoXCall = false;
1321
1322 /*
1323 * Synchronous TSC mode: we're probably fine as it's unlikely
1324 * that we were rescheduled because of TSC throttling or power
1325 * management reasons, so just go ahead.
1326 */
1327 if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
1328 {
1329 /* Probably ok, maybe we should retry once?. */
1330 Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
1331 }
1332 /*
1333 * If we're just doing the rough measurement, do the cross call and
1334 * get on with things (we don't have deltas!).
1335 */
1336 else if (fRough)
1337 fDoXCall = true;
1338 /*
1339 * Invariant TSC mode: It doesn't matter if we have delta available
1340 * for both CPUs. That is not something we can assume at this point.
1341 *
1342 * Note! We cannot necessarily trust enmUseTscDelta here because it's
1343 * downgraded after each delta calculation and the delta
1344 * calculations may not be complete yet.
1345 */
1346 else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1347 {
1348/** @todo This section of code is never reached atm, consider dropping it later on... */
1349 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1350 {
1351 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(idCpuStart);
1352 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpuStop);
1353 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1354 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
1355 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1356 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
1357 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
1358 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
1359 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
1360 {
1361 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
1362 {
1363 uTscStart -= iStartTscDelta;
1364 uTscStop -= iStopTscDelta;
1365 }
1366 }
1367 /*
1368 * Invalid CPU indexes are not caused by online/offline races, so
1369 * we have to trigger driver load failure if that happens as GIP
1370 * and IPRT assumptions are busted on this system.
1371 */
1372 else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
1373 {
1374 SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
1375 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
1376 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
1377 return VERR_INVALID_CPU_INDEX;
1378 }
1379 /*
1380 * No valid deltas. We retry, if we're on our last retry
1381 * we do the cross call instead just to get a result. The
1382 * frequency will be refined in a few seconds anyway.
1383 */
1384 else if (cTriesLeft > 0)
1385 continue;
1386 else
1387 fDoXCall = true;
1388 }
1389 }
1390 /*
1391 * Asynchronous TSC mode: This is bad, as the reason we usually
1392 * use this mode is to deal with variable TSC frequencies and
1393 * deltas. So, we need to get the TSC from the same CPU as
1394 * started it, we also need to keep that CPU busy. So, retry
1395 * and fall back to the cross call on the last attempt.
1396 */
1397 else
1398 {
1399 Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
1400 if (cTriesLeft > 0)
1401 continue;
1402 fDoXCall = true;
1403 }
1404
1405 if (fDoXCall)
1406 {
1407 /*
1408 * Try read the TSC and timestamp on the start CPU.
1409 */
1410 int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
1411 if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
1412 continue;
1413 }
1414 }
1415
1416 /*
1417 * Calculate the TSC frequency and update it (shared with the refinement timer).
1418 */
1419 supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
1420 return VINF_SUCCESS;
1421 }
1422
1423 Assert(!fRough);
1424 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
1425}
1426
1427
1428/**
1429 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1430 *
1431 * @returns Index of the CPU in the cache set.
1432 * @param pGip The GIP.
1433 * @param idCpu The CPU ID.
1434 */
1435static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1436{
1437 uint32_t i, cTries;
1438
1439 /*
1440 * ASSUMES that CPU IDs are constant.
1441 */
1442 for (i = 0; i < pGip->cCpus; i++)
1443 if (pGip->aCPUs[i].idCpu == idCpu)
1444 return i;
1445
1446 cTries = 0;
1447 do
1448 {
1449 for (i = 0; i < pGip->cCpus; i++)
1450 {
1451 bool fRc;
1452 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1453 if (fRc)
1454 return i;
1455 }
1456 } while (cTries++ < 32);
1457 AssertReleaseFailed();
1458 return i - 1;
1459}
1460
1461
1462/**
1463 * The calling CPU should be accounted as online, update GIP accordingly.
1464 *
1465 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
1466 *
1467 * @param pDevExt The device extension.
1468 * @param idCpu The CPU ID.
1469 */
1470static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1471{
1472 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1473 int iCpuSet = 0;
1474 uint32_t idApic;
1475 uint32_t i = 0;
1476 uint64_t u64NanoTS = 0;
1477
1478 AssertPtrReturnVoid(pGip);
1479 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1480 AssertRelease(idCpu == RTMpCpuId());
1481 Assert(pGip->cPossibleCpus == RTMpGetCount());
1482
1483 /*
1484 * Do this behind a spinlock with interrupts disabled as this can fire
1485 * on all CPUs simultaneously, see @bugref{6110}.
1486 */
1487 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1488
1489 /*
1490 * Update the globals.
1491 */
1492 ASMAtomicWriteU16(&pGip->cPresentCpus, (uint16_t)RTMpGetPresentCount());
1493 ASMAtomicWriteU16(&pGip->cOnlineCpus, (uint16_t)RTMpGetOnlineCount());
1494 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1495 if (iCpuSet >= 0)
1496 {
1497 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1498 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
1499 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
1500 }
1501
1502 /*
1503 * Update the entry.
1504 */
1505 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
1506 i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1507
1508 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);
1509
1510 idApic = supdrvGipGetApicIdSlow();
1511 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, (uint16_t)idApic);
1512 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
1513 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
1514
1515 pGip->aCPUs[i].iCpuGroup = 0;
1516 pGip->aCPUs[i].iCpuGroupMember = (uint16_t)iCpuSet;
1517#ifdef RT_OS_WINDOWS
1518 supdrvOSGipInitGroupBitsForCpu(pDevExt, pGip, &pGip->aCPUs[i]);
1519#endif
1520
1521 /*
1522 * Update the APIC ID and CPU set index mappings.
1523 */
1524 if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
1525 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], (uint16_t)i);
1526 else
1527 LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: idApic=%#x is out of bounds (%#zx, i=%u, iCpuSet=%d)\n",
1528 idApic, RT_ELEMENTS(pGip->aiCpuFromApicId), i, iCpuSet));
1529 if ((unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
1530 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], (uint16_t)i);
1531 else
1532 LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: iCpuSet=%d is out of bounds (%#zx, i=%u, idApic=%d)\n",
1533 iCpuSet, RT_ELEMENTS(pGip->aiCpuFromApicId), i, idApic));
1534
1535 /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
1536 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));
1537
1538 /* Update the Mp online/offline counter. */
1539 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1540
1541 /* Commit it. */
1542 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
1543
1544 RTSpinlockRelease(pDevExt->hGipSpinlock);
1545}
1546
1547
1548/**
1549 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1550 *
1551 * @param idCpu The CPU ID we are running on.
1552 * @param pvUser1 Opaque pointer to the device instance data.
1553 * @param pvUser2 Not used.
1554 */
1555static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1556{
1557 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1558 NOREF(pvUser2);
1559 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1560}
1561
1562
1563/**
1564 * The CPU should be accounted as offline, update the GIP accordingly.
1565 *
1566 * This is used by supdrvGipMpEvent.
1567 *
1568 * @param pDevExt The device extension.
1569 * @param idCpu The CPU ID.
1570 */
1571static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1572{
1573 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1574 int iCpuSet;
1575 unsigned i;
1576
1577 AssertPtrReturnVoid(pGip);
1578 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1579
1580 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1581 AssertReturnVoid(iCpuSet >= 0);
1582
1583 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1584 AssertReturnVoid(i < pGip->cCpus);
1585 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1586
1587 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1588 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1589
1590 /* Update the Mp online/offline counter. */
1591 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1592
1593 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1594 {
1595 /* Reset the TSC delta, we will recalculate it lazily. */
1596 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1597 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1598 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1599 }
1600
1601 /* Commit it. */
1602 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1603
1604 RTSpinlockRelease(pDevExt->hGipSpinlock);
1605}
1606
1607
1608/**
1609 * Multiprocessor event notification callback.
1610 *
1611 * This is used to make sure that the GIP master gets passed on to
1612 * another CPU. It also updates the associated CPU data.
1613 *
1614 * @param enmEvent The event.
1615 * @param idCpu The cpu it applies to.
1616 * @param pvUser Pointer to the device extension.
1617 */
1618static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
1619{
1620 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1621 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1622
1623 if (pGip)
1624 {
1625 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
1626 switch (enmEvent)
1627 {
1628 case RTMPEVENT_ONLINE:
1629 {
1630 RTThreadPreemptDisable(&PreemptState);
1631 if (idCpu == RTMpCpuId())
1632 {
1633 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1634 RTThreadPreemptRestore(&PreemptState);
1635 }
1636 else
1637 {
1638 RTThreadPreemptRestore(&PreemptState);
1639 RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
1640 }
1641
1642 /*
1643 * Recompute TSC-delta for the newly online'd CPU.
1644 */
1645 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1646 {
1647#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1648 supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
1649#else
1650 uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1651 supdrvTscMeasureDeltaOne(pDevExt, iCpu);
1652#endif
1653 }
1654 break;
1655 }
1656
1657 case RTMPEVENT_OFFLINE:
1658 supdrvGipMpEventOffline(pDevExt, idCpu);
1659 break;
1660 }
1661 }
1662
1663 /*
1664 * Make sure there is a master GIP.
1665 */
1666 if (enmEvent == RTMPEVENT_OFFLINE)
1667 {
1668 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
1669 if (idGipMaster == idCpu)
1670 {
1671 /*
1672 * The GIP master is going offline, find a new one.
1673 */
1674 bool fIgnored;
1675 unsigned i;
1676 RTCPUID idNewGipMaster = NIL_RTCPUID;
1677 RTCPUSET OnlineCpus;
1678 RTMpGetOnlineSet(&OnlineCpus);
1679
1680 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
1681 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
1682 {
1683 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
1684 if (idCurCpu != idGipMaster)
1685 {
1686 idNewGipMaster = idCurCpu;
1687 break;
1688 }
1689 }
1690
1691 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
1692 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
1693 NOREF(fIgnored);
1694 }
1695 }
1696}
1697
1698
1699/**
1700 * On CPU initialization callback for RTMpOnAll.
1701 *
1702 * @param idCpu The CPU ID.
1703 * @param pvUser1 The device extension.
1704 * @param pvUser2 The GIP.
1705 */
1706static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1707{
1708 /* This is good enough, even though it will update some of the globals a
1709 bit to much. */
1710 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1711 NOREF(pvUser2);
1712}
1713
1714#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1715
1716/**
1717 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1718 *
1719 * @param idCpu Ignored.
1720 * @param pvUser1 Where to put the TSC.
1721 * @param pvUser2 Ignored.
1722 */
1723static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1724{
1725 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1726 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1727 RT_NOREF2(idCpu, pvUser2);
1728}
1729
1730
1731/**
1732 * Determine if Async GIP mode is required because of TSC drift.
1733 *
1734 * When using the default/normal timer code it is essential that the time stamp counter
1735 * (TSC) runs never backwards, that is, a read operation to the counter should return
1736 * a bigger value than any previous read operation. This is guaranteed by the latest
1737 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
1738 * case we have to choose the asynchronous timer mode.
1739 *
1740 * @param poffMin Pointer to the determined difference between different
1741 * cores (optional, can be NULL).
1742 * @return false if the time stamp counters appear to be synchronized, true otherwise.
1743 */
1744static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1745{
1746 /*
1747 * Just iterate all the cpus 8 times and make sure that the TSC is
1748 * ever increasing. We don't bother taking TSC rollover into account.
1749 */
1750 int iEndCpu = RTMpGetArraySize();
1751 int iCpu;
1752 int cLoops = 8;
1753 bool fAsync = false;
1754 int rc = VINF_SUCCESS;
1755 uint64_t offMax = 0;
1756 uint64_t offMin = ~(uint64_t)0;
1757 uint64_t PrevTsc = ASMReadTSC();
1758
1759 while (cLoops-- > 0)
1760 {
1761 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1762 {
1763 uint64_t CurTsc;
1764 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
1765 &CurTsc, (void *)(uintptr_t)iCpu);
1766 if (RT_SUCCESS(rc))
1767 {
1768 if (CurTsc <= PrevTsc)
1769 {
1770 fAsync = true;
1771 offMin = offMax = PrevTsc - CurTsc;
1772 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1773 iCpu, cLoops, CurTsc, PrevTsc));
1774 break;
1775 }
1776
1777 /* Gather statistics (except the first time). */
1778 if (iCpu != 0 || cLoops != 7)
1779 {
1780 uint64_t off = CurTsc - PrevTsc;
1781 if (off < offMin)
1782 offMin = off;
1783 if (off > offMax)
1784 offMax = off;
1785 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1786 }
1787
1788 /* Next */
1789 PrevTsc = CurTsc;
1790 }
1791 else if (rc == VERR_NOT_SUPPORTED)
1792 break;
1793 else
1794 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1795 }
1796
1797 /* broke out of the loop. */
1798 if (iCpu < iEndCpu)
1799 break;
1800 }
1801
1802 if (poffMin)
1803 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1804 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1805 fAsync, iEndCpu, rc, offMin, offMax));
1806# if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1807 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1808# endif
1809 return fAsync;
1810}
1811
1812#endif /* RT_ARCH_AMD64 || RT_ARCH_X86 */
1813
1814
1815/**
1816 * supdrvGipInit() worker that determines the GIP TSC mode.
1817 *
1818 * @returns The most suitable TSC mode.
1819 * @param pDevExt Pointer to the device instance data.
1820 */
1821static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1822{
1823#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1824 uint64_t u64DiffCoresIgnored;
1825 uint32_t uEAX, uEBX, uECX, uEDX;
1826
1827 /*
1828 * Establish whether the CPU advertises TSC as invariant, we need that in
1829 * a couple of places below.
1830 */
1831 bool fInvariantTsc = false;
1832 if (ASMHasCpuId())
1833 {
1834 uEAX = ASMCpuId_EAX(0x80000000);
1835 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1836 {
1837 uEDX = ASMCpuId_EDX(0x80000007);
1838 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1839 fInvariantTsc = true;
1840 }
1841 }
1842
1843 /*
1844 * On single CPU systems, we don't need to consider ASYNC mode.
1845 */
1846 if (RTMpGetCount() <= 1)
1847 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1848
1849 /*
1850 * Allow the user and/or OS specific bits to force async mode.
1851 */
1852 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1853 return SUPGIPMODE_ASYNC_TSC;
1854
1855 /*
1856 * Use invariant mode if the CPU says TSC is invariant.
1857 */
1858 if (fInvariantTsc)
1859 return SUPGIPMODE_INVARIANT_TSC;
1860
1861 /*
1862 * TSC is not invariant and we're on SMP, this presents two problems:
1863 *
1864 * (1) There might be a skew between the CPU, so that cpu0
1865 * returns a TSC that is slightly different from cpu1.
1866 * This screw may be due to (2), bad TSC initialization
1867 * or slightly different TSC rates.
1868 *
1869 * (2) Power management (and other things) may cause the TSC
1870 * to run at a non-constant speed, and cause the speed
1871 * to be different on the cpus. This will result in (1).
1872 *
1873 * If any of the above is detected, we will have to use ASYNC mode.
1874 */
1875 /* (1). Try check for current differences between the cpus. */
1876 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1877 return SUPGIPMODE_ASYNC_TSC;
1878
1879 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1880 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1881 if ( RTX86IsValidStdRange(uEAX)
1882 && (RTX86IsAmdCpu(uEBX, uECX, uEDX) || RTX86IsHygonCpu(uEBX, uECX, uEDX)) )
1883 {
1884 /* Check for APM support. */
1885 uEAX = ASMCpuId_EAX(0x80000000);
1886 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1887 {
1888 uEDX = ASMCpuId_EDX(0x80000007);
1889 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1890 return SUPGIPMODE_ASYNC_TSC;
1891 }
1892 }
1893
1894 return SUPGIPMODE_SYNC_TSC;
1895
1896#elif defined(RT_ARCH_ARM64)
1897 RT_NOREF(pDevExt);
1898 return SUPGIPMODE_INVARIANT_TSC;
1899
1900#else
1901# error "Port me"
1902#endif
1903}
1904
1905
1906/**
1907 * Initializes per-CPU GIP information.
1908 *
1909 * @param pGip Pointer to the GIP.
1910 * @param pCpu Pointer to which GIP CPU to initialize.
1911 * @param u64NanoTS The current nanosecond timestamp.
1912 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1913 */
1914static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1915{
1916 pCpu->u32TransactionId = 2;
1917 pCpu->u64NanoTS = u64NanoTS;
1918 pCpu->u64TSC = ASMReadTSC();
1919 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1920 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1921
1922 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1923 ASMAtomicWriteU32(&pCpu->idCpu, NIL_RTCPUID);
1924 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1925 ASMAtomicWriteU16(&pCpu->iCpuGroup, 0);
1926 ASMAtomicWriteU16(&pCpu->iCpuGroupMember, UINT16_MAX);
1927 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1928 ASMAtomicWriteU32(&pCpu->iReservedForNumaNode, 0);
1929
1930 /*
1931 * The first time we're called, we don't have a CPU frequency handy,
1932 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1933 * called again and at that point we have a more plausible CPU frequency
1934 * value handy. The frequency history will also be adjusted again on
1935 * the 2nd timer callout (maybe we can skip that now?).
1936 */
1937 if (!uCpuHz)
1938 {
1939 pCpu->u64CpuHz = _4G - 1;
1940 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1941 }
1942 else
1943 {
1944 pCpu->u64CpuHz = uCpuHz;
1945 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1946 }
1947 pCpu->au32TSCHistory[0]
1948 = pCpu->au32TSCHistory[1]
1949 = pCpu->au32TSCHistory[2]
1950 = pCpu->au32TSCHistory[3]
1951 = pCpu->au32TSCHistory[4]
1952 = pCpu->au32TSCHistory[5]
1953 = pCpu->au32TSCHistory[6]
1954 = pCpu->au32TSCHistory[7]
1955 = pCpu->u32UpdateIntervalTSC;
1956}
1957
1958
1959/**
1960 * Initializes the GIP data.
1961 *
1962 * @returns VBox status code.
1963 * @param pDevExt Pointer to the device instance data.
1964 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1965 * @param HCPhys The physical address of the GIP.
1966 * @param u64NanoTS The current nanosecond timestamp.
1967 * @param uUpdateHz The update frequency.
1968 * @param uUpdateIntervalNS The update interval in nanoseconds.
1969 * @param cCpus The CPU count.
1970 * @param cbGipCpuGroups The supdrvOSGipGetGroupTableSize return value we
1971 * used when allocating the GIP structure.
1972 */
1973static int supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1974 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS,
1975 unsigned cCpus, size_t cbGipCpuGroups)
1976{
1977 size_t const cbGip = RT_ALIGN_Z(RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups, PAGE_SIZE);
1978 unsigned i;
1979#ifdef DEBUG_DARWIN_GIP
1980 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1981#else
1982 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1983#endif
1984
1985 /*
1986 * Initialize the structure.
1987 */
1988 memset(pGip, 0, cbGip);
1989
1990 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1991 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1992 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1993 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1994 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1995 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1996 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1997 else
1998 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1999 pGip->cCpus = (uint16_t)cCpus;
2000 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
2001 pGip->u32UpdateHz = uUpdateHz;
2002 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
2003 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
2004 RTCpuSetEmpty(&pGip->OnlineCpuSet);
2005 RTCpuSetEmpty(&pGip->PresentCpuSet);
2006 RTMpGetSet(&pGip->PossibleCpuSet);
2007 pGip->cOnlineCpus = (uint16_t)RTMpGetOnlineCount();
2008 pGip->cPresentCpus = (uint16_t)RTMpGetPresentCount();
2009 pGip->cPossibleCpus = (uint16_t)RTMpGetCount();
2010 pGip->cPossibleCpuGroups = 1;
2011 pGip->idCpuMax = RTMpGetMaxCpuId();
2012 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
2013 pGip->aiCpuFromApicId[i] = UINT16_MAX;
2014 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
2015 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
2016 for (i = 0; i < RT_ELEMENTS(pGip->aoffCpuGroup); i++)
2017 pGip->aoffCpuGroup[i] = UINT32_MAX;
2018 for (i = 0; i < cCpus; i++)
2019 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);
2020#ifdef RT_OS_WINDOWS
2021 int rc = supdrvOSInitGipGroupTable(pDevExt, pGip, cbGipCpuGroups);
2022 AssertRCReturn(rc, rc);
2023#endif
2024
2025 /*
2026 * Link it to the device extension.
2027 */
2028 pDevExt->pGip = pGip;
2029 pDevExt->HCPhysGip = HCPhys;
2030 pDevExt->cGipUsers = 0;
2031
2032 return VINF_SUCCESS;
2033}
2034
2035
2036/**
2037 * Creates the GIP.
2038 *
2039 * @returns VBox status code.
2040 * @param pDevExt Instance data. GIP stuff may be updated.
2041 */
2042int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
2043{
2044 PSUPGLOBALINFOPAGE pGip;
2045 size_t cbGip;
2046 size_t cbGipCpuGroups;
2047 RTHCPHYS HCPhysGip;
2048 uint32_t u32SystemResolution;
2049 uint32_t u32Interval;
2050 uint32_t u32MinInterval;
2051 uint32_t uMod;
2052 unsigned cCpus;
2053 int rc;
2054
2055 LogFlow(("supdrvGipCreate:\n"));
2056
2057 /*
2058 * Assert order.
2059 */
2060 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
2061 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
2062 Assert(!pDevExt->pGipTimer);
2063#ifdef SUPDRV_USE_MUTEX_FOR_GIP
2064 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
2065 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
2066#else
2067 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
2068 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
2069#endif
2070
2071 /*
2072 * Check the CPU count.
2073 */
2074 cCpus = RTMpGetArraySize();
2075 if (cCpus > RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)))
2076 {
2077 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)));
2078 return VERR_TOO_MANY_CPUS;
2079 }
2080
2081 /*
2082 * Allocate a contiguous set of pages with a default kernel mapping.
2083 */
2084#ifdef RT_OS_WINDOWS
2085 cbGipCpuGroups = supdrvOSGipGetGroupTableSize(pDevExt);
2086#else
2087 cbGipCpuGroups = 0;
2088#endif
2089 cbGip = RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups;
2090 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, cbGip, NIL_RTHCPHYS /*PhysHighest*/, false /*fExecutable*/);
2091 if (RT_FAILURE(rc))
2092 {
2093 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
2094 return rc;
2095 }
2096 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
2097 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
2098
2099 /*
2100 * Find a reasonable update interval and initialize the structure.
2101 */
2102 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
2103 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
2104 * See @bugref{6710}. */
2105 u32MinInterval = RT_NS_10MS;
2106 u32SystemResolution = RTTimerGetSystemGranularity();
2107 u32Interval = u32MinInterval;
2108 uMod = u32MinInterval % u32SystemResolution;
2109 if (uMod)
2110 u32Interval += u32SystemResolution - uMod;
2111
2112 rc = supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval,
2113 cCpus, cbGipCpuGroups);
2114
2115 /*
2116 * Important sanity check... (Sets rc)
2117 */
2118 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
2119 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
2120 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
2121 {
2122 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
2123 rc = VERR_INTERNAL_ERROR_2;
2124 }
2125
2126 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
2127 AssertStmt( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
2128 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED,
2129 rc = VERR_INTERNAL_ERROR_3);
2130
2131 /*
2132 * Do the TSC frequency measurements.
2133 *
2134 * If we're in invariant TSC mode, just to a quick preliminary measurement
2135 * that the TSC-delta measurement code can use to yield cross calls.
2136 *
2137 * If we're in any of the other two modes, neither which require MP init,
2138 * notifications or deltas for the job, do the full measurement now so
2139 * that supdrvGipInitOnCpu() can populate the TSC interval and history
2140 * array with more reasonable values.
2141 */
2142 if (RT_SUCCESS(rc))
2143 {
2144 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
2145 {
2146 rc = supdrvGipInitMeasureTscFreq(pGip, true /*fRough*/); /* cannot fail */
2147 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt);
2148 }
2149 else
2150 rc = supdrvGipInitMeasureTscFreq(pGip, false /*fRough*/);
2151 if (RT_SUCCESS(rc))
2152 {
2153 /*
2154 * Start TSC-delta measurement thread before we start getting MP
2155 * events that will try kick it into action (includes the
2156 * RTMpOnAll/supdrvGipInitOnCpu call below).
2157 */
2158 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
2159 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
2160#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2161 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2162 rc = supdrvTscDeltaThreadInit(pDevExt);
2163#endif
2164 if (RT_SUCCESS(rc))
2165 {
2166 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
2167 if (RT_SUCCESS(rc))
2168 {
2169 /*
2170 * Do GIP initialization on all online CPUs. Wake up the
2171 * TSC-delta thread afterwards.
2172 */
2173 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
2174 if (RT_SUCCESS(rc))
2175 {
2176#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2177 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
2178#else
2179 uint16_t iCpu;
2180 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2181 {
2182 /*
2183 * Measure the TSC deltas now that we have MP notifications.
2184 */
2185 int cTries = 5;
2186 do
2187 {
2188 rc = supdrvTscMeasureInitialDeltas(pDevExt);
2189 if ( rc != VERR_TRY_AGAIN
2190 && rc != VERR_CPU_OFFLINE)
2191 break;
2192 } while (--cTries > 0);
2193 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2194 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
2195 }
2196 else
2197 {
2198 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2199 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
2200 }
2201 if (RT_SUCCESS(rc))
2202#endif
2203 {
2204 /*
2205 * Create the timer.
2206 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
2207 */
2208 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
2209 {
2210 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
2211 supdrvGipAsyncTimer, pDevExt);
2212 if (rc == VERR_NOT_SUPPORTED)
2213 {
2214 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
2215 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
2216 }
2217 }
2218 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2219 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
2220 supdrvGipSyncAndInvariantTimer, pDevExt);
2221 if (RT_SUCCESS(rc))
2222 {
2223 /*
2224 * We're good.
2225 */
2226 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
2227 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2228
2229 g_pSUPGlobalInfoPage = pGip;
2230 return VINF_SUCCESS;
2231 }
2232
2233 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
2234 Assert(!pDevExt->pGipTimer);
2235 }
2236 }
2237 else
2238 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
2239 }
2240 else
2241 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
2242 }
2243 else
2244 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
2245 }
2246 else
2247 OSDBGPRINT(("supdrvGipCreate: supdrvTscMeasureInitialDeltas failed. rc=%Rrc\n", rc));
2248 }
2249
2250 /* Releases timer frequency increase too. */
2251 supdrvGipDestroy(pDevExt);
2252 return rc;
2253}
2254
2255
2256/**
2257 * Invalidates the GIP data upon termination.
2258 *
2259 * @param pGip Pointer to the read-write kernel mapping of the GIP.
2260 */
2261static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
2262{
2263 unsigned i;
2264 pGip->u32Magic = 0;
2265 for (i = 0; i < pGip->cCpus; i++)
2266 {
2267 pGip->aCPUs[i].u64NanoTS = 0;
2268 pGip->aCPUs[i].u64TSC = 0;
2269 pGip->aCPUs[i].iTSCHistoryHead = 0;
2270 pGip->aCPUs[i].u64TSCSample = 0;
2271 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2272 }
2273}
2274
2275
2276/**
2277 * Terminates the GIP.
2278 *
2279 * @param pDevExt Instance data. GIP stuff may be updated.
2280 */
2281void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
2282{
2283 int rc;
2284#ifdef DEBUG_DARWIN_GIP
2285 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
2286 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
2287 pDevExt->pGipTimer, pDevExt->GipMemObj));
2288#endif
2289
2290 /*
2291 * Stop receiving MP notifications before tearing anything else down.
2292 */
2293 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
2294
2295#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2296 /*
2297 * Terminate the TSC-delta measurement thread and resources.
2298 */
2299 supdrvTscDeltaTerm(pDevExt);
2300#endif
2301
2302 /*
2303 * Destroy the TSC-refinement timer.
2304 */
2305 if (pDevExt->pInvarTscRefineTimer)
2306 {
2307 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
2308 pDevExt->pInvarTscRefineTimer = NULL;
2309 }
2310
2311 /*
2312 * Invalid the GIP data.
2313 */
2314 if (pDevExt->pGip)
2315 {
2316 supdrvGipTerm(pDevExt->pGip);
2317 pDevExt->pGip = NULL;
2318 }
2319 g_pSUPGlobalInfoPage = NULL;
2320
2321 /*
2322 * Destroy the timer and free the GIP memory object.
2323 */
2324 if (pDevExt->pGipTimer)
2325 {
2326 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
2327 pDevExt->pGipTimer = NULL;
2328 }
2329
2330 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
2331 {
2332 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
2333 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
2334 }
2335
2336 /*
2337 * Finally, make sure we've release the system timer resolution request
2338 * if one actually succeeded and is still pending.
2339 */
2340 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2341}
2342
2343
2344
2345
2346/*
2347 *
2348 *
2349 * GIP Update Timer Related Code
2350 * GIP Update Timer Related Code
2351 * GIP Update Timer Related Code
2352 *
2353 *
2354 */
2355
2356
2357/**
2358 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
2359 * updates all the per cpu data except the transaction id.
2360 *
2361 * @param pDevExt The device extension.
2362 * @param pGipCpu Pointer to the per cpu data.
2363 * @param u64NanoTS The current time stamp.
2364 * @param u64TSC The current TSC.
2365 * @param iTick The current timer tick.
2366 *
2367 * @remarks Can be called with interrupts disabled!
2368 */
2369static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
2370{
2371 uint64_t u64TSCDelta;
2372 bool fUpdateCpuHz;
2373 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2374 AssertPtrReturnVoid(pGip);
2375
2376 /* Delta between this and the previous update. */
2377 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
2378
2379 /*
2380 * Update the NanoTS.
2381 */
2382 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
2383
2384 /*
2385 * Calc TSC delta.
2386 */
2387 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
2388 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
2389
2390 /*
2391 * Determine if we need to update the CPU (TSC) frequency calculation.
2392 *
2393 * We don't need to keep recalculating the frequency when it's invariant,
2394 * unless the special tstGIP-2 testing mode is enabled.
2395 */
2396 fUpdateCpuHz = pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC;
2397 if (!(pGip->fFlags & SUPGIP_FLAGS_TESTING))
2398 { /* likely*/ }
2399 else
2400 {
2401 uint32_t fGipFlags = pGip->fFlags;
2402 if (fGipFlags & (SUPGIP_FLAGS_TESTING_ENABLE | SUPGIP_FLAGS_TESTING_START))
2403 {
2404 if (fGipFlags & SUPGIP_FLAGS_TESTING_START)
2405 {
2406 /* Cache the TSC frequency before forcing updates due to test mode. */
2407 if (!fUpdateCpuHz)
2408 pDevExt->uGipTestModeInvariantCpuHz = pGip->aCPUs[0].u64CpuHz;
2409 ASMAtomicAndU32(&pGip->fFlags, ~SUPGIP_FLAGS_TESTING_START);
2410 }
2411 fUpdateCpuHz = true;
2412 }
2413 else if (fGipFlags & SUPGIP_FLAGS_TESTING_STOP)
2414 {
2415 /* Restore the cached TSC frequency if any. */
2416 if (!fUpdateCpuHz)
2417 {
2418 Assert(pDevExt->uGipTestModeInvariantCpuHz);
2419 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, pDevExt->uGipTestModeInvariantCpuHz);
2420 }
2421 ASMAtomicAndU32(&pGip->fFlags, ~(SUPGIP_FLAGS_TESTING_STOP | SUPGIP_FLAGS_TESTING));
2422 }
2423 }
2424
2425 /*
2426 * Calculate the CPU (TSC) frequency if necessary.
2427 */
2428 if (fUpdateCpuHz)
2429 {
2430 uint64_t u64CpuHz;
2431 uint32_t u32UpdateIntervalTSC;
2432 uint32_t u32UpdateIntervalTSCSlack;
2433 uint32_t u32TransactionId;
2434 unsigned iTSCHistoryHead;
2435
2436 if (u64TSCDelta >> 32)
2437 {
2438 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
2439 pGipCpu->cErrors++;
2440 }
2441
2442 /*
2443 * On the 2nd and 3rd callout, reset the history with the current TSC
2444 * interval since the values entered by supdrvGipInit are totally off.
2445 * The interval on the 1st callout completely unreliable, the 2nd is a bit
2446 * better, while the 3rd should be most reliable.
2447 */
2448 /** @todo Could we drop this now that we initializes the history
2449 * with nominal TSC frequency values? */
2450 u32TransactionId = pGipCpu->u32TransactionId;
2451 if (RT_UNLIKELY( ( u32TransactionId == 5
2452 || u32TransactionId == 7)
2453 && ( iTick == 2
2454 || iTick == 3) ))
2455 {
2456 unsigned i;
2457 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
2458 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
2459 }
2460
2461 /*
2462 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
2463 * Wait until we have at least one full history since the above history reset. The
2464 * assumption is that the majority of the previous history values will be tolerable.
2465 * See @bugref{6710#c67}.
2466 */
2467 /** @todo Could we drop the fudging there now that we initializes the history
2468 * with nominal TSC frequency values? */
2469 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
2470 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2471 {
2472 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
2473 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
2474 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
2475 {
2476 uint32_t u32;
2477 u32 = pGipCpu->au32TSCHistory[0];
2478 u32 += pGipCpu->au32TSCHistory[1];
2479 u32 += pGipCpu->au32TSCHistory[2];
2480 u32 += pGipCpu->au32TSCHistory[3];
2481 u32 >>= 2;
2482 u64TSCDelta = pGipCpu->au32TSCHistory[4];
2483 u64TSCDelta += pGipCpu->au32TSCHistory[5];
2484 u64TSCDelta += pGipCpu->au32TSCHistory[6];
2485 u64TSCDelta += pGipCpu->au32TSCHistory[7];
2486 u64TSCDelta >>= 2;
2487 u64TSCDelta += u32;
2488 u64TSCDelta >>= 1;
2489 }
2490 }
2491
2492 /*
2493 * TSC History.
2494 */
2495 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
2496 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
2497 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
2498 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
2499
2500 /*
2501 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
2502 *
2503 * On Windows, we have an occasional (but recurring) sour value that messed up
2504 * the history but taking only 1 interval reduces the precision overall.
2505 */
2506 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
2507 || pGip->u32UpdateHz >= 1000)
2508 {
2509 uint32_t u32;
2510 u32 = pGipCpu->au32TSCHistory[0];
2511 u32 += pGipCpu->au32TSCHistory[1];
2512 u32 += pGipCpu->au32TSCHistory[2];
2513 u32 += pGipCpu->au32TSCHistory[3];
2514 u32 >>= 2;
2515 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
2516 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
2517 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
2518 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
2519 u32UpdateIntervalTSC >>= 2;
2520 u32UpdateIntervalTSC += u32;
2521 u32UpdateIntervalTSC >>= 1;
2522
2523 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
2524 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
2525 }
2526 else if (pGip->u32UpdateHz >= 90)
2527 {
2528 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2529 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
2530 u32UpdateIntervalTSC >>= 1;
2531
2532 /* value chosen on a 2GHz thinkpad running windows */
2533 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
2534 }
2535 else
2536 {
2537 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2538
2539 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
2540 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
2541 }
2542 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
2543
2544 /*
2545 * CpuHz.
2546 */
2547 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
2548 u64CpuHz /= pGip->u32UpdateIntervalNS;
2549 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
2550 }
2551}
2552
2553
2554/**
2555 * Updates the GIP.
2556 *
2557 * @param pDevExt The device extension.
2558 * @param u64NanoTS The current nanosecond timestamp.
2559 * @param u64TSC The current TSC timestamp.
2560 * @param idCpu The CPU ID.
2561 * @param iTick The current timer tick.
2562 *
2563 * @remarks Can be called with interrupts disabled!
2564 */
2565static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
2566{
2567 /*
2568 * Determine the relevant CPU data.
2569 */
2570 PSUPGIPCPU pGipCpu;
2571 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2572 AssertPtrReturnVoid(pGip);
2573
2574 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2575 pGipCpu = &pGip->aCPUs[0];
2576 else
2577 {
2578 unsigned iCpu;
2579 uint32_t idApic = supdrvGipGetApicId(pGip);
2580 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
2581 { /* likely */ }
2582 else
2583 return;
2584 iCpu = pGip->aiCpuFromApicId[idApic];
2585 if (RT_LIKELY(iCpu < pGip->cCpus))
2586 { /* likely */ }
2587 else
2588 return;
2589 pGipCpu = &pGip->aCPUs[iCpu];
2590 if (RT_LIKELY(pGipCpu->idCpu == idCpu))
2591 { /* likely */ }
2592 else
2593 return;
2594 }
2595
2596 /*
2597 * Start update transaction.
2598 */
2599 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2600 {
2601 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
2602 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2603 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2604 pGipCpu->cErrors++;
2605 return;
2606 }
2607
2608 /*
2609 * Recalc the update frequency every 0x800th time.
2610 */
2611 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariant hosts. */
2612 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
2613 {
2614 if (pGip->u64NanoTSLastUpdateHz)
2615 {
2616#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
2617 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
2618 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
2619 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
2620 {
2621 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
2622 * calculation on non-invariant hosts if it changes the history decision
2623 * taken in supdrvGipDoUpdateCpu(). */
2624 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
2625 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
2626 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
2627 }
2628#endif
2629 }
2630 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
2631 }
2632
2633 /*
2634 * Update the data.
2635 */
2636 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2637
2638 /*
2639 * Complete transaction.
2640 */
2641 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2642}
2643
2644
2645/**
2646 * Updates the per cpu GIP data for the calling cpu.
2647 *
2648 * @param pDevExt The device extension.
2649 * @param u64NanoTS The current nanosecond timestamp.
2650 * @param u64TSC The current TSC timesaver.
2651 * @param idCpu The CPU ID.
2652 * @param idApic The APIC id for the CPU index.
2653 * @param iTick The current timer tick.
2654 *
2655 * @remarks Can be called with interrupts disabled!
2656 */
2657static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
2658 RTCPUID idCpu, uint32_t idApic, uint64_t iTick)
2659{
2660 uint32_t iCpu;
2661 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2662
2663 /*
2664 * Avoid a potential race when a CPU online notification doesn't fire on
2665 * the onlined CPU but the tick creeps in before the event notification is
2666 * run.
2667 */
2668 if (RT_LIKELY(iTick != 1))
2669 { /* likely*/ }
2670 else
2671 {
2672 iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
2673 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
2674 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
2675 }
2676
2677 iCpu = idApic < RT_ELEMENTS(pGip->aiCpuFromApicId) ? pGip->aiCpuFromApicId[idApic] : UINT32_MAX;
2678 if (RT_LIKELY(iCpu < pGip->cCpus))
2679 {
2680 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2681 if (pGipCpu->idCpu == idCpu)
2682 {
2683 /*
2684 * Start update transaction.
2685 */
2686 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2687 {
2688 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2689 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2690 pGipCpu->cErrors++;
2691 return;
2692 }
2693
2694 /*
2695 * Update the data.
2696 */
2697 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2698
2699 /*
2700 * Complete transaction.
2701 */
2702 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2703 }
2704 }
2705}
2706
2707
2708/**
2709 * Timer callback function for the sync and invariant GIP modes.
2710 *
2711 * @param pTimer The timer.
2712 * @param pvUser Opaque pointer to the device extension.
2713 * @param iTick The timer tick.
2714 */
2715static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2716{
2717 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2718 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2719 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2720 uint64_t u64TSC = ASMReadTSC();
2721 uint64_t u64NanoTS = RTTimeSystemNanoTS();
2722 RT_NOREF1(pTimer);
2723
2724 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
2725 {
2726 /*
2727 * The calculations in supdrvGipUpdate() is somewhat timing sensitive,
2728 * missing timer ticks is not an option for GIP because the GIP users
2729 * will end up incrementing the time in 1ns per time getter call until
2730 * there is a complete timer update. So, if the delta has yet to be
2731 * calculated, we just pretend it is zero for now (the GIP users
2732 * probably won't have it for a wee while either and will do the same).
2733 *
2734 * We could maybe on some platforms try cross calling a CPU with a
2735 * working delta here, but it's not worth the hassle since the
2736 * likelihood of this happening is really low. On Windows, Linux, and
2737 * Solaris timers fire on the CPU they were registered/started on.
2738 * Darwin timers doesn't necessarily (they are high priority threads).
2739 */
2740 uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
2741 uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
2742 ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
2743 Assert(!ASMIntAreEnabled());
2744 if (RT_LIKELY(iGipCpu < pGip->cCpus))
2745 {
2746 int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
2747 if (iTscDelta != INT64_MAX)
2748 u64TSC -= iTscDelta;
2749 }
2750 }
2751
2752 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
2753
2754 ASMSetFlags(fEFlags);
2755}
2756
2757
2758/**
2759 * Timer callback function for async GIP mode.
2760 * @param pTimer The timer.
2761 * @param pvUser Opaque pointer to the device extension.
2762 * @param iTick The timer tick.
2763 */
2764static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2765{
2766 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2767 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2768 RTCPUID idCpu = RTMpCpuId();
2769 uint64_t u64TSC = ASMReadTSC();
2770 uint64_t NanoTS = RTTimeSystemNanoTS();
2771 RT_NOREF1(pTimer);
2772
2773 /** @todo reset the transaction number and whatnot when iTick == 1. */
2774 if (pDevExt->idGipMaster == idCpu)
2775 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2776 else
2777 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, supdrvGipGetApicId(pDevExt->pGip), iTick);
2778
2779 ASMSetFlags(fEFlags);
2780}
2781
2782
2783
2784
2785/*
2786 *
2787 *
2788 * TSC Delta Measurements And Related Code
2789 * TSC Delta Measurements And Related Code
2790 * TSC Delta Measurements And Related Code
2791 *
2792 *
2793 */
2794
2795
2796/*
2797 * Select TSC delta measurement algorithm.
2798 */
2799#if 0
2800# define GIP_TSC_DELTA_METHOD_1
2801#else
2802# define GIP_TSC_DELTA_METHOD_2
2803#endif
2804
2805/** For padding variables to keep them away from other cache lines. Better too
2806 * large than too small!
2807 * @remarks Current AMD64 and x86 CPUs seems to use 64 bytes. There are claims
2808 * that NetBurst had 128 byte cache lines while the 486 thru Pentium
2809 * III had 32 bytes cache lines. */
2810#define GIP_TSC_DELTA_CACHE_LINE_SIZE 128
2811
2812
2813/**
2814 * TSC delta measurement algorithm \#2 result entry.
2815 */
2816typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
2817{
2818 uint32_t iSeqMine;
2819 uint32_t iSeqOther;
2820 uint64_t uTsc;
2821} SUPDRVTSCDELTAMETHOD2ENTRY;
2822
2823/**
2824 * TSC delta measurement algorithm \#2 Data.
2825 */
2826typedef struct SUPDRVTSCDELTAMETHOD2
2827{
2828 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2829 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2830 /** The current sequence number of this worker. */
2831 uint32_t volatile iCurSeqNo;
2832 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2833 uint32_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
2834 /** Result table. */
2835 SUPDRVTSCDELTAMETHOD2ENTRY aResults[64];
2836} SUPDRVTSCDELTAMETHOD2;
2837/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
2838typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2839
2840
2841/**
2842 * The TSC delta synchronization struct, version 2.
2843 *
2844 * The synchronization variable is completely isolated in its own cache line
2845 * (provided our max cache line size estimate is correct).
2846 */
2847typedef struct SUPTSCDELTASYNC2
2848{
2849 /** Padding to make sure the uVar1 is in its own cache line. */
2850 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2851
2852 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
2853 volatile uint32_t uSyncVar;
2854 /** Sequence synchronizing variable used for post 'GO' synchronization. */
2855 volatile uint32_t uSyncSeq;
2856
2857 /** Padding to make sure the uVar1 is in its own cache line. */
2858 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];
2859
2860 /** Start RDTSC value. Put here mainly to save stack space. */
2861 uint64_t uTscStart;
2862 /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
2863 uint64_t cMaxTscTicks;
2864} SUPTSCDELTASYNC2;
2865AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
2866typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2867
2868/** Prestart wait. */
2869#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT UINT32_C(0x0ffe)
2870/** Prestart aborted. */
2871#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT UINT32_C(0x0fff)
2872/** Ready (on your mark). */
2873#define GIP_TSC_DELTA_SYNC2_READY UINT32_C(0x1000)
2874/** Steady (get set). */
2875#define GIP_TSC_DELTA_SYNC2_STEADY UINT32_C(0x1001)
2876/** Go! */
2877#define GIP_TSC_DELTA_SYNC2_GO UINT32_C(0x1002)
2878/** Used by the verification test. */
2879#define GIP_TSC_DELTA_SYNC2_GO_GO UINT32_C(0x1003)
2880
2881/** We reached the time limit. */
2882#define GIP_TSC_DELTA_SYNC2_TIMEOUT UINT32_C(0x1ffe)
2883/** The other party won't touch the sync struct ever again. */
2884#define GIP_TSC_DELTA_SYNC2_FINAL UINT32_C(0x1fff)
2885
2886
/**
 * Argument package/state passed by supdrvTscMeasureDeltaOne() to the RTMpOn
 * callback worker.
 *
 * Carefully laid out so that the master-only and worker-only variables live
 * in separate cache lines (see the padding members).
 */
typedef struct SUPDRVGIPTSCDELTARGS
{
    /** The device extension.   */
    PSUPDRVDEVEXT       pDevExt;
    /** Pointer to the GIP CPU array entry for the worker. */
    PSUPGIPCPU          pWorker;
    /** Pointer to the GIP CPU array entry for the master. */
    PSUPGIPCPU          pMaster;
    /** The maximum number of ticks to spend in supdrvTscMeasureDeltaCallback.
     * (This is what we need a rough TSC frequency for.)  */
    uint64_t            cMaxTscTicks;
    /** Used to abort synchronization setup. */
    bool volatile       fAbortSetup;

    /** Padding to make sure the master variables live in its own cache lines. */
    uint64_t            au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Master
     * @{ */
    /** The time the master spent in the MP worker.  */
    uint64_t            cElapsedMasterTscTicks;
    /** The iTry value when stopped at. */
    uint32_t            iTry;
    /** Set if the run timed out.   */
    bool volatile       fTimedOut;
    /** Pointer to the master's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncMaster;
    /** Master data union. */
    union
    {
        /** Data (master) for delta verification. */
        struct
        {
            /** Verification test TSC values for the master. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (master) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2   Data;
            /** The lag setting for the next run. */
            bool                    fLag;
            /** Number of hits. */
            uint32_t                cHits;
        } M2;
    } uMaster;
    /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
     * VERR_TRY_AGAIN on timeout. */
    int32_t             rcVerify;
#ifdef TSCDELTA_VERIFY_WITH_STATS
    /** The maximum difference between TSC read during delta verification. */
    int64_t             cMaxVerifyTscTicks;
    /** The minimum difference between two TSC reads during verification. */
    int64_t             cMinVerifyTscTicks;
    /** The bad TSC diff, worker relative to master (= worker - master).
     * Negative value means the worker is behind the master.  */
    int64_t             iVerifyBadTscDiff;
#endif
    /** @} */

    /** Padding to make sure the worker variables live in their own cache line. */
    uint64_t            au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Proletarian
     * @{ */
    /** Pointer to the worker's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncWorker;
    /** The time the worker spent in the MP worker.  */
    uint64_t            cElapsedWorkerTscTicks;
    /** Worker data union. */
    union
    {
        /** Data (worker) for delta verification. */
        struct
        {
            /** Verification test TSC values for the worker. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (worker) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2   Data;
            /** The lag setting for the next run (set by master). */
            bool                    fLag;
        } M2;
    } uWorker;
    /** @} */

    /** Padding to make sure the above is in its own cache line. */
    uint64_t            au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
} SUPDRVGIPTSCDELTARGS;
typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2986
2987
/** @name Macros that implement the basic synchronization steps common to
 * the algorithms.
 *
 * Must be used from within a loop as the timeouts are implemented via 'break'
 * statements at the moment.
 *
 * @{
 */
/* Loop watchdog helpers: in DEBUG_bird builds they count iterations and hit a
   breakpoint if a wait loop spins suspiciously long; no-ops otherwise. */
#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
# define TSCDELTA_DBG_VARS()            uint32_t iDbgCounter
# define TSCDELTA_DBG_START_LOOP()      do { iDbgCounter = 0; } while (0)
# define TSCDELTA_DBG_CHECK_LOOP() \
    do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
#else
# define TSCDELTA_DBG_VARS()            ((void)0)
# define TSCDELTA_DBG_START_LOOP()      ((void)0)
# define TSCDELTA_DBG_CHECK_LOOP()      ((void)0)
#endif
/* Three independently toggleable levels of synchronization tracing; all are
   compiled out by default (#if 0). */
#if 0
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
#endif
3021
3022
/**
 * Gets the master and the worker into position for a measurement run,
 * returning with interrupts disabled on success.
 *
 * Implements the READY -&gt; STEADY -&gt; GO handshake on the uSyncVar members and
 * then tries to bring both CPUs into (mostly) lockstep execution via the
 * uSyncSeq exchange dance.  Timeouts relative to SUPTSCDELTASYNC2::uTscStart
 * + cMaxTscTicks are detected in the first wait loop.
 *
 * @returns true on success: interrupts are left disabled and the flags to
 *          restore later are returned in *pfEFlags.  false on abort/timeout:
 *          interrupts have been restored.
 * @param   pMySync     My synchronization structure.
 * @param   pOtherSync  The partner CPU's synchronization structure.
 * @param   fIsMaster   Set if we're the master, clear if the worker.
 * @param   pfEFlags    Where to return the interrupt flag state for the
 *                      matching TSCDELTA_*_SYNC_AFTER call.
 * @param   pArgs       The argument/state data (fTimedOut is set on timeout).
 */
static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                       bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
{
    uint32_t        iMySeq  = fIsMaster ? 0 : 256; /* disjoint sequence ranges so master/worker values never collide */
    uint32_t const  iMaxSeq = iMySeq + 16; /* For the last loop, darn linux/freebsd C-ishness. */
    uint32_t        u32Tmp;
    uint32_t        iSync2Loops = 0;
    RTCCUINTREG     fEFlags;
    TSCDELTA_DBG_VARS();

#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */
#else
    *pfEFlags = 0;
#endif

    /*
     * The master tells the worker to get on it's mark.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely*/ }
        else
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the on your mark signal (ack in the master case).  We process timeouts here.
     */
    ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
    for (;;)
    {
        /* Interrupts are disabled before each check so that a successful break
           leaves us with interrupts off; re-enabled below if we must spin more. */
        fEFlags = ASMIntDisableFlags();
        u32Tmp  = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
            break;
        ASMSetFlags(fEFlags);
        ASMNopPause();

        /* Abort? */
        if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        /* Check for timeouts every so often (not every loop in case RDTSC is
           trapping or something).  Must check the first time around. */
#if 0 /* For debugging the timeout paths. */
        static uint32_t volatile xxx;
#endif
        if (   (   (iSync2Loops & 0x3ff) == 0
                && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
            || (!fIsMaster && (++xxx & 0xf) == 0)
#endif
           )
        {
            /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
               ignore the timeout if we've got the go ahead already (simpler). */
            if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
            {
                TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
                ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
                ASMAtomicWriteBool(&pArgs->fTimedOut, true);
                return false;
            }
        }
        iSync2Loops++;
    }

    /*
     * Interrupts are now disabled and will remain disabled until we do
     * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
     */
    *pfEFlags = fEFlags;

    /*
     * The worker tells the master that it is on its mark and that the master
     * need to get into position as well.
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * The master sends the 'go' to the worker and wait for ACK.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the 'go' signal (ack in the master case).
     */
    TSCDELTA_DBG_START_LOOP();
    for (;;)
    {
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
            break;
        if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        TSCDELTA_DBG_CHECK_LOOP();
        ASMNopPause();
    }

    /*
     * The worker acks the 'go' (shouldn't fail).
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Try enter mostly lockstep execution with it.
     */
    for (;;)
    {
        uint32_t iOtherSeq1, iOtherSeq2;
        ASMCompilerBarrier();
        ASMSerializeInstruction();

        /* Publish my sequence number, swap it into the other side's slot and
           re-read my own; matching reads mean both CPUs saw each other. */
        ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);

        ASMCompilerBarrier();
        if (iOtherSeq1 == iOtherSeq2)
            return true;

        /* Did the other guy give up? Should we give up? */
        if (   iOtherSeq1 == UINT32_MAX
            || iOtherSeq2 == UINT32_MAX)
            return true;
        if (++iMySeq >= iMaxSeq)
        {
            ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
            return true;
        }
        ASMNopPause();
    }
}
3203
/** Master-side wrapper for supdrvTscDeltaSync2_Before; 'break's out of the
 * enclosing measurement loop when the synchronization setup fails. */
#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
        break; \
    } else do {} while (0)
/** Worker-side wrapper for supdrvTscDeltaSync2_Before; 'break's out of the
 * enclosing measurement loop when the synchronization setup fails. */
#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
        break; \
    } else do {} while (0)
3220
3221
3222static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3223 bool fIsMaster, RTCCUINTREG fEFlags)
3224{
3225 TSCDELTA_DBG_VARS();
3226 RT_NOREF1(pOtherSync);
3227
3228 /*
3229 * Wait for the 'ready' signal. In the master's case, this means the
3230 * worker has completed its data collection, while in the worker's case it
3231 * means the master is done processing the data and it's time for the next
3232 * loop iteration (or whatever).
3233 */
3234 ASMSetFlags(fEFlags);
3235 TSCDELTA_DBG_START_LOOP();
3236 for (;;)
3237 {
3238 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3239 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
3240 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
3241 return true;
3242 ASMNopPause();
3243 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
3244 { /* likely */}
3245 else
3246 {
3247 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
3248 return false; /* shouldn't ever happen! */
3249 }
3250 TSCDELTA_DBG_CHECK_LOOP();
3251 ASMNopPause();
3252 }
3253}
3254
/** Master-side wrapper for supdrvTscDeltaSync2_After; 'break's out of the
 * enclosing measurement loop on failure. */
#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
    { /* likely */ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
        break; \
    } else do {} while (0)

/** Master only: signal the worker that data processing is done and the next
 * round may start; 'break's out of the enclosing loop on state mismatch. */
#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
    /* \
     * Tell the worker that we're done processing the data and ready for the next round. \
     */ \
    if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
    { /* likely */ } \
    else if (true)\
    { \
        TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
        break; \
    } else do {} while (0)

/** Worker-side end-of-round step: acks the master and waits for the next
 * round via supdrvTscDeltaSync2_After; 'break's out of the loop on failure. */
#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (true) { \
        /* \
         * Tell the master that we're done collecting data and wait for the next round to start. \
         */ \
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
        { /* likely */ } \
        else \
        { \
            ASMSetFlags(a_fEFlags); \
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
            break; \
        } \
        if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
        { /* likely */ } \
        else \
        { \
            TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
            break; \
        } \
    } else do {} while (0)
/** @} */
3298
3299
#ifdef GIP_TSC_DELTA_METHOD_1
/**
 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
 *
 *
 * We ignore the first few runs of the loop in order to prime the
 * cache. Also, we need to be careful about using 'pause' instruction
 * in critical busy-wait loops in this code - it can cause undesired
 * behaviour with hyperthreading.
 *
 * We try to minimize the measurement error by computing the minimum
 * read time of the compare statement in the worker by taking TSC
 * measurements across it.
 *
 * It must be noted that the computed minimum read time is mostly to
 * eliminate huge deltas when the worker is too early and doesn't by
 * itself help produce more accurate deltas. We allow two times the
 * computed minimum as an arbitrary acceptable threshold. Therefore,
 * it is still possible to get negative deltas where there are none
 * when the worker is earlier. As long as these occasional negative
 * deltas are lower than the time it takes to exit guest-context and
 * the OS to reschedule EMT on a different CPU, we won't expose a TSC
 * that jumped backwards. It is due to the existence of the negative
 * deltas that we don't recompute the delta with the master and
 * worker interchanged to eliminate the remaining measurement error.
 *
 *
 * @param   pArgs               The argument/state data.
 * @param   pMySync             My synchronization structure.
 * @param   pOtherSync          My partner's synchronization structure.
 * @param   fIsMaster           Set if master, clear if worker.
 * @param   iTry                The attempt number.
 */
static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    PSUPGIPCPU  pGipCpuWorker   = pArgs->pWorker;
    PSUPGIPCPU  pGipCpuMaster   = pArgs->pMaster;
    uint64_t    uMinCmpReadTime = UINT64_MAX;
    unsigned    iLoop;
    NOREF(iTry);

    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * The master.
             */
            AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
                      ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
                       pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /* Publish our TSC; the loop guards against the write not being
               visible (u64TSCSample must leave the reserved value). */
            do
            {
                ASMSerializeInstruction();
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /* Process the data. */
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                {
                    /* Compute the delta and keep the best (smallest magnitude
                       towards the initial-value side) seen so far. */
                    int64_t iDelta = pGipCpuWorker->u64TSCSample
                                   - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
                    if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
                        ? iDelta < pGipCpuWorker->i64TSCDelta
                        : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
                        pGipCpuWorker->i64TSCDelta = iDelta;
                }
            }

            /* Reset our TSC sample and tell the worker to move on. */
            ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            uint64_t uTscWorker;
            uint64_t uTscWorkerFlushed;
            uint64_t uCmpReadTime;

            ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Keep reading the TSC until we notice that the master has read his. Reading
             * the TSC -after- the master has updated the memory is way too late. We thus
             * compensate by trying to measure how long it took for the worker to notice
             * the memory flushed from the master.
             */
            do
            {
                ASMSerializeInstruction();
                uTscWorker = ASMReadTSC();
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
            ASMSerializeInstruction();
            uTscWorkerFlushed = ASMReadTSC();

            uCmpReadTime = uTscWorkerFlushed - uTscWorker;
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                if (uCmpReadTime < (uMinCmpReadTime << 1))
                {
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }
                else
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
            }
            else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
            {
                /* Still calibrating the minimum compare/read time; no sample published. */
                if (uCmpReadTime < uMinCmpReadTime)
                    uMinCmpReadTime = uCmpReadTime;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }

    TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
                            pMySync->uSyncVar));

    /*
     * We must reset the worker TSC sample value in case it gets picked as a
     * GIP master later on (it's trashed above, naturally).
     */
    if (!fIsMaster)
        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
}
#endif /* GIP_TSC_DELTA_METHOD_1 */
3441
3442
3443#ifdef GIP_TSC_DELTA_METHOD_2
3444/*
3445 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3446 */
3447
3448# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3449# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
3450
3451
3452static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs)
3453{
3454 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3455 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3456 uint32_t idxResult;
3457 uint32_t cHits = 0;
3458
3459 /*
3460 * Look for matching entries in the master and worker tables.
3461 */
3462 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3463 {
3464 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3465 if (idxOther & 1)
3466 {
3467 idxOther >>= 1;
3468 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3469 {
3470 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3471 {
3472 int64_t iDelta;
3473 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3474 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3475 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3476 ? iDelta < iBestDelta
3477 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3478 iBestDelta = iDelta;
3479 cHits++;
3480 }
3481 }
3482 }
3483 }
3484
3485 /*
3486 * Save the results.
3487 */
3488 if (cHits > 2)
3489 pArgs->pWorker->i64TSCDelta = iBestDelta;
3490 pArgs->uMaster.M2.cHits += cHits;
3491}
3492
3493
/**
 * The core function of the 2nd TSC delta measurement algorithm.
 *
 * The idea here is that we have the two CPUs execute the exact same code
 * collecting a largish set of TSC samples.  The code has one data dependency on
 * the other CPU which intention it is to synchronize the execution as well as
 * help cross references the two sets of TSC samples (the sequence numbers).
 *
 * The @a fLag parameter is used to modify the execution a tiny bit on one or
 * both of the CPUs.  When @a fLag differs between the CPUs, it is thought that
 * it will help with making the CPUs enter lock step execution occasionally.
 *
 * @param   pMyData         This CPU's measurement data (results + iCurSeqNo).
 * @param   piOtherSeqNo    Pointer to the other CPU's current sequence number.
 * @param   fLag            Whether to insert a pause at the end of each
 *                          iteration (the lag fudge, see above).
 */
static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
{
    SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
    uint32_t                    cLeft  = RT_ELEMENTS(pMyData->aResults);

    ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
    ASMSerializeInstruction();
    while (cLeft-- > 0)
    {
        uint64_t uTsc;
        /* First increment makes our sequence number odd for the duration of
           the sample; the other CPU's value is captured for cross-referencing. */
        uint32_t iSeqMine  = ASMAtomicIncU32(&pMyData->iCurSeqNo);
        uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
        uTsc = ASMReadTSC();
        /* Second increment makes the sequence number even again => sample done. */
        ASMAtomicIncU32(&pMyData->iCurSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction();
        pEntry->iSeqMine  = iSeqMine;
        pEntry->iSeqOther = iSeqOther;
        pEntry->uTsc      = uTsc;
        pEntry++;
        ASMSerializeInstruction();
        if (fLag)
            ASMNopPause();
    }
}
3534
3535
/**
 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
 *
 * See supdrvTscDeltaMethod2CollectData for algorithm details.
 *
 * @param   pArgs               The argument/state data.
 * @param   pMySync             My synchronization structure.
 * @param   pOtherSync          My partner's synchronization structure.
 * @param   fIsMaster           Set if master, clear if worker.
 * @param   iTry                The attempt number.
 */
static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    unsigned iLoop;
    RT_NOREF1(iTry);

    for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * Adjust the loop lag fudge.  The master decides the lag setting
             * for both sides before each round (the worker just reads it).
             */
# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
            if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
            {
                /* Lag during the priming to be nice to everyone.. */
                pArgs->uMaster.M2.fLag = true;
                pArgs->uWorker.M2.fLag = true;
            }
            else
# endif
            if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
            {
                /* 25 % of the body without lagging. */
                pArgs->uMaster.M2.fLag = false;
                pArgs->uWorker.M2.fLag = false;
            }
            else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
            {
                /* 25 % of the body with both lagging. */
                pArgs->uMaster.M2.fLag = true;
                pArgs->uWorker.M2.fLag = true;
            }
            else
            {
                /* 50% of the body with alternating lag. */
                pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
                pArgs->uWorker.M2.fLag= (iLoop & 1) == 1;
            }

            /*
             * Sync up with the worker and collect data.
             */
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
            supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.
             */
# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
            if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
# endif
                supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs);

            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
            supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }
}

#endif /* GIP_TSC_DELTA_METHOD_2 */
3619
3620
3621
/**
 * Verifies the measured TSC delta by having the two CPUs take turns reading
 * the TSC and checking that the delta-adjusted values never appear to move
 * backwards.
 *
 * Note! The for(;;) loop always returns from within its body; the 'break'
 * statements hidden inside the TSCDELTA_*_SYNC_* macros are what drop us to
 * the timeout handling after the loop.
 *
 * @returns VINF_SUCCESS or VERR_OUT_OF_RANGE from pArgs->rcVerify on a
 *          completed run, VERR_TIMEOUT if synchronization failed (rcVerify is
 *          then set to VERR_TRY_AGAIN).
 * @param   pArgs               The argument/state data.
 * @param   pMySync             My synchronization structure.
 * @param   pOtherSync          My partner's synchronization structure.
 * @param   fIsMaster           Set if master, clear if worker.
 * @param   iWorkerTscDelta     The worker TSC delta to verify.
 */
static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
                                PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
{
    /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
    PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
    uint32_t   i;
    TSCDELTA_DBG_VARS();

    for (;;)
    {
        RTCCUINTREG fEFlags;
        AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
        AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));

        if (fIsMaster)
        {
            uint64_t uTscWorker;
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Collect TSC, master goes first.  The GO / GO_GO values on
             * uSyncVar are used as a two-phase ping-pong between the CPUs.
             */
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
            {
                /* Read, kick & wait #1. */
                uint64_t uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }

                /* Read, kick & wait #2. */
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
            }

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.  Since the samples were taken alternately
             * (master, worker, master, ...), each delta-adjusted pair must be
             * monotonically increasing; a negative diff means a bad delta.
             */
#ifdef TSCDELTA_VERIFY_WITH_STATS
            pArgs->cMaxVerifyTscTicks = INT64_MIN;
            pArgs->cMinVerifyTscTicks = INT64_MAX;
            pArgs->iVerifyBadTscDiff  = 0;
#endif
            ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
            uTscWorker = 0;
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
            {
                /* Master vs previous worker entry. */
                uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
                int64_t  iDiff;
                if (i > 0)
                {
                    iDiff = uTscMaster - uTscWorker;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    if (iDiff > pArgs->cMaxVerifyTscTicks)
                        pArgs->cMaxVerifyTscTicks = iDiff;
                    if (iDiff < pArgs->cMinVerifyTscTicks)
                        pArgs->cMinVerifyTscTicks = iDiff;
#endif
                    if (iDiff < 0)
                    {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                        pArgs->iVerifyBadTscDiff = -iDiff;
#endif
                        ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                        break;
                    }
                }

                /* Worker vs master. */
                uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
                iDiff = uTscWorker - uTscMaster;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                if (iDiff > pArgs->cMaxVerifyTscTicks)
                    pArgs->cMaxVerifyTscTicks = iDiff;
                if (iDiff < pArgs->cMinVerifyTscTicks)
                    pArgs->cMinVerifyTscTicks = iDiff;
#endif
                if (iDiff < 0)
                {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    pArgs->iVerifyBadTscDiff = iDiff;
#endif
                    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                    break;
                }
            }

            /* Done. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker, master leads.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
            {
                uint64_t uTsc;

                /* Wait, Read and Kick #1. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i] = uTsc;

                /* Wait, Read and Kick #2. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
        return pArgs->rcVerify;
    }

    /*
     * Timed out, please retry.
     */
    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
    return VERR_TIMEOUT;
}
3776
3777
3778
3779/**
3780 * Handles the special abort procedure during synchronization setup in
3781 * supdrvTscMeasureDeltaCallbackUnwrapped().
3782 *
3783 * @returns 0 (dummy, ignored)
3784 * @param pArgs Pointer to argument/state data.
3785 * @param pMySync Pointer to my sync structure.
3786 * @param fIsMaster Set if we're the master, clear if worker.
3787 * @param fTimeout Set if it's a timeout.
3788 */
3789DECL_NO_INLINE(static, int)
3790supdrvTscMeasureDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3791{
3792 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3793 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3794 TSCDELTA_DBG_VARS();
3795 RT_NOREF1(pMySync);
3796
3797 /*
3798 * Clear our sync pointer and make sure the abort flag is set.
3799 */
3800 ASMAtomicWriteNullPtr(ppMySync);
3801 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3802 if (fTimeout)
3803 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3804
3805 /*
3806 * Make sure the other party is out of there and won't be touching our
3807 * sync state again (would cause stack corruption).
3808 */
3809 TSCDELTA_DBG_START_LOOP();
3810 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3811 {
3812 ASMNopPause();
3813 ASMNopPause();
3814 ASMNopPause();
3815 TSCDELTA_DBG_CHECK_LOOP();
3816 }
3817
3818 return 0;
3819}
3820
3821
3822/**
3823 * This is used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
3824 * and compute the delta between them.
3825 *
3826 * To reduce code size a good when timeout handling was added, a dummy return
3827 * value had to be added (saves 1-3 lines per timeout case), thus this
3828 * 'Unwrapped' function and the dummy 0 return value.
3829 *
3830 * @returns 0 (dummy, ignored)
3831 * @param idCpu The CPU we are current scheduled on.
3832 * @param pArgs Pointer to a parameter package.
3833 *
3834 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3835 * read the TSC at exactly the same time on both the master and the
3836 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3837 * contention, SMI, pipelining etc. there is no guaranteed way of
3838 * doing this on x86 CPUs.
3839 */
3840static int supdrvTscMeasureDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3841{
3842 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3843 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3844 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3845 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3846 uint32_t iTry;
3847 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3848 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3849 SUPTSCDELTASYNC2 MySync;
3850 PSUPTSCDELTASYNC2 pOtherSync;
3851 int rc;
3852 TSCDELTA_DBG_VARS();
3853
3854 /* A bit of paranoia first. */
3855 if (!pGipCpuMaster || !pGipCpuWorker)
3856 return 0;
3857
3858 /*
3859 * If the CPU isn't part of the measurement, return immediately.
3860 */
3861 if ( !fIsMaster
3862 && idCpu != pGipCpuWorker->idCpu)
3863 return 0;
3864
3865 /*
3866 * Set up my synchronization stuff and wait for the other party to show up.
3867 *
3868 * We don't wait forever since the other party may be off fishing (offline,
3869 * spinning with ints disables, whatever), we must play nice to the rest of
3870 * the system as this context generally isn't one in which we will get
3871 * preempted and we may hold up a number of lower priority interrupts.
3872 */
3873 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3874 ASMAtomicWritePtr(ppMySync, &MySync);
3875 MySync.uTscStart = ASMReadTSC();
3876 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3877
3878 /* Look for the partner, might not be here yet... Special abort considerations. */
3879 iTry = 0;
3880 TSCDELTA_DBG_START_LOOP();
3881 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3882 {
3883 ASMNopPause();
3884 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3885 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu) )
3886 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3887 if ( (iTry++ & 0xff) == 0
3888 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3889 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3890 TSCDELTA_DBG_CHECK_LOOP();
3891 ASMNopPause();
3892 }
3893
3894 /* I found my partner, waiting to be found... Special abort considerations. */
3895 if (fIsMaster)
3896 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */
3897 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3898
3899 iTry = 0;
3900 TSCDELTA_DBG_START_LOOP();
3901 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3902 {
3903 ASMNopPause();
3904 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3905 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3906 if ( (iTry++ & 0xff) == 0
3907 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3908 {
3909 if ( fIsMaster
3910 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3911 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3912 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3913 }
3914 TSCDELTA_DBG_CHECK_LOOP();
3915 }
3916
3917 if (!fIsMaster)
3918 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3919 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3920
3921/** @todo Add a resumable state to pArgs so we don't waste time if we time
3922 * out or something. Timeouts are legit, any of the two CPUs may get
3923 * interrupted. */
3924
3925 /*
3926 * Start by seeing if we have a zero delta between the two CPUs.
3927 * This should normally be the case.
3928 */
3929 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3930 if (RT_SUCCESS(rc))
3931 {
3932 if (fIsMaster)
3933 {
3934 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3935 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3936 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3937 }
3938 }
3939 /*
3940 * If the verification didn't time out, do regular delta measurements.
3941 * We retry this until we get a reasonable value.
3942 */
3943 else if (rc != VERR_TIMEOUT)
3944 {
3945 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3946 for (iTry = 0; iTry < 12; iTry++)
3947 {
3948 /*
3949 * Check the state before we start.
3950 */
3951 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3952 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3953 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3954 {
3955 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3956 break;
3957 }
3958
3959 /*
3960 * Do the measurements.
3961 */
3962#ifdef GIP_TSC_DELTA_METHOD_1
3963 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3964#elif defined(GIP_TSC_DELTA_METHOD_2)
3965 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3966#else
3967# error "huh??"
3968#endif
3969
3970 /*
3971 * Check the state.
3972 */
3973 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3974 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3975 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3976 {
3977 if (fIsMaster)
3978 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3979 else
3980 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3981 break;
3982 }
3983
3984 /*
3985 * Success? If so, stop trying. Master decides.
3986 */
3987 if (fIsMaster)
3988 {
3989 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3990 {
3991 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3992 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3993 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3994 break;
3995 }
3996 }
3997 }
3998 if (fIsMaster)
3999 pArgs->iTry = iTry;
4000 }
4001
4002 /*
4003 * End the synchronization dance. We tell the other that we're done,
4004 * then wait for the same kind of reply.
4005 */
4006 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
4007 ASMAtomicWriteNullPtr(ppMySync);
4008 iTry = 0;
4009 TSCDELTA_DBG_START_LOOP();
4010 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
4011 {
4012 iTry++;
4013 if ( iTry == 0
4014 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu))
4015 break; /* this really shouldn't happen. */
4016 TSCDELTA_DBG_CHECK_LOOP();
4017 ASMNopPause();
4018 }
4019
4020 /*
4021 * Collect some runtime stats.
4022 */
4023 if (fIsMaster)
4024 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
4025 else
4026 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
4027 return 0;
4028}
4029
4030/**
4031 * Callback used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
4032 * and compute the delta between them.
4033 *
4034 * @param idCpu The CPU we are current scheduled on.
4035 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
4036 * @param pvUser2 Unused.
4037 */
4038static DECLCALLBACK(void) supdrvTscMeasureDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4039{
4040 supdrvTscMeasureDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
4041 RT_NOREF1(pvUser2);
4042}
4043
4044
4045/**
4046 * Measures the TSC delta between the master GIP CPU and one specified worker
4047 * CPU.
4048 *
4049 * @returns VBox status code.
4050 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
4051 * failure.
4052 * @param pDevExt Pointer to the device instance data.
4053 * @param idxWorker The index of the worker CPU from the GIP's array of
4054 * CPUs.
4055 *
4056 * @remarks This must be called with preemption enabled!
4057 */
4058static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
4059{
4060 int rc;
4061 int rc2;
4062 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4063 RTCPUID idMaster = pDevExt->idGipMaster;
4064 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
4065 PSUPGIPCPU pGipCpuMaster;
4066 uint32_t iGipCpuMaster;
4067#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4068 uint32_t u32Tmp;
4069#endif
4070
4071 /* Validate input a bit. */
4072 AssertReturn(pGip, VERR_INVALID_PARAMETER);
4073 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4074 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4075
4076 /*
4077 * Don't attempt measuring the delta for the GIP master.
4078 */
4079 if (pGipCpuWorker->idCpu == idMaster)
4080 {
4081 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
4082 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
4083 return VINF_SUCCESS;
4084 }
4085
4086 /*
4087 * One measurement at a time, at least for now. We might be using
4088 * broadcast IPIs so, so be nice to the rest of the system.
4089 */
4090#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4091 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
4092#else
4093 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
4094#endif
4095 if (RT_FAILURE(rc))
4096 return rc;
4097
4098 /*
4099 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
4100 * try pick a different master. (This fudge only works with multi core systems.)
4101 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
4102 *
4103 * We skip this on AMDs for now as their HTT is different from Intel's and
4104 * it doesn't seem to have any favorable effect on the results.
4105 *
4106 * If the master is offline, we need a new master too, so share the code.
4107 */
4108 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
4109 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
4110 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
4111#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4112 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
4113 && pGip->cOnlineCpus > 2
4114 && ASMHasCpuId()
4115 && RTX86IsValidStdRange(ASMCpuId_EAX(0))
4116 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
4117 && ( !ASMIsAmdCpu()
4118 || RTX86GetCpuFamily(u32Tmp = ASMCpuId_EAX(1)) > 0x15
4119 || ( RTX86GetCpuFamily(u32Tmp) == 0x15 /* Piledriver+, not bulldozer (FX-4150 didn't like it). */
4120 && RTX86GetCpuModelAMD(u32Tmp) >= 0x02) ) )
4121 || !RTMpIsCpuOnline(idMaster) )
4122 {
4123 uint32_t i;
4124 for (i = 0; i < pGip->cCpus; i++)
4125 if ( i != iGipCpuMaster
4126 && i != idxWorker
4127 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
4128 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
4129 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
4130 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
4131 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
4132 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
4133 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
4134 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
4135 {
4136 iGipCpuMaster = i;
4137 pGipCpuMaster = &pGip->aCPUs[i];
4138 idMaster = pGipCpuMaster->idCpu;
4139 break;
4140 }
4141 }
4142#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
4143
4144 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
4145 {
4146 /*
4147 * Initialize data package for the RTMpOnPair callback.
4148 */
4149 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
4150 if (pArgs)
4151 {
4152 pArgs->pWorker = pGipCpuWorker;
4153 pArgs->pMaster = pGipCpuMaster;
4154 pArgs->pDevExt = pDevExt;
4155 pArgs->pSyncMaster = NULL;
4156 pArgs->pSyncWorker = NULL;
4157 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
4158
4159 /*
4160 * Do the RTMpOnPair call. We reset i64TSCDelta first so we
4161 * and supdrvTscMeasureDeltaCallback can use it as a success check.
4162 */
4163 /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deals with
4164 * that when doing the restart loop reorg. */
4165 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
4166 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
4167 supdrvTscMeasureDeltaCallback, pArgs, NULL);
4168 if (RT_SUCCESS(rc))
4169 {
4170#if 0
4171 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
4172 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
4173 pArgs->fTimedOut ? " timed out" :"");
4174#endif
4175#if 0
4176 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
4177 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
4178#endif
4179 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
4180 {
4181 /*
4182 * Work the TSC delta applicability rating. It starts
4183 * optimistic in supdrvGipInit, we downgrade it here.
4184 */
4185 SUPGIPUSETSCDELTA enmRating;
4186 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
4187 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
4188 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
4189 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
4190 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
4191 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
4192 else
4193 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
4194 if (pGip->enmUseTscDelta < enmRating)
4195 {
4196 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
4197 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
4198 }
4199 }
4200 else
4201 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
4202 }
4203 /** @todo return try-again if we get an offline CPU error. */
4204
4205 RTMemFree(pArgs);
4206 }
4207 else
4208 rc = VERR_NO_MEMORY;
4209 }
4210 else
4211 rc = VERR_CPU_OFFLINE;
4212
4213 /*
4214 * We're done now.
4215 */
4216#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4217 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4218#else
4219 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4220#endif
4221 return rc;
4222}
4223
4224
4225/**
4226 * Resets the TSC-delta related TSC samples and optionally the deltas
4227 * themselves.
4228 *
4229 * @param pDevExt Pointer to the device instance data.
4230 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
4231 *
4232 * @remarks This might be called while holding a spinlock!
4233 */
4234static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
4235{
4236 unsigned iCpu;
4237 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4238 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4239 {
4240 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
4241 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
4242 if (fResetTscDeltas)
4243 {
4244 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
4245 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
4246 }
4247 }
4248}
4249
4250
4251/**
4252 * Picks an online CPU as the master TSC for TSC-delta computations.
4253 *
4254 * @returns VBox status code.
4255 * @param pDevExt Pointer to the device instance data.
4256 * @param pidxMaster Where to store the CPU array index of the chosen
4257 * master. Optional, can be NULL.
4258 */
4259static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
4260{
4261 /*
4262 * Pick the first CPU online as the master TSC and make it the new GIP master based
4263 * on the APIC ID.
4264 *
4265 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
4266 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
4267 * master as this point since the sync/async timer isn't created yet.
4268 */
4269 unsigned iCpu;
4270 uint32_t idxMaster = UINT32_MAX;
4271 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4272 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
4273 {
4274 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
4275 if (idxCpu != UINT16_MAX)
4276 {
4277 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
4278 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
4279 {
4280 idxMaster = idxCpu;
4281 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
4282 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
4283 if (pidxMaster)
4284 *pidxMaster = idxMaster;
4285 return VINF_SUCCESS;
4286 }
4287 }
4288 }
4289 return VERR_CPU_OFFLINE;
4290}
4291
4292
4293/**
4294 * Performs the initial measurements of the TSC deltas between CPUs.
4295 *
4296 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
4297 * triggered by it if threaded.
4298 *
4299 * @returns VBox status code.
4300 * @param pDevExt Pointer to the device instance data.
4301 *
4302 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
4303 * idCpu, GIP's online CPU set which are populated in
4304 * supdrvGipInitOnCpu().
4305 */
4306static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt)
4307{
4308 PSUPGIPCPU pGipCpuMaster;
4309 unsigned iCpu;
4310 unsigned iOddEven;
4311 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4312 uint32_t idxMaster = UINT32_MAX;
4313 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
4314
4315 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4316 supdrvTscResetSamples(pDevExt, true /* fClearDeltas */);
4317 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
4318 if (RT_FAILURE(rc))
4319 {
4320 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
4321 return rc;
4322 }
4323 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4324 pGipCpuMaster = &pGip->aCPUs[idxMaster];
4325 Assert(pDevExt->idGipMaster == pGipCpuMaster->idCpu);
4326
4327 /*
4328 * If there is only a single CPU online we have nothing to do.
4329 */
4330 if (pGip->cOnlineCpus <= 1)
4331 {
4332 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
4333 return VINF_SUCCESS;
4334 }
4335
4336 /*
4337 * Loop thru the GIP CPU array and get deltas for each CPU (except the
4338 * master). We do the CPUs with the even numbered APIC IDs first so that
4339 * we've got alternative master CPUs to pick from on hyper-threaded systems.
4340 */
4341 for (iOddEven = 0; iOddEven < 2; iOddEven++)
4342 {
4343 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4344 {
4345 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4346 if ( iCpu != idxMaster
4347 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4348 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4349 {
4350 rc = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4351 if (RT_FAILURE(rc))
4352 {
4353 SUPR0Printf("supdrvTscMeasureDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
4354 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
4355 break;
4356 }
4357
4358 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4359 {
4360 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4361 rc = VERR_TRY_AGAIN;
4362 break;
4363 }
4364 }
4365 }
4366 }
4367
4368 return rc;
4369}
4370
4371
4372#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4373
4374/**
4375 * Switches the TSC-delta measurement thread into the butchered state.
4376 *
4377 * @returns VBox status code.
4378 * @param pDevExt Pointer to the device instance data.
4379 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4380 * @param pszFailed An error message to log.
4381 * @param rcFailed The error code to exit the thread with.
4382 */
4383static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4384{
4385 if (!fSpinlockHeld)
4386 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4387
4388 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4389 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4390 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
4391 return rcFailed;
4392}
4393
4394
4395/**
4396 * The TSC-delta measurement thread.
4397 *
4398 * @returns VBox status code.
4399 * @param hThread The thread handle.
4400 * @param pvUser Opaque pointer to the device instance data.
4401 */
4402static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
4403{
4404 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
4405 int rc = VERR_INTERNAL_ERROR_2;
4406 for (;;)
4407 {
4408 /*
4409 * Switch on the current state.
4410 */
4411 SUPDRVTSCDELTATHREADSTATE enmState;
4412 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4413 enmState = pDevExt->enmTscDeltaThreadState;
4414 switch (enmState)
4415 {
4416 case kTscDeltaThreadState_Creating:
4417 {
4418 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4419 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
4420 if (RT_FAILURE(rc))
4421 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4422 RT_FALL_THRU();
4423 }
4424
4425 case kTscDeltaThreadState_Listening:
4426 {
4427 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4428
4429 /*
4430 * Linux counts uninterruptible sleeps as load, hence we shall do a
4431 * regular, interruptible sleep here and ignore wake ups due to signals.
4432 * See task_contributes_to_load() in include/linux/sched.h in the Linux sources.
4433 */
4434 rc = RTThreadUserWaitNoResume(hThread, pDevExt->cMsTscDeltaTimeout);
4435 if ( RT_FAILURE(rc)
4436 && rc != VERR_TIMEOUT
4437 && rc != VERR_INTERRUPTED)
4438 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
4439 RTThreadUserReset(hThread);
4440 break;
4441 }
4442
4443 case kTscDeltaThreadState_WaitAndMeasure:
4444 {
4445 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
4446 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
4447 if (RT_FAILURE(rc))
4448 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4449 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4450 RTThreadSleep(1);
4451 RT_FALL_THRU();
4452 }
4453
4454 case kTscDeltaThreadState_Measuring:
4455 {
4456 if (pDevExt->fTscThreadRecomputeAllDeltas)
4457 {
4458 int cTries = 8;
4459 int cMsWaitPerTry = 10;
4460 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4461 Assert(pGip);
4462 do
4463 {
4464 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
4465 rc = supdrvTscMeasureInitialDeltas(pDevExt);
4466 if ( RT_SUCCESS(rc)
4467 || ( RT_FAILURE(rc)
4468 && rc != VERR_TRY_AGAIN
4469 && rc != VERR_CPU_OFFLINE))
4470 {
4471 break;
4472 }
4473 RTThreadSleep(cMsWaitPerTry);
4474 } while (cTries-- > 0);
4475 pDevExt->fTscThreadRecomputeAllDeltas = false;
4476 }
4477 else
4478 {
4479 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4480 unsigned iCpu;
4481
4482 /* Measure TSC-deltas only for the CPUs that are in the set. */
4483 rc = VINF_SUCCESS;
4484 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4485 {
4486 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4487 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4488 {
4489 if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
4490 {
4491 int rc2 = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4492 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4493 rc = rc2;
4494 }
4495 else
4496 {
4497 /*
4498 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex(),
4499 * mark the delta as fine to get the timer thread off our back.
4500 */
4501 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
4502 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
4503 }
4504 }
4505 }
4506 }
4507 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4508 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4509 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4510 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4511 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
4512 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
4513 break;
4514 }
4515
4516 case kTscDeltaThreadState_Terminating:
4517 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
4518 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4519 return VINF_SUCCESS;
4520
4521 case kTscDeltaThreadState_Butchered:
4522 default:
4523 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
4524 }
4525 }
4526 /* not reached */
4527}
4528
4529
4530/**
4531 * Waits for the TSC-delta measurement thread to respond to a state change.
4532 *
4533 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
4534 * other error code on internal error.
4535 *
4536 * @param pDevExt The device instance data.
4537 * @param enmCurState The current state.
4538 * @param enmNewState The new state we're waiting for it to enter.
4539 */
4540static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
4541 SUPDRVTSCDELTATHREADSTATE enmNewState)
4542{
4543 SUPDRVTSCDELTATHREADSTATE enmActualState;
4544 int rc;
4545
4546 /*
4547 * Wait a short while for the expected state transition.
4548 */
4549 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
4550 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4551 enmActualState = pDevExt->enmTscDeltaThreadState;
4552 if (enmActualState == enmNewState)
4553 {
4554 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4555 rc = VINF_SUCCESS;
4556 }
4557 else if (enmActualState == enmCurState)
4558 {
4559 /*
4560 * Wait longer if the state has not yet transitioned to the one we want.
4561 */
4562 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4563 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
4564 if ( RT_SUCCESS(rc)
4565 || rc == VERR_TIMEOUT)
4566 {
4567 /*
4568 * Check the state whether we've succeeded.
4569 */
4570 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4571 enmActualState = pDevExt->enmTscDeltaThreadState;
4572 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4573 if (enmActualState == enmNewState)
4574 rc = VINF_SUCCESS;
4575 else if (enmActualState == enmCurState)
4576 {
4577 rc = VERR_TIMEOUT;
4578 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmActualState=%d enmNewState=%d\n",
4579 enmActualState, enmNewState));
4580 }
4581 else
4582 {
4583 rc = VERR_INTERNAL_ERROR;
4584 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
4585 enmActualState, enmNewState));
4586 }
4587 }
4588 else
4589 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
4590 }
4591 else
4592 {
4593 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4594 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state %d when transitioning from %d to %d\n",
4595 enmActualState, enmCurState, enmNewState));
4596 rc = VERR_INTERNAL_ERROR;
4597 }
4598
4599 return rc;
4600}
4601
4602
4603/**
4604 * Signals the TSC-delta thread to start measuring TSC-deltas.
4605 *
4606 * @param pDevExt Pointer to the device instance data.
4607 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4608 */
4609static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4610{
4611 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4612 {
4613 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4614 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4615 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4616 {
4617 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4618 if (fForceAll)
4619 pDevExt->fTscThreadRecomputeAllDeltas = true;
4620 }
4621 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4622 && fForceAll)
4623 pDevExt->fTscThreadRecomputeAllDeltas = true;
4624 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4625 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4626 }
4627}
4628
4629
4630/**
4631 * Terminates the actual thread running supdrvTscDeltaThread().
4632 *
4633 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4634 * supdrvTscDeltaTerm().
4635 *
4636 * @param pDevExt Pointer to the device instance data.
4637 */
4638static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4639{
4640 int rc;
4641 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4642 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4643 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4644 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4645 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4646 if (RT_FAILURE(rc))
4647 {
4648 /* Signal a few more times before giving up. */
4649 int cTriesLeft = 5;
4650 while (--cTriesLeft > 0)
4651 {
4652 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4653 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4654 if (rc != VERR_TIMEOUT)
4655 break;
4656 }
4657 }
4658}
4659
4660
4661/**
4662 * Initializes and spawns the TSC-delta measurement thread.
4663 *
4664 * A thread is required for servicing re-measurement requests from events like
4665 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4666 * under all contexts on all OSs.
4667 *
4668 * @returns VBox status code.
4669 * @param pDevExt Pointer to the device instance data.
4670 *
4671 * @remarks Must only be called -after- initializing GIP and setting up MP
4672 * notifications!
4673 */
4674static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4675{
4676 int rc;
4677 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4678 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4679 if (RT_SUCCESS(rc))
4680 {
4681 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4682 if (RT_SUCCESS(rc))
4683 {
4684 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4685 pDevExt->cMsTscDeltaTimeout = 60000;
4686 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4687 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4688 if (RT_SUCCESS(rc))
4689 {
4690 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4691 if (RT_SUCCESS(rc))
4692 {
4693 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4694 return rc;
4695 }
4696
4697 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4698 supdrvTscDeltaThreadTerminate(pDevExt);
4699 }
4700 else
4701 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4702 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4703 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4704 }
4705 else
4706 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4707 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4708 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4709 }
4710 else
4711 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4712
4713 return rc;
4714}
4715
4716
4717/**
4718 * Terminates the TSC-delta measurement thread and cleanup.
4719 *
4720 * @param pDevExt Pointer to the device instance data.
4721 */
4722static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4723{
4724 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4725 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4726 {
4727 supdrvTscDeltaThreadTerminate(pDevExt);
4728 }
4729
4730 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4731 {
4732 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4733 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4734 }
4735
4736 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4737 {
4738 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4739 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4740 }
4741
4742 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4743}
4744
4745#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4746
4747/**
4748 * Measure the TSC delta for the CPU given by its CPU set index.
4749 *
4750 * @returns VBox status code.
4751 * @retval VERR_INTERRUPTED if interrupted while waiting.
4752 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4753 * measurement.
4754 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4755 *
4756 * @param pSession The caller's session. GIP must've been mapped.
4757 * @param iCpuSet The CPU set index of the CPU to measure.
4758 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4759 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4760 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4761 * ready.
4762 * @param cTries Number of times to try, pass 0 for the default.
4763 */
SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
                                              RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
{
    PSUPDRVDEVEXT pDevExt;
    PSUPGLOBALINFOPAGE pGip;
    uint16_t iGipCpu;
    int rc;
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    uint64_t msTsStartWait;
    uint32_t iWaitLoop;
#endif

    /*
     * Validate and adjust the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    if (!pSession->fGipReferenced)
        return VERR_WRONG_ORDER;

    pDevExt = pSession->pDevExt;
    AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);

    pGip = pDevExt->pGip;
    AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);

    /* Translate the CPU set index into a GIP CPU table index. */
    AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
    AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
    iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);

    if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
        return VERR_INVALID_FLAGS;

    /*
     * The request is a noop if the TSC delta isn't being used.
     */
    if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
        return VINF_SUCCESS;

    /* 0 selects the default of 12 tries; otherwise clamp to at most 256. */
    if (cTries == 0)
        cTries = 12;
    else if (cTries > 256)
        cTries = 256;

    /* 0 selects the default retry interval of 2 ms; otherwise clamp to at most 1000 ms. */
    if (cMsWaitRetry == 0)
        cMsWaitRetry = 2;
    else if (cMsWaitRetry > 1000)
        cMsWaitRetry = 1000;

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Has the TSC already been measured and we're not forced to redo it?
     */
    if (   pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
        && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
        return VINF_SUCCESS;

    /*
     * Asynchronous request? Forward it to the thread, no waiting.
     */
    if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
    {
        /** @todo Async. doesn't implement options like retries, waiting. We'll need
         *        to pass those options to the thread somehow and implement it in the
         *        thread. Check if anyone uses/needs fAsync before implementing this. */
        /* Queue the CPU for measurement and nudge the thread into the
           wait-and-measure state if it is idle or already measuring. */
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
        if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
            || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
        {
            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
            rc = VINF_SUCCESS;
        }
        else if (pDevExt->enmTscDeltaThreadState != kTscDeltaThreadState_WaitAndMeasure)
            rc = VERR_THREAD_IS_DEAD;
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        RTThreadUserSignal(pDevExt->hTscDeltaThread);
        /* NOTE(review): rc computed above (possibly VERR_THREAD_IS_DEAD) is
           discarded and the async path always reports success — presumably
           fire-and-forget by design, but worth confirming. */
        return VINF_SUCCESS;
    }

    /*
     * If a TSC-delta measurement request is already being serviced by the thread,
     * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
     */
    msTsStartWait = RTTimeSystemMilliTS();
    for (iWaitLoop = 0;; iWaitLoop++)
    {
        uint64_t cMsElapsed;
        SUPDRVTSCDELTATHREADSTATE enmState;
        /* Sample the thread state under the spinlock. */
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        enmState = pDevExt->enmTscDeltaThreadState;
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

        if (enmState == kTscDeltaThreadState_Measuring)
        { /* Must wait, the thread is busy. */ }
        else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
        { /* Must wait, this state only says what will happen next. */ }
        else if (enmState == kTscDeltaThreadState_Terminating)
        { /* Must wait, this state only says what should happen next. */ }
        else
            break; /* All other states, the thread is either idly listening or dead. */

        /* Wait or fail.  cMsWaitThread == 0 means "don't wait at all". */
        if (cMsWaitThread == 0)
            return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
        cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
        if (cMsElapsed >= cMsWaitThread)
            return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;

        /* Sleep with gentle backoff (1..10 ms), never past the deadline. */
        rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
        if (rc == VERR_INTERRUPTED)
            return rc;
    }
#endif /* SUPDRV_USE_TSC_DELTA_THREAD */

    /*
     * Try measure the TSC delta the given number of times.
     */
    for (;;)
    {
        /* Unless we're forced to measure the delta, check whether it's done already. */
        if (   !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
            && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
        {
            rc = VINF_SUCCESS;
            break;
        }

        /* Measure it.  Any status other than "measurement failed" (success or
           a hard error like VERR_CPU_OFFLINE) ends the retry loop. */
        rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
        if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
        {
            Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
            break;
        }

        /* Retry? */
        if (cTries <= 1)
            break;
        cTries--;

        /* Always delay between retries (be nice to the rest of the system
           and avoid the BSOD hounds). */
        rc = RTThreadSleep(cMsWaitRetry);
        if (rc == VERR_INTERRUPTED)
            break;
    }

    return rc;
}
4914SUPR0_EXPORT_SYMBOL(SUPR0TscDeltaMeasureBySetIndex);
4915
4916
4917/**
4918 * Service a TSC-delta measurement request.
4919 *
4920 * @returns VBox status code.
4921 * @param pDevExt Pointer to the device instance data.
4922 * @param pSession The support driver session.
4923 * @param pReq Pointer to the TSC-delta measurement request.
4924 */
4925int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4926{
4927 uint32_t cTries;
4928 uint32_t iCpuSet;
4929 uint32_t fFlags;
4930 RTMSINTERVAL cMsWaitRetry;
4931 RT_NOREF1(pDevExt);
4932
4933 /*
4934 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4935 */
4936 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4937
4938 if (pReq->u.In.idCpu == NIL_RTCPUID)
4939 return VERR_INVALID_CPU_ID;
4940 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4941 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4942 return VERR_INVALID_CPU_ID;
4943
4944 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4945
4946 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4947
4948 fFlags = 0;
4949 if (pReq->u.In.fAsync)
4950 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4951 if (pReq->u.In.fForce)
4952 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4953
4954 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4955 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4956 cTries);
4957}
4958
4959
4960/**
4961 * Reads TSC with delta applied.
4962 *
4963 * Will try to resolve delta value INT64_MAX before applying it. This is the
4964 * main purpose of this function, to handle the case where the delta needs to be
4965 * determined.
4966 *
4967 * @returns VBox status code.
4968 * @param pDevExt Pointer to the device instance data.
4969 * @param pSession The support driver session.
4970 * @param pReq Pointer to the TSC-read request.
4971 */
4972int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
4973{
4974 PSUPGLOBALINFOPAGE pGip;
4975 int rc;
4976
4977 /*
4978 * Validate. We require the client to have mapped GIP (no asserting on
4979 * ring-3 preconditions).
4980 */
4981 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
4982 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4983 return VERR_WRONG_ORDER;
4984 pGip = pDevExt->pGip;
4985 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
4986
4987 /*
4988 * We're usually here because we need to apply delta, but we shouldn't be
4989 * upset if the GIP is some different mode.
4990 */
4991 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4992 {
4993 uint32_t cTries = 0;
4994 for (;;)
4995 {
4996 /*
4997 * Start by gathering the data, using CLI for disabling preemption
4998 * while we do that.
4999 */
5000 RTCCUINTREG fEFlags = ASMIntDisableFlags();
5001 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
5002 int iGipCpu = 0; /* gcc maybe used uninitialized */
5003 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
5004 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
5005 {
5006 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
5007 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
5008 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5009 ASMSetFlags(fEFlags);
5010
5011 /*
5012 * If we're lucky we've got a delta, but no predictions here
5013 * as this I/O control is normally only used when the TSC delta
5014 * is set to INT64_MAX.
5015 */
5016 if (i64Delta != INT64_MAX)
5017 {
5018 pReq->u.Out.u64AdjustedTsc -= i64Delta;
5019 rc = VINF_SUCCESS;
5020 break;
5021 }
5022
5023 /* Give up after a few times. */
5024 if (cTries >= 4)
5025 {
5026 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
5027 break;
5028 }
5029
5030 /* Need to measure the delta an try again. */
5031 rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
5032 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
5033 /** @todo should probably delay on failure... dpc watchdogs */
5034 }
5035 else
5036 {
5037 /* This really shouldn't happen. */
5038 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
5039 pReq->u.Out.idApic = (uint16_t)supdrvGipGetApicIdSlow(); /** @todo idApic should be 32-bit... */
5040 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5041 ASMSetFlags(fEFlags);
5042 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
5043 break;
5044 }
5045 }
5046 }
5047 else
5048 {
5049 /*
5050 * No delta to apply. Easy. Deal with preemption the lazy way.
5051 */
5052 RTCCUINTREG fEFlags = ASMIntDisableFlags();
5053 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
5054 int iGipCpu = 0; /* gcc may be used uninitialized */
5055 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
5056 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
5057 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
5058 else
5059 pReq->u.Out.idApic = (uint16_t)supdrvGipGetApicIdSlow(); /** @todo idApic should be 32-bit... */
5060 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5061 ASMSetFlags(fEFlags);
5062 rc = VINF_SUCCESS;
5063 }
5064
5065 return rc;
5066}
5067
5068
5069/**
5070 * Worker for supdrvIOCtl_GipSetFlags.
5071 *
5072 * @returns VBox status code.
5073 * @retval VERR_WRONG_ORDER if an enable-once-per-session flag is set again for
5074 * a session.
5075 *
5076 * @param pDevExt Pointer to the device instance data.
5077 * @param pSession The support driver session.
5078 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
5079 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
5080 *
5081 * @remarks Caller must own the GIP mutex.
5082 *
5083 * @remarks This function doesn't validate any of the flags.
5084 */
5085static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
5086{
5087 uint32_t cRefs;
5088 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
5089 AssertMsg((fOrMask & fAndMask) == fOrMask, ("%#x & %#x\n", fOrMask, fAndMask)); /* ASSUMED by code below */
5090
5091 /*
5092 * Compute GIP test-mode flags.
5093 */
5094 if (fOrMask & SUPGIP_FLAGS_TESTING_ENABLE)
5095 {
5096 if (!pSession->fGipTestMode)
5097 {
5098 Assert(pDevExt->cGipTestModeRefs < _64K);
5099 pSession->fGipTestMode = true;
5100 cRefs = ++pDevExt->cGipTestModeRefs;
5101 if (cRefs == 1)
5102 {
5103 fOrMask |= SUPGIP_FLAGS_TESTING | SUPGIP_FLAGS_TESTING_START;
5104 fAndMask &= ~SUPGIP_FLAGS_TESTING_STOP;
5105 }
5106 }
5107 else
5108 {
5109 LogRelMax(10, ("supdrvGipSetFlags: SUPGIP_FLAGS_TESTING_ENABLE already set for this session\n"));
5110 return VERR_WRONG_ORDER;
5111 }
5112 }
5113 else if ( !(fAndMask & SUPGIP_FLAGS_TESTING_ENABLE)
5114 && pSession->fGipTestMode)
5115 {
5116 Assert(pDevExt->cGipTestModeRefs > 0);
5117 Assert(pDevExt->cGipTestModeRefs < _64K);
5118 pSession->fGipTestMode = false;
5119 cRefs = --pDevExt->cGipTestModeRefs;
5120 if (!cRefs)
5121 fOrMask |= SUPGIP_FLAGS_TESTING_STOP;
5122 else
5123 fAndMask |= SUPGIP_FLAGS_TESTING_ENABLE;
5124 }
5125
5126 /*
5127 * Commit the flags. This should be done as atomically as possible
5128 * since the flag consumers won't be holding the GIP mutex.
5129 */
5130 ASMAtomicOrU32(&pGip->fFlags, fOrMask);
5131 ASMAtomicAndU32(&pGip->fFlags, fAndMask);
5132
5133 return VINF_SUCCESS;
5134}
5135
5136
5137/**
5138 * Sets GIP test mode parameters.
5139 *
5140 * @returns VBox status code.
5141 * @param pDevExt Pointer to the device instance data.
5142 * @param pSession The support driver session.
5143 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
5144 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
5145 */
5146int VBOXCALL supdrvIOCtl_GipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
5147{
5148 PSUPGLOBALINFOPAGE pGip;
5149 int rc;
5150
5151 /*
5152 * Validate. We require the client to have mapped GIP (no asserting on
5153 * ring-3 preconditions).
5154 */
5155 AssertPtr(pDevExt); AssertPtr(pSession); /* paranoia^2 */
5156 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
5157 return VERR_WRONG_ORDER;
5158 pGip = pDevExt->pGip;
5159 AssertReturn(pGip, VERR_INTERNAL_ERROR_3);
5160
5161 if (fOrMask & ~SUPGIP_FLAGS_VALID_MASK)
5162 return VERR_INVALID_PARAMETER;
5163 if ((fAndMask & ~SUPGIP_FLAGS_VALID_MASK) != ~SUPGIP_FLAGS_VALID_MASK)
5164 return VERR_INVALID_PARAMETER;
5165
5166 /*
5167 * Don't confuse supdrvGipSetFlags or anyone else by both setting
5168 * and clearing the same flags. AND takes precedence.
5169 */
5170 fOrMask &= fAndMask;
5171
5172 /*
5173 * Take the loader lock to avoid having to think about races between two
5174 * clients changing the flags at the same time (state is not simple).
5175 */
5176#ifdef SUPDRV_USE_MUTEX_FOR_GIP
5177 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
5178#else
5179 RTSemFastMutexRequest(pDevExt->mtxGip);
5180#endif
5181
5182 rc = supdrvGipSetFlags(pDevExt, pSession, fOrMask, fAndMask);
5183
5184#ifdef SUPDRV_USE_MUTEX_FOR_GIP
5185 RTSemMutexRelease(pDevExt->mtxGip);
5186#else
5187 RTSemFastMutexRelease(pDevExt->mtxGip);
5188#endif
5189 return rc;
5190}
5191
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette