VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/testmanager/batch/virtual_test_sheriff.py@ 65914

Last change on this file since 65914 was 65863, checked in by vboxsync, 8 years ago

vsheriff: Tx Unit Hang in guest kernel log

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 60.8 KB
Line 
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3# $Id: virtual_test_sheriff.py 65863 2017-02-23 15:29:16Z vboxsync $
4# pylint: disable=C0301
5
6"""
7Virtual Test Sheriff.
8
9Duties:
10 - Try to a assign failure reasons to recently failed tests.
11 - Reboot or disable bad test boxes.
12
13"""
14
15__copyright__ = \
16"""
17Copyright (C) 2012-2016 Oracle Corporation
18
19This file is part of VirtualBox Open Source Edition (OSE), as
20available from http://www.215389.xyz. This file is free software;
21you can redistribute it and/or modify it under the terms of the GNU
22General Public License (GPL) as published by the Free Software
23Foundation, in version 2 as it comes in the "COPYING" file of the
24VirtualBox OSE distribution. VirtualBox OSE is distributed in the
25hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
26
27The contents of this file may alternatively be used under the terms
28of the Common Development and Distribution License Version 1.0
29(CDDL) only, as it comes in the "COPYING.CDDL" file of the
30VirtualBox OSE distribution, in which case the provisions of the
31CDDL are applicable instead of those of the GPL.
32
33You may elect to license modified versions of this file under the
34terms and conditions of either the GPL or the CDDL or both.
35"""
36__version__ = "$Revision: 65863 $"
37
38
39# Standard python imports
40import sys;
41import os;
42import hashlib;
43import StringIO;
44from optparse import OptionParser;
45from PIL import Image; # pylint: disable=import-error
46
47# Add Test Manager's modules path
48g_ksTestManagerDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))));
49sys.path.append(g_ksTestManagerDir);
50
51# Test Manager imports
52from testmanager.core.db import TMDatabaseConnection;
53from testmanager.core.build import BuildDataEx;
54from testmanager.core.failurereason import FailureReasonLogic;
55from testmanager.core.testbox import TestBoxLogic, TestBoxData;
56from testmanager.core.testcase import TestCaseDataEx;
57from testmanager.core.testgroup import TestGroupData;
58from testmanager.core.testset import TestSetLogic, TestSetData;
59from testmanager.core.testresults import TestResultLogic, TestResultFileData;
60from testmanager.core.testresultfailures import TestResultFailureLogic, TestResultFailureData;
61from testmanager.core.useraccount import UserAccountLogic;
62
63
64class VirtualTestSheriffCaseFile(object):
65 """
66 A failure investigation case file.
67
68 """
69
70
71 ## Max log file we'll read into memory. (256 MB)
72 kcbMaxLogRead = 0x10000000;
73
74 def __init__(self, oSheriff, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase):
75 self.oSheriff = oSheriff;
76 self.oTestSet = oTestSet; # TestSetData
77 self.oTree = oTree; # TestResultDataEx
78 self.oBuild = oBuild; # BuildDataEx
79 self.oTestBox = oTestBox; # TestBoxData
80 self.oTestGroup = oTestGroup; # TestGroupData
81 self.oTestCase = oTestCase; # TestCaseDataEx
82 self.sMainLog = ''; # The main log file. Empty string if not accessible.
83
84 # Generate a case file name.
85 self.sName = '#%u: %s' % (self.oTestSet.idTestSet, self.oTestCase.sName,)
86 self.sLongName = '#%u: "%s" on "%s" running %s %s (%s), "%s" by %s, using %s %s %s r%u' \
87 % ( self.oTestSet.idTestSet,
88 self.oTestCase.sName,
89 self.oTestBox.sName,
90 self.oTestBox.sOs,
91 self.oTestBox.sOsVersion,
92 self.oTestBox.sCpuArch,
93 self.oTestBox.sCpuName,
94 self.oTestBox.sCpuVendor,
95 self.oBuild.oCat.sProduct,
96 self.oBuild.oCat.sBranch,
97 self.oBuild.oCat.sType,
98 self.oBuild.iRevision, );
99
100 # Investigation notes.
101 self.tReason = None; # None or one of the ktReason_XXX constants.
102 self.dReasonForResultId = {}; # Reason assignments indexed by idTestResult.
103 self.dCommentForResultId = {}; # Comment assignments indexed by idTestResult.
104
105 #
106 # Reason.
107 #
108
109 def noteReason(self, tReason):
110 """ Notes down a possible reason. """
111 self.oSheriff.dprint(u'noteReason: %s -> %s' % (self.tReason, tReason,));
112 self.tReason = tReason;
113 return True;
114
115 def noteReasonForId(self, tReason, idTestResult, sComment = None):
116 """ Notes down a possible reason for a specific test result. """
117 self.oSheriff.dprint(u'noteReasonForId: %u: %s -> %s%s'
118 % (idTestResult, self.dReasonForResultId.get(idTestResult, None), tReason,
119 (u' (%s)' % (sComment,)) if sComment is not None else ''));
120 self.dReasonForResultId[idTestResult] = tReason;
121 if sComment is not None:
122 self.dCommentForResultId[idTestResult] = sComment;
123 return True;
124
125
126 #
127 # Test classification.
128 #
129
130 def isVBoxTest(self):
131 """ Test classification: VirtualBox (using the build) """
132 return self.oBuild.oCat.sProduct.lower() in [ 'virtualbox', 'vbox' ];
133
134 def isVBoxUnitTest(self):
135 """ Test case classification: The unit test doing all our testcase/*.cpp stuff. """
136 return self.isVBoxTest() \
137 and self.oTestCase.sName.lower() == 'unit tests';
138
139 def isVBoxInstallTest(self):
140 """ Test case classification: VirtualBox Guest installation test. """
141 return self.isVBoxTest() \
142 and self.oTestCase.sName.lower().startswith('install:');
143
144 def isVBoxUSBTest(self):
145 """ Test case classification: VirtualBox USB test. """
146 return self.isVBoxTest() \
147 and self.oTestCase.sName.lower().startswith('usb:');
148
149 def isVBoxStorageTest(self):
150 """ Test case classification: VirtualBox Storage test. """
151 return self.isVBoxTest() \
152 and self.oTestCase.sName.lower().startswith('storage:');
153
154 def isVBoxGAsTest(self):
155 """ Test case classification: VirtualBox Guest Additions test. """
156 return self.isVBoxTest() \
157 and self.oTestCase.sName.lower().startswith('ga\'s tests');
158
159 def isVBoxAPITest(self):
160 """ Test case classification: VirtualBox API test. """
161 return self.isVBoxTest() \
162 and self.oTestCase.sName.lower().startswith('api:');
163
164 def isVBoxBenchmarkTest(self):
165 """ Test case classification: VirtualBox Benchmark test. """
166 return self.isVBoxTest() \
167 and self.oTestCase.sName.lower().startswith('benchmark:');
168
169 def isVBoxSmokeTest(self):
170 """ Test case classification: Smoke test. """
171 return self.isVBoxTest() \
172 and self.oTestCase.sName.lower().startswith('smoketest');
173
174
175 #
176 # Utility methods.
177 #
178
179 def getMainLog(self):
180 """
181 Tries to reads the main log file since this will be the first source of information.
182 """
183 if len(self.sMainLog) > 0:
184 return self.sMainLog;
185 (oFile, oSizeOrError, _) = self.oTestSet.openFile('main.log', 'rb');
186 if oFile is not None:
187 try:
188 self.sMainLog = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
189 except Exception as oXcpt:
190 self.oSheriff.vprint(u'Error reading main log file: %s' % (oXcpt,))
191 self.sMainLog = '';
192 else:
193 self.oSheriff.vprint(u'Error opening main log file: %s' % (oSizeOrError,));
194 return self.sMainLog;
195
196 def getLogFile(self, oFile):
197 """
198 Tries to reads the given file as a utf-8 log file.
199 oFile is a TestFileDataEx instance.
200 Returns empty string if problems opening or reading the file.
201 """
202 sContent = '';
203 (oFile, oSizeOrError, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
204 if oFile is not None:
205 try:
206 sContent = oFile.read(min(self.kcbMaxLogRead, oSizeOrError)).decode('utf-8', 'replace');
207 except Exception as oXcpt:
208 self.oSheriff.vprint(u'Error reading the "%s" log file: %s' % (oFile.sFile, oXcpt,))
209 else:
210 self.oSheriff.vprint(u'Error opening the "%s" log file: %s' % (oFile.sFile, oSizeOrError,));
211 return sContent;
212
213 def getScreenshotSha256(self, oFile):
214 """
215 Tries to read the given screenshot file, uncompress it, and do SHA-2
216 on the raw pixels.
217 Returns SHA-2 digest string on success, None on failure.
218 """
219 (oFile, _, _) = self.oTestSet.openFile(oFile.sFile, 'rb');
220 try:
221 abImageFile = oFile.read();
222 except Exception as oXcpt:
223 self.oSheriff.vprint(u'Error reading the "%s" image file: %s' % (oFile.sFile, oXcpt,))
224 else:
225 try:
226 oImage = Image.open(StringIO.StringIO(abImageFile));
227 except Exception as oXcpt:
228 self.oSheriff.vprint(u'Error opening the "%s" image bytes using PIL.Image.open: %s' % (oFile.sFile, oXcpt,))
229 else:
230 try:
231 oHash = hashlib.sha256();
232 oHash.update(oImage.tostring());
233 except Exception as oXcpt:
234 self.oSheriff.vprint(u'Error hashing the uncompressed image bytes for "%s": %s' % (oFile.sFile, oXcpt,))
235 else:
236 return oHash.hexdigest();
237 return None;
238
239
240
241 def isSingleTestFailure(self):
242 """
243 Figure out if this is a single test failing or if it's one of the
244 more complicated ones.
245 """
246 if self.oTree.cErrors == 1:
247 return True;
248 if self.oTree.deepCountErrorContributers() <= 1:
249 return True;
250 return False;
251
252
253
254class VirtualTestSheriff(object): # pylint: disable=R0903
255 """
256 Add build info into Test Manager database.
257 """
258
259 ## The user account for the virtual sheriff.
260 ksLoginName = 'vsheriff';
261
262 def __init__(self):
263 """
264 Parse command line.
265 """
266 self.oDb = None;
267 self.tsNow = None;
268 self.oTestResultLogic = None;
269 self.oTestSetLogic = None;
270 self.oFailureReasonLogic = None; # FailureReasonLogic;
271 self.oTestResultFailureLogic = None; # TestResultFailureLogic
272 self.oLogin = None;
273 self.uidSelf = -1;
274 self.oLogFile = None;
275 self.asBsodReasons = [];
276 self.asUnitTestReasons = [];
277
278 oParser = OptionParser();
279 oParser.add_option('--start-hours-ago', dest = 'cStartHoursAgo', metavar = '<hours>', default = 0, type = 'int',
280 help = 'When to start specified as hours relative to current time. Defauls is right now.', );
281 oParser.add_option('--hours-period', dest = 'cHoursBack', metavar = '<period-in-hours>', default = 2, type = 'int',
282 help = 'Work period specified in hours. Defauls is 2 hours.');
283 oParser.add_option('--real-run-back', dest = 'fRealRun', action = 'store_true', default = False,
284 help = 'Whether to commit the findings to the database. Default is a dry run.');
285 oParser.add_option('-q', '--quiet', dest = 'fQuiet', action = 'store_true', default = False,
286 help = 'Quiet execution');
287 oParser.add_option('-l', '--log', dest = 'sLogFile', metavar = '<logfile>', default = None,
288 help = 'Where to log messages.');
289 oParser.add_option('--debug', dest = 'fDebug', action = 'store_true', default = False,
290 help = 'Enables debug mode.');
291
292 (self.oConfig, _) = oParser.parse_args();
293
294 if self.oConfig.sLogFile is not None and len(self.oConfig.sLogFile) > 0:
295 self.oLogFile = open(self.oConfig.sLogFile, "a");
296 self.oLogFile.write('VirtualTestSheriff: $Revision: 65863 $ \n');
297
298
299 def eprint(self, sText):
300 """
301 Prints error messages.
302 Returns 1 (for exit code usage.)
303 """
304 print 'error: %s' % (sText,);
305 if self.oLogFile is not None:
306 self.oLogFile.write((u'error: %s\n' % (sText,)).encode('utf-8'));
307 return 1;
308
309 def dprint(self, sText):
310 """
311 Prints debug info.
312 """
313 if self.oConfig.fDebug:
314 if not self.oConfig.fQuiet:
315 print 'debug: %s' % (sText, );
316 if self.oLogFile is not None:
317 self.oLogFile.write((u'debug: %s\n' % (sText,)).encode('utf-8'));
318 return 0;
319
320 def vprint(self, sText):
321 """
322 Prints verbose info.
323 """
324 if not self.oConfig.fQuiet:
325 print 'info: %s' % (sText,);
326 if self.oLogFile is not None:
327 self.oLogFile.write((u'info: %s\n' % (sText,)).encode('utf-8'));
328 return 0;
329
330 def getFailureReason(self, tReason):
331 """ Gets the failure reason object for tReason. """
332 return self.oFailureReasonLogic.cachedLookupByNameAndCategory(tReason[1], tReason[0]);
333
334 def selfCheck(self):
335 """ Does some self checks, looking up things we expect to be in the database and such. """
336 rcExit = 0;
337 for sAttr in dir(self.__class__):
338 if sAttr.startswith('ktReason_'):
339 tReason = getattr(self.__class__, sAttr);
340 oFailureReason = self.getFailureReason(tReason);
341 if oFailureReason is None:
342 rcExit = self.eprint(u'Failed to find failure reason "%s" in category "%s" in the database!'
343 % (tReason[1], tReason[0],));
344
345 # Check the user account as well.
346 if self.oLogin is None:
347 oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
348 if oLogin is None:
349 rcExit = self.eprint(u'Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
350 return rcExit;
351
352
353
354 def badTestBoxManagement(self):
355 """
356 Looks for bad test boxes and first tries once to reboot them then disables them.
357 """
358 rcExit = 0;
359
360 #
361 # We skip this entirely if we're running in the past and not in harmless debug mode.
362 #
363 if self.oConfig.cStartHoursAgo != 0 \
364 and (not self.oConfig.fDebug or self.oConfig.fRealRun):
365 return rcExit;
366 tsNow = self.tsNow if self.oConfig.fDebug else None;
367 cHoursBack = self.oConfig.cHoursBack if self.oConfig.fDebug else 2;
368 oTestBoxLogic = TestBoxLogic(self.oDb);
369
370 #
371 # Generate a list of failures reasons we consider bad-testbox behavior.
372 #
373 aidFailureReasons = [
374 self.getFailureReason(self.ktReason_Host_DriverNotUnloading).idFailureReason,
375 ];
376
377 #
378 # Get list of bad test boxes for given period and check them out individually.
379 #
380 aidBadTestBoxes = self.oTestSetLogic.fetchBadTestBoxIds(cHoursBack = cHoursBack, tsNow = tsNow,
381 aidFailureReasons = aidFailureReasons);
382 for idTestBox in aidBadTestBoxes:
383 # Skip if the testbox is already disabled or has a pending reboot command.
384 try:
385 oTestBox = TestBoxData().initFromDbWithId(self.oDb, idTestBox);
386 except Exception as oXcpt:
387 rcExit = self.eprint('Failed to get data for test box #%u in badTestBoxManagement: %s' % (idTestBox, oXcpt,));
388 continue;
389 if not oTestBox.fEnabled:
390 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has been disabled already.'
391 % ( idTestBox, oTestBox.sName, ));
392 continue;
393 if oTestBox.enmPendingCmd != TestBoxData.ksTestBoxCmd_None:
394 self.dprint(u'badTestBoxManagement: Skipping test box #%u (%s) as it has a command pending: %s'
395 % ( idTestBox, oTestBox.sName, oTestBox.enmPendingCmd));
396 continue;
397
398 # Get the most recent testsets for this box (descending on tsDone) and see how bad it is.
399 aoSets = self.oTestSetLogic.fetchSetsForTestBox(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow);
400 cOkay = 0;
401 cBad = 0;
402 iFirstOkay = len(aoSets);
403 for iSet, oSet in enumerate(aoSets):
404 if oSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
405 cBad += 1;
406 else:
407 # Check for bad failure reasons.
408 oFailure = None;
409 if oSet.enmStatus in TestSetData.kasBadTestStatuses:
410 oFailure = self.oTestResultFailureLogic.getById(oSet.idTestResult);
411 if oFailure is not None and oFailure.idFailureReason in aidFailureReasons:
412 cBad += 1;
413 else:
414 # This is an okay test result then.
415 ## @todo maybe check the elapsed time here, it could still be a bad run?
416 cOkay += 1;
417 if iFirstOkay > iSet:
418 iFirstOkay = iSet;
419 if iSet > 10:
420 break;
421
422 # We react if there are two or more bad-testbox statuses at the head of the
423 # history and at least three in the last 10 results.
424 if iFirstOkay >= 2 and cBad > 2:
425 if oTestBoxLogic.hasTestBoxRecentlyBeenRebooted(idTestBox, cHoursBack = cHoursBack, tsNow = tsNow):
426 self.vprint(u'Disabling testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
427 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
428 if self.oConfig.fRealRun is True:
429 try:
430 oTestBoxLogic.disableTestBox(idTestBox, self.uidSelf, fCommit = True,
431 sComment = 'Automatically disabled (iFirstOkay=%u cBad=%u cOkay=%u)'
432 % (iFirstOkay, cBad, cOkay),);
433 except Exception as oXcpt:
434 rcExit = self.eprint(u'Error disabling testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
435 else:
436 self.vprint(u'Rebooting testbox #%u (%s) - iFirstOkay=%u cBad=%u cOkay=%u'
437 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
438 if self.oConfig.fRealRun is True:
439 try:
440 oTestBoxLogic.rebootTestBox(idTestBox, self.uidSelf, fCommit = True,
441 sComment = 'Automatically rebooted (iFirstOkay=%u cBad=%u cOkay=%u)'
442 % (iFirstOkay, cBad, cOkay),);
443 except Exception as oXcpt:
444 rcExit = self.eprint(u'Error rebooting testbox #%u (%u): %s\n' % (idTestBox, oTestBox.sName, oXcpt,));
445 else:
446 self.dprint(u'badTestBoxManagement: #%u (%s) looks ok: iFirstOkay=%u cBad=%u cOkay=%u'
447 % ( idTestBox, oTestBox.sName, iFirstOkay, cBad, cOkay));
448 return rcExit;
449
450
451 ## @name Failure reasons we know.
452 ## @{
453 ktReason_BSOD_Recovery = ( 'BSOD', 'Recovery' );
454 ktReason_BSOD_Automatic_Repair = ( 'BSOD', 'Automatic Repair' );
455 ktReason_BSOD_C0000225 = ( 'BSOD', '0xC0000225 (boot)' );
456 ktReason_Guru_Generic = ( 'Guru Meditations', 'Generic Guru Meditation' );
457 ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_INSTR_NOT_IMPLEMENTED' );
458 ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED = ( 'Guru Meditations', 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' );
459 ktReason_Guru_VERR_TRPM_DONT_PANIC = ( 'Guru Meditations', 'VERR_TRPM_DONT_PANIC' );
460 ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED = ( 'Guru Meditations', 'VERR_PGM_PHYS_PAGE_RESERVED' );
461 ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE = ( 'Guru Meditations', 'VERR_VMX_INVALID_GUEST_STATE' );
462 ktReason_Guru_VINF_EM_TRIPLE_FAULT = ( 'Guru Meditations', 'VINF_EM_TRIPLE_FAULT' );
463 ktReason_Host_HostMemoryLow = ( 'Host', 'HostMemoryLow' );
464 ktReason_Host_DriverNotLoaded = ( 'Host', 'Driver not loaded' );
465 ktReason_Host_DriverNotUnloading = ( 'Host', 'Driver not unloading' );
466 ktReason_Host_NotSignedWithBuildCert = ( 'Host', 'Not signed with build cert' );
467 ktReason_Host_Reboot_OSX_Watchdog_Timeout = ( 'Host Reboot', 'OSX Watchdog Timeout' );
468 ktReason_Networking_Nonexistent_host_nic = ( 'Networking', 'Nonexistent host networking interface' );
469 ktReason_OSInstall_GRUB_hang = ( 'O/S Install', 'GRUB hang' );
470 ktReason_Panic_BootManagerC000000F = ( 'Panic', 'Hardware Changed' );
471 ktReason_Panic_MP_BIOS_IO_APIC = ( 'Panic', 'MP-BIOS/IO-APIC' );
472 ktReason_Panic_HugeMemory = ( 'Panic', 'Huge memory assertion' );
473 ktReason_Panic_IOAPICDoesntWork = ( 'Panic', 'IO-APIC and timer does not work' );
474 ktReason_Panic_TxUnitHang = ( 'Panic', 'Tx Unit Hang' );
475 ktReason_XPCOM_Exit_Minus_11 = ( 'API / (XP)COM', 'exit -11' );
476 ktReason_XPCOM_VBoxSVC_Hang = ( 'API / (XP)COM', 'VBoxSVC hang' );
477 ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption = ( 'API / (XP)COM', 'VBoxSVC hang + heap corruption' );
478 ktReason_XPCOM_NS_ERROR_CALL_FAILED = ( 'API / (XP)COM', 'NS_ERROR_CALL_FAILED' );
479 ktReason_Unknown_Heap_Corruption = ( 'Unknown', 'Heap corruption' );
480 ktReason_Unknown_Reboot_Loop = ( 'Unknown', 'Reboot loop' );
481 ktReason_Unknown_File_Not_Found = ( 'Unknown', 'File not found' );
482 ktReason_Unknown_VM_Crash = ( 'Unknown', 'VM crash' );
483 ktReason_VMM_kvm_lock_spinning = ( 'VMM', 'kvm_lock_spinning' );
484 ktReason_Ignore_Buggy_Test_Driver = ( 'Ignore', 'Buggy test driver' );
485 ktReason_Ignore_Stale_Files = ( 'Ignore', 'Stale files' );
486 ktReason_Buggy_Build_Broken_Build = ( 'Broken Build', 'Buggy build' );
487 ## @}
488
489 ## BSOD category.
490 ksBsodCategory = 'BSOD';
491 ## Special reason indicating that the flesh and blood sheriff has work to do.
492 ksBsodAddNew = 'Add new BSOD';
493
494 ## Unit test category.
495 ksUnitTestCategory = 'Unit';
496 ## Special reason indicating that the flesh and blood sheriff has work to do.
497 ksUnitTestAddNew = 'Add new';
498
499 ## Used for indica that we shouldn't report anything for this test result ID and
500 ## consider promoting the previous error to test set level if it's the only one.
501 ktHarmless = ( 'Probably', 'Caused by previous error' );
502
503
504 def caseClosed(self, oCaseFile):
505 """
506 Reports the findings in the case and closes it.
507 """
508 #
509 # Log it and create a dReasonForReasultId we can use below.
510 #
511 dCommentForResultId = oCaseFile.dCommentForResultId;
512 if len(oCaseFile.dReasonForResultId) > 0:
513 # Must weed out ktHarmless.
514 dReasonForResultId = {};
515 for idKey, tReason in oCaseFile.dReasonForResultId.items():
516 if tReason is not self.ktHarmless:
517 dReasonForResultId[idKey] = tReason;
518 if len(dReasonForResultId) == 0:
519 self.vprint(u'TODO: Closing %s without a real reason, only %s.'
520 % (oCaseFile.sName, oCaseFile.dReasonForResultId));
521 return False;
522
523 # Try promote to single reason.
524 atValues = dReasonForResultId.values();
525 fSingleReason = True;
526 if len(dReasonForResultId) == 1 and dReasonForResultId.keys()[0] != oCaseFile.oTestSet.idTestResult:
527 self.dprint(u'Promoting single reason to whole set: %s' % (atValues[0],));
528 elif len(dReasonForResultId) > 1 and len(atValues) == atValues.count(atValues[0]):
529 self.dprint(u'Merged %d reasons to a single one: %s' % (len(atValues), atValues[0]));
530 else:
531 fSingleReason = False;
532 if fSingleReason:
533 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: atValues[0], };
534 if len(dCommentForResultId) > 0:
535 dCommentForResultId = { oCaseFile.oTestSet.idTestResult: dCommentForResultId.values()[0], };
536 elif oCaseFile.tReason is not None:
537 dReasonForResultId = { oCaseFile.oTestSet.idTestResult: oCaseFile.tReason, };
538 else:
539 self.vprint(u'Closing %s without a reason - this should not happen!' % (oCaseFile.sName,));
540 return False;
541
542 self.vprint(u'Closing %s with following reason%s: %s'
543 % ( oCaseFile.sName, 's' if dReasonForResultId > 0 else '', dReasonForResultId, ));
544
545 #
546 # Add the test failure reason record(s).
547 #
548 for idTestResult, tReason in dReasonForResultId.items():
549 oFailureReason = self.getFailureReason(tReason);
550 if oFailureReason is not None:
551 sComment = 'Set by $Revision: 65863 $' # Handy for reverting later.
552 if idTestResult in dCommentForResultId:
553 sComment += ': ' + dCommentForResultId[idTestResult];
554
555 oAdd = TestResultFailureData();
556 oAdd.initFromValues(idTestResult = idTestResult,
557 idFailureReason = oFailureReason.idFailureReason,
558 uidAuthor = self.uidSelf,
559 idTestSet = oCaseFile.oTestSet.idTestSet,
560 sComment = sComment,);
561 if self.oConfig.fRealRun:
562 try:
563 self.oTestResultFailureLogic.addEntry(oAdd, self.uidSelf, fCommit = True);
564 except Exception as oXcpt:
565 self.eprint(u'caseClosed: Exception "%s" while adding reason %s for %s'
566 % (oXcpt, oAdd, oCaseFile.sLongName,));
567 else:
568 self.eprint(u'caseClosed: Cannot locate failure reason: %s / %s' % ( tReason[0], tReason[1],));
569 return True;
570
571 #
572 # Tools for assiting log parsing.
573 #
574
575 @staticmethod
576 def matchFollowedByLines(sStr, off, asFollowingLines):
577 """ Worker for isThisFollowedByTheseLines. """
578
579 # Advance off to the end of the line.
580 off = sStr.find('\n', off);
581 if off < 0:
582 return False;
583 off += 1;
584
585 # Match each string with the subsequent lines.
586 for iLine, sLine in enumerate(asFollowingLines):
587 offEnd = sStr.find('\n', off);
588 if offEnd < 0:
589 return iLine + 1 == len(asFollowingLines) and sStr.find(sLine, off) < 0;
590 if len(sLine) > 0 and sStr.find(sLine, off, offEnd) < 0:
591 return False;
592
593 # next line.
594 off = offEnd + 1;
595
596 return True;
597
598 @staticmethod
599 def isThisFollowedByTheseLines(sStr, sFirst, asFollowingLines):
600 """
601 Looks for a line contining sFirst which is then followed by lines
602 with the strings in asFollowingLines. (No newline chars anywhere!)
603 Returns True / False.
604 """
605 off = sStr.find(sFirst, 0);
606 while off >= 0:
607 if VirtualTestSheriff.matchFollowedByLines(sStr, off, asFollowingLines):
608 return True;
609 off = sStr.find(sFirst, off + 1);
610 return False;
611
612 @staticmethod
613 def findAndReturnResetOfLine(sHaystack, sNeedle):
614 """
615 Looks for sNeedle in sHaystack.
616 Returns The text following the needle up to the end of the line.
617 Returns None if not found.
618 """
619 if sHaystack is None:
620 return None;
621 off = sHaystack.find(sNeedle);
622 if off < 0:
623 return None;
624 off += len(sNeedle)
625 offEol = sHaystack.find('\n', off);
626 if offEol < 0:
627 offEol = len(sHaystack);
628 return sHaystack[off:offEol]
629
630 @staticmethod
631 def findInAnyAndReturnResetOfLine(asHaystacks, sNeedle):
632 """
633 Looks for sNeedle in zeroe or more haystacks (asHaystack).
634 Returns The text following the first needed found up to the end of the line.
635 Returns None if not found.
636 """
637 for sHaystack in asHaystacks:
638 sRet = VirtualTestSheriff.findAndReturnResetOfLine(sHaystack, sNeedle);
639 if sRet is not None:
640 return sRet;
641 return None;
642
643
644 #
645 # The investigative units.
646 #
647
648 katSimpleInstallUninstallMainLogReasons = [
649 # ( Whether to stop on hit, reason tuple, needle text. )
650 ];
651
652 kdatSimpleInstallUninstallMainLogReasonsPerOs = {
653 'darwin': [
654 # ( Whether to stop on hit, reason tuple, needle text. )
655 ( True, ktReason_Host_DriverNotUnloading,
656 'Can\'t remove kext org.virtualbox.kext.VBoxDrv; services failed to terminate - 0xe00002c7' ),
657 ],
658 };
659
660
661 def investigateInstallUninstallFailure(self, oCaseFile, oFailedResult, sResultLog, fInstall):
662 """
663 Investigates an install or uninstall failure.
664
665 We lump the two together since the installation typically also performs
666 an uninstall first and will be seeing similar issues to the uninstall.
667 """
668 _ = fInstall;
669
670 atSimple = self.katSimpleInstallUninstallMainLogReasons;
671 if oCaseFile.oTestBox.sOs in self.kdatSimpleInstallUninstallMainLogReasonsPerOs:
672 atSimple = self.kdatSimpleInstallUninstallMainLogReasonsPerOs[oCaseFile.oTestBox.sOs] + atSimple;
673
674 fFoundSomething = False;
675 for fStopOnHit, tReason, sNeedle in atSimple:
676 if sResultLog.find(sNeedle) > 0:
677 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
678 if fStopOnHit:
679 return True;
680 fFoundSomething = True;
681
682 return fFoundSomething if fFoundSomething else None;
683
684
685 def investigateBadTestBox(self, oCaseFile):
686 """
687 Checks out bad-testbox statuses.
688 """
689 _ = oCaseFile;
690 return False;
691
692
693 def investigateVBoxUnitTest(self, oCaseFile):
694 """
695 Checks out a VBox unittest problem.
696 """
697
698 #
699 # Process simple test case failures first, using their name as reason.
700 # We do the reason management just like for BSODs.
701 #
702 cRelevantOnes = 0;
703 sMainLog = oCaseFile.getMainLog();
704 aoFailedResults = oCaseFile.oTree.getListOfFailures();
705 for oFailedResult in aoFailedResults:
706 if oFailedResult is oCaseFile.oTree:
707 self.vprint('TODO: toplevel failure');
708 cRelevantOnes += 1
709
710 elif oFailedResult.sName == 'Installing VirtualBox':
711 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
712 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
713 cRelevantOnes += 1
714
715 elif oFailedResult.sName == 'Uninstalling VirtualBox':
716 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
717 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
718 cRelevantOnes += 1
719
720 elif oFailedResult.oParent is not None:
721 # Get the 2nd level node because that's where we'll find the unit test name.
722 while oFailedResult.oParent.oParent is not None:
723 oFailedResult = oFailedResult.oParent;
724
725 # Only report a failure once.
726 if oFailedResult.idTestResult not in oCaseFile.dReasonForResultId:
727 sKey = oFailedResult.sName;
728 if sKey.startswith('testcase/'):
729 sKey = sKey[9:];
730 if sKey in self.asUnitTestReasons:
731 tReason = ( self.ksUnitTestCategory, sKey );
732 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
733 else:
734 self.dprint(u'Unit test failure "%s" not found in %s;' % (sKey, self.asUnitTestReasons));
735 tReason = ( self.ksUnitTestCategory, self.ksUnitTestAddNew );
736 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sKey);
737 cRelevantOnes += 1
738 else:
739 self.vprint(u'Internal error: expected oParent to NOT be None for %s' % (oFailedResult,));
740
741 #
742 # If we've caught all the relevant ones by now, report the result.
743 #
744 if len(oCaseFile.dReasonForResultId) >= cRelevantOnes:
745 return self.caseClosed(oCaseFile);
746 return False;
747
748 def extractGuestCpuStack(self, sInfoText):
749 """
750 Extracts the guest CPU stacks from the input file.
751
752 Returns a dictionary keyed by the CPU number, value being a list of
753 raw stack lines (no header).
754 Returns empty dictionary if no stacks where found.
755 """
756 dRet = {};
757 off = 0;
758 while True:
759 # Find the stack.
760 offStart = sInfoText.find('=== start guest stack VCPU ', off);
761 if offStart < 0:
762 break;
763 offEnd = sInfoText.find('=== end guest stack', offStart + 20);
764 if offEnd >= 0:
765 offEnd += 3;
766 else:
767 offEnd = sInfoText.find('=== start guest stack VCPU', offStart + 20);
768 if offEnd < 0:
769 offEnd = len(sInfoText);
770
771 sStack = sInfoText[offStart : offEnd];
772 sStack = sStack.replace('\r',''); # paranoia
773 asLines = sStack.split('\n');
774
775 # Figure the CPU.
776 asWords = asLines[0].split();
777 if len(asWords) < 6 or not asWords[5].isdigit():
778 break;
779 iCpu = int(asWords[5]);
780
781 # Add it and advance.
782 dRet[iCpu] = [sLine.rstrip() for sLine in asLines[2:-1]]
783 off = offEnd;
784 return dRet;
785
786 def investigateInfoKvmLockSpinning(self, oCaseFile, sInfoText, dLogs):
787 """ Investigates kvm_lock_spinning deadlocks """
788 #
789 # Extract the stacks. We need more than one CPU to create a deadlock.
790 #
791 dStacks = self.extractGuestCpuStack(sInfoText);
792 self.dprint('kvm_lock_spinning: found %s stacks' % (len(dStacks),));
793 if len(dStacks) >= 2:
794 #
795 # Examin each of the stacks. Each must have kvm_lock_spinning in
796 # one of the first three entries.
797 #
798 cHits = 0;
799 for iCpu in dStacks:
800 asBacktrace = dStacks[iCpu];
801 for iFrame in xrange(min(3, len(asBacktrace))):
802 if asBacktrace[iFrame].find('kvm_lock_spinning') >= 0:
803 cHits += 1;
804 break;
805 self.dprint('kvm_lock_spinning: %s/%s hits' % (cHits, len(dStacks),));
806 if cHits == len(dStacks):
807 return (True, self.ktReason_VMM_kvm_lock_spinning);
808
809 _ = dLogs; _ = oCaseFile;
810 return (False, None);
811
812 ## Things we search a main or VM log for to figure out why something went bust.
813 katSimpleMainAndVmLogReasons = [
814 # ( Whether to stop on hit, reason tuple, needle text. )
815 ( False, ktReason_Guru_Generic, 'GuruMeditation' ),
816 ( False, ktReason_Guru_Generic, 'Guru Meditation' ),
817 ( True, ktReason_Guru_VERR_IEM_INSTR_NOT_IMPLEMENTED, 'VERR_IEM_INSTR_NOT_IMPLEMENTED' ),
818 ( True, ktReason_Guru_VERR_IEM_ASPECT_NOT_IMPLEMENTED, 'VERR_IEM_ASPECT_NOT_IMPLEMENTED' ),
819 ( True, ktReason_Guru_VERR_TRPM_DONT_PANIC, 'VERR_TRPM_DONT_PANIC' ),
820 ( True, ktReason_Guru_VERR_PGM_PHYS_PAGE_RESERVED, 'VERR_PGM_PHYS_PAGE_RESERVED' ),
821 ( True, ktReason_Guru_VERR_VMX_INVALID_GUEST_STATE, 'VERR_VMX_INVALID_GUEST_STATE' ),
822 ( True, ktReason_Guru_VINF_EM_TRIPLE_FAULT, 'VINF_EM_TRIPLE_FAULT' ),
823 ( True, ktReason_Networking_Nonexistent_host_nic,
824 'rc=E_FAIL text="Nonexistent host networking interface, name \'eth0\' (VERR_INTERNAL_ERROR)"' ),
825 ( True, ktReason_Host_Reboot_OSX_Watchdog_Timeout, ': "OSX Watchdog Timeout: ' ),
826 ( False, ktReason_XPCOM_NS_ERROR_CALL_FAILED,
827 'Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))' ),
828 ( True, ktReason_Host_HostMemoryLow, 'HostMemoryLow' ),
829 ( True, ktReason_Host_HostMemoryLow, 'Failed to procure handy pages; rc=VERR_NO_MEMORY' ),
830 ( True, ktReason_Unknown_File_Not_Found,
831 'Error: failed to start machine. Error message: File not found. (VERR_FILE_NOT_FOUND)' ),
832 ( True, ktReason_Unknown_File_Not_Found, # lump it in with file-not-found for now.
833 'Error: failed to start machine. Error message: Not supported. (VERR_NOT_SUPPORTED)' ),
834 ( False, ktReason_Unknown_VM_Crash, 'txsDoConnectViaTcp: Machine state: Aborted' )
835 ];
836
837 ## Things we search a VBoxHardening.log file for to figure out why something went bust.
838 katSimpleVBoxHardeningLogReasons = [
839 # ( Whether to stop on hit, reason tuple, needle text. )
840 ( True, ktReason_Host_DriverNotLoaded, 'Error opening VBoxDrvStub: STATUS_OBJECT_NAME_NOT_FOUND' ),
841 ( True, ktReason_Host_NotSignedWithBuildCert, 'Not signed with the build certificate' ),
842 ];
843
844 ## Things we search a kernel.log file for to figure out why something went bust.
845 katSimpleKernelLogReasons = [
846 # ( Whether to stop on hit, reason tuple, needle text. )
847 ( True, ktReason_Panic_HugeMemory, 'mm/huge_memory.c:1988' ),
848 ( True, ktReason_Panic_IOAPICDoesntWork, 'IO-APIC + timer doesn''t work' ),
849 ( True, ktReason_Panic_TxUnitHang, 'Detected Tx Unit Hang' ),
850 ];
851
852 ## Things we search the _RIGHT_ _STRIPPED_ vgatext for.
853 katSimpleVgaTextReasons = [
854 # ( Whether to stop on hit, reason tuple, needle text. )
855 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
856 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n\n" ),
857 ( True, ktReason_Panic_MP_BIOS_IO_APIC,
858 "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"
859 "...trying to set up timer (IRQ0) through the 8259A ... failed.\n"
860 "...trying to set up timer as Virtual Wire IRQ... failed.\n"
861 "...trying to set up timer as ExtINT IRQ... failed :(.\n"
862 "Kernel panic - not syncing: IO-APIC + timer doesn't work! Boot with apic=debug\n"
863 "and send a report. Then try booting with the 'noapic' option\n"
864 "\n" ),
865 ( True, ktReason_OSInstall_GRUB_hang,
866 "-----\nGRUB Loading stage2..\n\n\n\n" ),
867 ( True, ktReason_Panic_BootManagerC000000F,
868 "Windows failed to start. A recent hardware or software change might be the" ),
869 ];
870
871 ## Things we search for in the info.txt file. Require handlers for now.
872 katInfoTextHandlers = [
873 # ( Trigger text, handler method )
874 ( "kvm_lock_spinning", investigateInfoKvmLockSpinning ),
875 ];
876
877 ## Mapping screenshot/failure SHA-256 hashes to failure reasons.
878 katSimpleScreenshotHashReasons = [
879 # ( Whether to stop on hit, reason tuple, lowercased sha-256 of PIL.Image.tostring output )
880 ( True, ktReason_BSOD_Recovery, '576f8e38d62b311cac7e3dc3436a0d0b9bd8cfd7fa9c43aafa95631520a45eac' ),
881 ( True, ktReason_BSOD_Automatic_Repair, 'c6a72076cc619937a7a39cfe9915b36d94cee0d4e3ce5ce061485792dcee2749' ),
882 ( True, ktReason_BSOD_Automatic_Repair, '26c4d8a724ff2c5e1051f3d5b650dbda7b5fdee0aa3e3c6059797f7484a515df' ),
883 ( True, ktReason_BSOD_C0000225, 'bd13a144be9dcdfb16bc863ff4c8f02a86e263c174f2cd5ffd27ca5f3aa31789' ),
884 ( True, ktReason_BSOD_C0000225, '8348b465e7ee9e59dd4e785880c57fd8677de05d11ac21e786bfde935307b42f' ),
885 ( True, ktReason_BSOD_C0000225, '1316e1fc818a73348412788e6910b8c016f237d8b4e15b20caf4a866f7a7840e' ),
886 ( True, ktReason_BSOD_C0000225, '54e0acbff365ce20a85abbe42bcd53647b8b9e80c68e45b2cd30e86bf177a0b5' ),
887 ( True, ktReason_BSOD_C0000225, '50fec50b5199923fa48b3f3e782687cc381e1c8a788ebda14e6a355fbe3bb1b3' ),
888 ];
889
890 def investigateVMResult(self, oCaseFile, oFailedResult, sResultLog):
891 """
892 Investigates a failed VM run.
893 """
894
895 def investigateLogSet():
896 """
897 Investigates the current set of VM related logs.
898 """
899 self.dprint('investigateLogSet: lengths: result log %u, VM log %u, kernel log %u, vga text %u, info text %u'
900 % ( len(sResultLog) if sResultLog is not None else 0,
901 len(sVMLog) if sVMLog is not None else 0,
902 len(sKrnlLog) if sKrnlLog is not None else 0,
903 len(sVgaText) if sVgaText is not None else 0,
904 len(sInfoText) if sInfoText is not None else 0, ));
905
906 #self.dprint(u'main.log<<<\n%s\n<<<\n' % (sResultLog,));
907 #self.dprint(u'vbox.log<<<\n%s\n<<<\n' % (sVMLog,));
908 #self.dprint(u'krnl.log<<<\n%s\n<<<\n' % (sKrnlLog,));
909 #self.dprint(u'vgatext.txt<<<\n%s\n<<<\n' % (sVgaText,));
910 #self.dprint(u'info.txt<<<\n%s\n<<<\n' % (sInfoText,));
911
912 # TODO: more
913
914 #
915 # Look for BSODs. Some stupid stupid inconsistencies in reason and log messages here, so don't try prettify this.
916 #
917 sDetails = self.findInAnyAndReturnResetOfLine([ sVMLog, sResultLog ],
918 'GIM: HyperV: Guest indicates a fatal condition! P0=');
919 if sDetails is not None:
920 # P0=%#RX64 P1=%#RX64 P2=%#RX64 P3=%#RX64 P4=%#RX64 "
921 sKey = sDetails.split(' ', 1)[0];
922 try: sKey = '0x%08X' % (int(sKey, 16),);
923 except: pass;
924 if sKey in self.asBsodReasons:
925 tReason = ( self.ksBsodCategory, sKey );
926 elif sKey.lower() in self.asBsodReasons: # just in case.
927 tReason = ( self.ksBsodCategory, sKey.lower() );
928 else:
929 self.dprint(u'BSOD "%s" not found in %s;' % (sKey, self.asBsodReasons));
930 tReason = ( self.ksBsodCategory, self.ksBsodAddNew );
931 return oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult, sComment = sDetails.strip());
932
933 #
934 # Look for linux panic.
935 #
936 if sKrnlLog is not None:
937 for fStopOnHit, tReason, sNeedle in self.katSimpleKernelLogReasons:
938 if sKrnlLog.find(sNeedle) > 0:
939 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
940 if fStopOnHit:
941 return True;
942 fFoundSomething = True;
943
944 #
945 # Loop thru the simple stuff.
946 #
947 fFoundSomething = False;
948 for fStopOnHit, tReason, sNeedle in self.katSimpleMainAndVmLogReasons:
949 if sResultLog.find(sNeedle) > 0 or (sVMLog is not None and sVMLog.find(sNeedle) > 0):
950 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
951 if fStopOnHit:
952 return True;
953 fFoundSomething = True;
954
955 # Continue with vga text.
956 if sVgaText is not None and len(sVgaText) > 0:
957 for fStopOnHit, tReason, sNeedle in self.katSimpleVgaTextReasons:
958 if sVgaText.find(sNeedle) > 0:
959 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
960 if fStopOnHit:
961 return True;
962 fFoundSomething = True;
963 _ = sInfoText;
964
965 # Continue with screen hashes.
966 if sScreenHash is not None:
967 for fStopOnHit, tReason, sHash in self.katSimpleScreenshotHashReasons:
968 if sScreenHash == sHash:
969 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
970 if fStopOnHit:
971 return True;
972 fFoundSomething = True;
973
974 # Check VBoxHardening.log.
975 if sNtHardLog is not None:
976 for fStopOnHit, tReason, sNeedle in self.katSimpleVBoxHardeningLogReasons:
977 if sNtHardLog.find(sNeedle) > 0:
978 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
979 if fStopOnHit:
980 return True;
981 fFoundSomething = True;
982
983 #
984 # Complicated stuff.
985 #
986 dLogs = {
987 'sVMLog': sVMLog,
988 'sNtHardLog': sNtHardLog,
989 'sScreenHash': sScreenHash,
990 'sKrnlLog': sKrnlLog,
991 'sVgaText': sVgaText,
992 'sInfoText': sInfoText,
993 };
994
995 # info.txt.
996 if sInfoText is not None and len(sInfoText) > 0:
997 for sNeedle, fnHandler in self.katInfoTextHandlers:
998 if sInfoText.find(sNeedle) > 0:
999 (fStop, tReason) = fnHandler(self, oCaseFile, sInfoText, dLogs);
1000 if tReason is not None:
1001 oCaseFile.noteReasonForId(tReason, oFailedResult.idTestResult);
1002 if fStop:
1003 return True;
1004 fFoundSomething = True;
1005
1006 #
1007 # Check for repeated reboots...
1008 #
1009 if sVMLog is not None:
1010 cResets = sVMLog.count('Changing the VM state from \'RUNNING\' to \'RESETTING\'');
1011 if cResets > 10:
1012 return oCaseFile.noteReasonForId(self.ktReason_Unknown_Reboot_Loop, oFailedResult.idTestResult,
1013 sComment = 'Counted %s reboots' % (cResets,));
1014
1015 return fFoundSomething;
1016
1017 #
1018 # Check if we got any VM or/and kernel logs. Treat them as sets in
1019 # case we run multiple VMs here (this is of course ASSUMING they
1020 # appear in the order that terminateVmBySession uploads them).
1021 #
1022 cTimes = 0;
1023 sVMLog = None;
1024 sNtHardLog = None;
1025 sScreenHash = None;
1026 sKrnlLog = None;
1027 sVgaText = None;
1028 sInfoText = None;
1029 for oFile in oFailedResult.aoFiles:
1030 if oFile.sKind == TestResultFileData.ksKind_LogReleaseVm:
1031 if 'VBoxHardening.log' not in oFile.sFile:
1032 if sVMLog is not None:
1033 if investigateLogSet() is True:
1034 return True;
1035 cTimes += 1;
1036 sInfoText = None;
1037 sVgaText = None;
1038 sKrnlLog = None;
1039 sScreenHash = None;
1040 sNtHardLog = None;
1041 sVMLog = oCaseFile.getLogFile(oFile);
1042 else:
1043 sNtHardLog = oCaseFile.getLogFile(oFile);
1044 elif oFile.sKind == TestResultFileData.ksKind_LogGuestKernel:
1045 sKrnlLog = oCaseFile.getLogFile(oFile);
1046 elif oFile.sKind == TestResultFileData.ksKind_InfoVgaText:
1047 sVgaText = '\n'.join([sLine.rstrip() for sLine in oCaseFile.getLogFile(oFile).split('\n')]);
1048 elif oFile.sKind == TestResultFileData.ksKind_InfoCollection:
1049 sInfoText = oCaseFile.getLogFile(oFile);
1050 elif oFile.sKind == TestResultFileData.ksKind_ScreenshotFailure:
1051 sScreenHash = oCaseFile.getScreenshotSha256(oFile);
1052 if sScreenHash is not None:
1053 sScreenHash = sScreenHash.lower();
1054 self.vprint(u'%s %s' % ( sScreenHash, oFile.sFile,));
1055
1056 if ( sVMLog is not None \
1057 or sNtHardLog is not None \
1058 or cTimes == 0) \
1059 and investigateLogSet() is True:
1060 return True;
1061
1062 return None;
1063
1064
1065 def isResultFromVMRun(self, oFailedResult, sResultLog):
1066 """
1067 Checks if this result and corresponding log snippet looks like a VM run.
1068 """
1069
1070 # Look for startVmEx/ startVmAndConnectToTxsViaTcp and similar output in the log.
1071 if sResultLog.find(' startVm') > 0:
1072 return True;
1073
1074 # Any other indicators? No?
1075 _ = oFailedResult;
1076 return False;
1077
1078 def investigateVBoxVMTest(self, oCaseFile, fSingleVM):
1079 """
1080 Checks out a VBox VM test.
1081
1082 This is generic investigation of a test running one or more VMs, like
1083 for example a smoke test or a guest installation test.
1084
1085 The fSingleVM parameter is a hint, which probably won't come in useful.
1086 """
1087 _ = fSingleVM;
1088
1089 #
1090 # Get a list of test result failures we should be looking into and the main log.
1091 #
1092 aoFailedResults = oCaseFile.oTree.getListOfFailures();
1093 sMainLog = oCaseFile.getMainLog();
1094
1095 #
1096 # There are a set of errors ending up on the top level result record.
1097 # Should deal with these first.
1098 #
1099 if len(aoFailedResults) == 1 and aoFailedResults[0] == oCaseFile.oTree:
1100 # Check if we've just got that XPCOM client smoke test shutdown issue. This will currently always
1101 # be reported on the top result because vboxinstall.py doesn't add an error for it. It is easy to
1102 # ignore other failures in the test if we're not a little bit careful here.
1103 if sMainLog.find('vboxinstaller: Exit code: -11 (') > 0:
1104 oCaseFile.noteReason(self.ktReason_XPCOM_Exit_Minus_11);
1105 return self.caseClosed(oCaseFile);
1106
1107 # Hang after starting VBoxSVC (e.g. idTestSet=136307258)
1108 if self.isThisFollowedByTheseLines(sMainLog, 'oVBoxMgr=<vboxapi.VirtualBoxManager object at',
1109 (' Timeout: ', ' Attempting to abort child...',) ):
1110 if sMainLog.find('*** glibc detected *** /') > 0:
1111 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang_Plus_Heap_Corruption);
1112 else:
1113 oCaseFile.noteReason(self.ktReason_XPCOM_VBoxSVC_Hang);
1114 return self.caseClosed(oCaseFile);
1115
1116 # Look for heap corruption without visible hang.
1117 if sMainLog.find('*** glibc detected *** /') > 0 \
1118 or sMainLog.find("-1073740940") > 0: # STATUS_HEAP_CORRUPTION / 0xc0000374
1119 oCaseFile.noteReason(self.ktReason_Unknown_Heap_Corruption);
1120 return self.caseClosed(oCaseFile);
1121
1122 # Out of memory w/ timeout.
1123 if sMainLog.find('sErrId=HostMemoryLow') > 0:
1124 oCaseFile.noteReason(self.ktReason_Host_HostMemoryLow);
1125 return self.caseClosed(oCaseFile);
1126
1127 # Stale files like vts_rm.exe (windows).
1128 offEnd = sMainLog.rfind('*** The test driver exits successfully. ***');
1129 if offEnd > 0 and sMainLog.find('[Error 145] The directory is not empty: ', offEnd) > 0:
1130 oCaseFile.noteReason(self.ktReason_Ignore_Stale_Files);
1131 return self.caseClosed(oCaseFile);
1132
1133 #
1134 # XPCOM screwup
1135 #
1136 if sMainLog.find('AttributeError: \'NoneType\' object has no attribute \'addObserver\'') > 0:
1137 oCaseFile.noteReason(self.ktReason_Buggy_Build_Broken_Build);
1138 return self.caseClosed(oCaseFile);
1139
1140 #
1141 # Go thru each failed result.
1142 #
1143 for oFailedResult in aoFailedResults:
1144 self.dprint(u'Looking at test result #%u - %s' % (oFailedResult.idTestResult, oFailedResult.getFullName(),));
1145 sResultLog = TestSetData.extractLogSectionElapsed(sMainLog, oFailedResult.tsCreated, oFailedResult.tsElapsed);
1146 if oFailedResult.sName == 'Installing VirtualBox':
1147 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = True)
1148
1149 elif oFailedResult.sName == 'Uninstalling VirtualBox':
1150 self.investigateInstallUninstallFailure(oCaseFile, oFailedResult, sResultLog, fInstall = False)
1151
1152 elif self.isResultFromVMRun(oFailedResult, sResultLog):
1153 self.investigateVMResult(oCaseFile, oFailedResult, sResultLog);
1154
1155 elif sResultLog.find('Exception: 0x800706be (Call to remote object failed (NS_ERROR_CALL_FAILED))') > 0:
1156 oCaseFile.noteReasonForId(self.ktReason_XPCOM_NS_ERROR_CALL_FAILED, oFailedResult.idTestResult);
1157
1158 elif sResultLog.find('The machine is not mutable (state is ') > 0:
1159 self.vprint('Ignoring "machine not mutable" error as it is probably due to an earlier problem');
1160 oCaseFile.noteReasonForId(self.ktHarmless, oFailedResult.idTestResult);
1161
1162 elif sResultLog.find('** error: no action was specified') > 0 \
1163 or sResultLog.find('(len(self._asXml, asText))') > 0:
1164 oCaseFile.noteReasonForId(self.ktReason_Ignore_Buggy_Test_Driver, oFailedResult.idTestResult);
1165
1166 else:
1167 self.vprint(u'TODO: Cannot place idTestResult=%u - %s' % (oFailedResult.idTestResult, oFailedResult.sName,));
1168 self.dprint(u'%s + %s <<\n%s\n<<' % (oFailedResult.tsCreated, oFailedResult.tsElapsed, sResultLog,));
1169
1170 #
1171 # Report home and close the case if we got them all, otherwise log it.
1172 #
1173 if len(oCaseFile.dReasonForResultId) >= len(aoFailedResults):
1174 return self.caseClosed(oCaseFile);
1175
1176 if len(oCaseFile.dReasonForResultId) > 0:
1177 self.vprint(u'TODO: Got %u out of %u - close, but no cigar. :-/'
1178 % (len(oCaseFile.dReasonForResultId), len(aoFailedResults)));
1179 else:
1180 self.vprint(u'XXX: Could not figure out anything at all! :-(');
1181 return False;
1182
1183
1184 def reasoningFailures(self):
1185 """
1186 Guess the reason for failures.
1187 """
1188 #
1189 # Get a list of failed test sets without any assigned failure reason.
1190 #
1191 cGot = 0;
1192 aoTestSets = self.oTestSetLogic.fetchFailedSetsWithoutReason(cHoursBack = self.oConfig.cHoursBack, tsNow = self.tsNow);
1193 for oTestSet in aoTestSets:
1194 self.dprint(u'');
1195 self.dprint(u'reasoningFailures: Checking out test set #%u, status %s' % ( oTestSet.idTestSet, oTestSet.enmStatus,))
1196
1197 #
1198 # Open a case file and assign it to the right investigator.
1199 #
1200 (oTree, _ ) = self.oTestResultLogic.fetchResultTree(oTestSet.idTestSet);
1201 oBuild = BuildDataEx().initFromDbWithId( self.oDb, oTestSet.idBuild, oTestSet.tsCreated);
1202 oTestBox = TestBoxData().initFromDbWithGenId( self.oDb, oTestSet.idGenTestBox);
1203 oTestGroup = TestGroupData().initFromDbWithId( self.oDb, oTestSet.idTestGroup, oTestSet.tsCreated);
1204 oTestCase = TestCaseDataEx().initFromDbWithGenId( self.oDb, oTestSet.idGenTestCase, oTestSet.tsConfig);
1205
1206 oCaseFile = VirtualTestSheriffCaseFile(self, oTestSet, oTree, oBuild, oTestBox, oTestGroup, oTestCase);
1207
1208 if oTestSet.enmStatus == TestSetData.ksTestStatus_BadTestBox:
1209 self.dprint(u'investigateBadTestBox is taking over %s.' % (oCaseFile.sLongName,));
1210 fRc = self.investigateBadTestBox(oCaseFile);
1211
1212 elif oCaseFile.isVBoxUnitTest():
1213 self.dprint(u'investigateVBoxUnitTest is taking over %s.' % (oCaseFile.sLongName,));
1214 fRc = self.investigateVBoxUnitTest(oCaseFile);
1215
1216 elif oCaseFile.isVBoxInstallTest():
1217 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1218 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1219
1220 elif oCaseFile.isVBoxUSBTest():
1221 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1222 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1223
1224 elif oCaseFile.isVBoxStorageTest():
1225 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1226 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1227
1228 elif oCaseFile.isVBoxGAsTest():
1229 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1230 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1231
1232 elif oCaseFile.isVBoxAPITest():
1233 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1234 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = True);
1235
1236 elif oCaseFile.isVBoxBenchmarkTest():
1237 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1238 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1239
1240 elif oCaseFile.isVBoxSmokeTest():
1241 self.dprint(u'investigateVBoxVMTest is taking over %s.' % (oCaseFile.sLongName,));
1242 fRc = self.investigateVBoxVMTest(oCaseFile, fSingleVM = False);
1243
1244 else:
1245 self.vprint(u'reasoningFailures: Unable to classify test set: %s' % (oCaseFile.sLongName,));
1246 fRc = False;
1247 cGot += fRc is True;
1248
1249 self.vprint(u'reasoningFailures: Got %u out of %u' % (cGot, len(aoTestSets), ));
1250 return 0;
1251
1252
1253 def main(self):
1254 """
1255 The 'main' function.
1256 Return exit code (0, 1, etc).
1257 """
1258 # Database stuff.
1259 self.oDb = TMDatabaseConnection()
1260 self.oTestResultLogic = TestResultLogic(self.oDb);
1261 self.oTestSetLogic = TestSetLogic(self.oDb);
1262 self.oFailureReasonLogic = FailureReasonLogic(self.oDb);
1263 self.oTestResultFailureLogic = TestResultFailureLogic(self.oDb);
1264 self.asBsodReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksBsodCategory);
1265 self.asUnitTestReasons = self.oFailureReasonLogic.fetchForSheriffByNamedCategory(self.ksUnitTestCategory);
1266
1267 # Get a fix on our 'now' before we do anything..
1268 self.oDb.execute('SELECT CURRENT_TIMESTAMP - interval \'%s hours\'', (self.oConfig.cStartHoursAgo,));
1269 self.tsNow = self.oDb.fetchOne();
1270
1271 # If we're suppost to commit anything we need to get our user ID.
1272 rcExit = 0;
1273 if self.oConfig.fRealRun:
1274 self.oLogin = UserAccountLogic(self.oDb).tryFetchAccountByLoginName(VirtualTestSheriff.ksLoginName);
1275 if self.oLogin is None:
1276 rcExit = self.eprint('Cannot find my user account "%s"!' % (VirtualTestSheriff.ksLoginName,));
1277 else:
1278 self.uidSelf = self.oLogin.uid;
1279
1280 #
1281 # Do the stuff.
1282 #
1283 if rcExit == 0:
1284 rcExit = self.selfCheck();
1285 if rcExit == 0:
1286 rcExit = self.badTestBoxManagement();
1287 rcExit2 = self.reasoningFailures();
1288 if rcExit == 0:
1289 rcExit = rcExit2;
1290 # Redo the bad testbox management after failure reasons have been assigned (got timing issues).
1291 if rcExit == 0:
1292 rcExit = self.badTestBoxManagement();
1293
1294 # Cleanup.
1295 self.oFailureReasonLogic = None;
1296 self.oTestResultFailureLogic = None;
1297 self.oTestSetLogic = None;
1298 self.oTestResultLogic = None;
1299 self.oDb.close();
1300 self.oDb = None;
1301 if self.oLogFile is not None:
1302 self.oLogFile.close();
1303 self.oLogFile = None;
1304 return rcExit;
1305
1306if __name__ == '__main__':
1307 sys.exit(VirtualTestSheriff().main());
1308
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette