1 // Copyright 2008 Google Inc. All Rights Reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 // error_diag.h: Ambiguous error diagnosis class
17 #ifndef STRESSAPPTEST_ERROR_DIAG_H_
18 #define STRESSAPPTEST_ERROR_DIAG_H_
26 // This file must work with autoconf on its public version,
27 // so these includes are correct.
33 // This describes the components of the system.
36 explicit DeviceTree(string name);  // Constructs a device from its name.
39 // Atomically find arbitrary device in subtree.
40 DeviceTree *FindInSubTree(string name);
41 // Find or add named device.
42 DeviceTree *FindOrAddDevice(string name);
43 // Atomically add sub device.
44 void InsertSubDevice(string name);
45 // Returns the parent device pointer (parent_); reads it without locking.
46 DeviceTree *GetParent() { return parent_; }
47 // Pretty prints device tree; spacer is optional and has a default value.
48 void PrettyPrint(string spacer = " ");
49 // Atomically add error instance to device.
50 void AddErrorInstance(ErrorInstance *error_instance);
51 // Returns true if device is known to be bad.
53 // Returns number of direct sub devices, i.e. subdevices_.size() as an int.
54 int NumDirectSubDevices() { return subdevices_.size(); }
57 // Unlocked version of FindInSubTree. NOTE(review): presumably the caller must already hold device_tree_mutex_ -- confirm.
58 DeviceTree *UnlockedFindInSubTree(string name);
60 std::map<string, DeviceTree*> subdevices_; // Map of sub-devices, keyed by string (presumably the device name).
61 std::list<ErrorInstance*> errors_; // Log of errors.
62 DeviceTree *parent_; // Pointer to parent device.
63 string name_; // Device name.
64 pthread_mutex_t device_tree_mutex_; // Mutex protecting device tree.
68 // enum type for collected errors.
76 // enum type for error severity.
77 enum SATErrorSeverity {
78 SAT_ERROR_CORRECTABLE = 0,
82 // This describes an error and its likely causes.
85 ErrorInstance(): type_(SAT_ERROR_NONE), severity_(SAT_ERROR_CORRECTABLE) {}  // Defaults: no error type, correctable severity.
87 SATErrorType type_; // Type of error: ECC, miscompare, sector.
88 SATErrorSeverity severity_; // Correctable, or fatal.
89 std::set<DeviceTree*> causes_; // Devices that can cause this type of error.
92 // This describes ECC errors.
93 class ECCErrorInstance: public ErrorInstance {
95 ECCErrorInstance() { type_ = SAT_ERROR_ECC; }  // Tags the instance as an ECC error.
97 uint64 addr_; // Address where error occurred.
100 // This describes miscompare errors.
101 class MiscompareErrorInstance: public ErrorInstance {
103 MiscompareErrorInstance() { type_ = SAT_ERROR_MISCOMPARE; }  // Tags the instance as a miscompare error.
105 uint64 addr_; // Address where miscompare occurred.
108 // This describes HDD miscompare errors.
109 class HDDMiscompareErrorInstance: public MiscompareErrorInstance {
111 uint64 addr2_; // addr_ (declared in MiscompareErrorInstance) and addr2_ are src and dst memory addr.
112 int offset_; // Error offset (TODO confirm what it is relative to).
113 int block_; // Error block.
116 // This describes HDD sector tag errors.
117 class HDDSectorTagErrorInstance: public ErrorInstance {
119 HDDSectorTagErrorInstance() { type_ = SAT_ERROR_SECTOR_TAG; }  // Tags the instance as a sector tag error.
122 uint64 addr2_; // addr_ and addr2_ are src and dst memory addr.
123 int sector_; // Error sector.
124 int block_; // Error block.
127 // Generic error storage and sorting class.
131 virtual ~ErrorDiag();
133 // Add info about a CECC error; dimm_string identifies the DIMM.
134 virtual int AddCeccError(string dimm_string);
136 // Add info about a UECC error; dimm_string identifies the DIMM.
137 virtual int AddUeccError(string dimm_string);
139 // Add info about a miscompare: the DIMM string, the address, and a count.
140 virtual int AddMiscompareError(string dimm_string, uint64 addr, int count);
142 // Add info about a miscompare from a drive, given the device name, block,
142 // offset, and the source and destination addresses.
143 virtual int AddHDDMiscompareError(string devicename, int block, int offset,
144 void *src_addr, void *dst_addr);
146 // Add info about a sector tag miscompare from a drive, given the device
146 // name, block, offset, sector, and the source and destination addresses.
147 virtual int AddHDDSectorTagError(string devicename, int block, int offset,
148 int sector, void *src_addr, void *dst_addr);
150 // Set platform specific handle and initialize device tree.
151 bool set_os(OsLayer *os);
154 // Create and initialize system device tree.
155 virtual bool InitializeDeviceTree();
157 // Utility function to translate a virtual address to a DIMM string.
158 string AddressToDimmString(OsLayer *os, void *addr, int offset);
160 DeviceTree *system_tree_root_; // System device tree.
161 OsLayer *os_; // Platform handle.
164 DISALLOW_COPY_AND_ASSIGN(ErrorDiag);
167 #endif // STRESSAPPTEST_ERROR_DIAG_H_