1 // Copyright 2006 Google Inc. All Rights Reserved.
4 // os.cc : os and machine specific implementation
5 // Copyright 2006 Google Inc.
6 // for open source release under GPL
8 // This file includes an abstracted interface
9 // for linux-distro specific and HW specific
16 #include <linux/types.h>
22 #include <sys/ioctl.h>
24 #include <sys/types.h>
30 #define SHM_HUGETLB 04000 // remove when glibc defines it
36 // This file must work with autoconf on its public version,
37 // so these includes are correct.
39 #include "error_diag.h"
41 // OsLayer initialization.
46 error_injection_ = false;
48 time_initialized_ = 0;
54 num_cpus_per_node_ = 0;
56 err_log_callback_ = 0;
62 delete error_diagnoser_;
65 // OsLayer initialization.
66 bool OsLayer::Initialize() {
67 time_initialized_ = time(NULL);
68 use_hugepages_ = false;
72 num_cpus_ = sysconf(_SC_NPROCESSORS_ONLN);
73 num_cpus_per_node_ = num_cpus_ / num_nodes_;
75 logprintf(5, "Log: %d nodes, %d cpus.\n", num_nodes_, num_cpus_);
76 sat_assert(CPU_SETSIZE >= num_cpus_);
77 cpu_sets_.resize(num_nodes_);
78 cpu_sets_valid_.resize(num_nodes_);
79 // Create error diagnoser.
80 error_diagnoser_ = new ErrorDiag();
81 if (!error_diagnoser_->set_os(this))
86 // Machine type detected. Can we implement all these functions correctly?
87 bool OsLayer::IsSupported() {
88 // This is the default empty implementation.
89 // SAT won't really run correctly.
93 int OsLayer::AddressMode() {
94 // Detect 32/64 bit binary.
96 return sizeof(pvoid) * 8;
99 // Translates user virtual to physical address.
100 uint64 OsLayer::VirtualToPhysical(void *vaddr) {
101 // Needs platform specific implementation.
105 // Returns the HD device that contains this file.
106 string OsLayer::FindFileDevice(string filename) {
110 // Returns a list of locations corresponding to HD devices.
111 list<string> OsLayer::FindFileDevices() {
112 // No autodetection on unknown systems.
113 list<string> locations;
117 // We need to flush the cacheline here.
118 void OsLayer::Flush(void *vaddr) {
119 // Use the generic flush. This function is just so we can override
120 // this if we are so inclined.
124 // Find the DIMM for an address. This generic version reports "DIMM Unknown".
125 int OsLayer::FindDimm(uint64 addr, char *buf, int len) {
127 snprintf(tmpbuf, sizeof(tmpbuf), "DIMM Unknown");
128 snprintf(buf, len, "%s", tmpbuf);
133 // Classifies addresses according to "regions"
134 // This isn't really implemented meaningfully here.
135 int32 OsLayer::FindRegion(uint64 addr) {
136 static bool warned = false;
138 if (regionsize_ == 0) {
139 regionsize_ = totalmemsize_ / 8;
140 if (regionsize_ < 512 * kMegabyte)
141 regionsize_ = 512 * kMegabyte;
142 regioncount_ = totalmemsize_ / regionsize_;
143 if (regioncount_ < 1) regioncount_ = 1;
146 int32 region_num = addr / regionsize_;
147 if (region_num >= regioncount_) {
149 logprintf(0, "Log: region number %d exceeds region count %d\n",
150 region_num, regioncount_);
153 region_num = region_num % regioncount_;
158 // Report which cores are associated with a given region.
159 cpu_set_t *OsLayer::FindCoreMask(int32 region) {
160 sat_assert(region >= 0);
161 region %= num_nodes_;
162 if (!cpu_sets_valid_[region]) {
163 CPU_ZERO(&cpu_sets_[region]);
164 for (int i = 0; i < num_cpus_per_node_; ++i) {
165 CPU_SET(i + region * num_cpus_per_node_, &cpu_sets_[region]);
167 logprintf(5, "Log: Region %d mask 0x%08X\n",
168 region, cpuset_to_uint32(&cpu_sets_[region]));
169 cpu_sets_valid_[region] = true;
171 return &cpu_sets_[region];
174 // Report an error in an easily parseable way.
175 bool OsLayer::ErrorReport(const char *part, const char *symptom, int count) {
176 time_t now = time(NULL);
177 int ttf = now - time_initialized_;
178 logprintf(0, "Report Error: %s : %s : %d : %ds\n", symptom, part, count, ttf);
182 // Read the number of hugepages out of the kernel interface in proc.
183 int64 OsLayer::FindHugePages() {
186 // This is a kernel interface to query the number of hugepages
187 // available in the system.
188 static const char *hugepages_info_file = "/proc/sys/vm/nr_hugepages";
189 int hpfile = open(hugepages_info_file, O_RDONLY);
191 ssize_t bytes_read = read(hpfile, buf, 64);
194 if (bytes_read <= 0) {
195 logprintf(12, "Log: /proc/sys/vm/nr_hugepages "
196 "read did not provide data\n");
200 if (bytes_read == 64) {
201 logprintf(0, "Process Error: /proc/sys/vm/nr_hugepages "
202 "is surprisingly large\n");
206 // Add a null termination to be string safe.
207 buf[bytes_read] = '\0';
208 // Read the page count.
209 int64 pages = strtoull(buf, NULL, 10); // NOLINT
214 int64 OsLayer::FindFreeMemSize() {
217 if (totalmemsize_ > 0)
218 return totalmemsize_;
220 int64 pages = sysconf(_SC_PHYS_PAGES);
221 int64 avpages = sysconf(_SC_AVPHYS_PAGES);
222 int64 pagesize = sysconf(_SC_PAGESIZE);
223 int64 physsize = pages * pagesize;
224 int64 avphyssize = avpages * pagesize;
226 // Assume 2MB hugepages.
227 int64 hugepagesize = FindHugePages() * 2 * kMegabyte;
229 if ((pages == -1) || (pagesize == -1)) {
230 logprintf(0, "Process Error: sysconf could not determine memory size.\n");
234 // We want to leave enough stuff for things to run.
235 // If more than 2GB is present, leave 192M + 5% for other stuff.
236 // If less than 2GB is present use 85% of what's available.
237 // These are fairly arbitrary numbers that seem to work OK.
239 // TODO(nsanders): is there a more correct way to determine target
241 if (physsize < 2048LL * kMegabyte)
242 minsize = ((pages * 85) / 100) * pagesize;
244 minsize = ((pages * 95) / 100) * pagesize - (192 * kMegabyte);
246 // Use hugepage sizing if available.
247 if (hugepagesize > 0) {
248 if (hugepagesize < minsize) {
249 logprintf(0, "Procedural Error: Not enough hugepages. "
250 "%lldMB available < %lldMB required.\n",
251 hugepagesize / kMegabyte,
252 minsize / kMegabyte);
253 // Require the calculated minimum amount of memory.
256 // Require that we get all hugepages.
260 // Require the calculated minimum amount of memory.
264 logprintf(5, "Log: Total %lld MB. Free %lld MB. Hugepages %lld MB. "
265 "Targeting %lld MB (%lld%%)\n",
266 physsize / kMegabyte,
267 avphyssize / kMegabyte,
268 hugepagesize / kMegabyte,
270 size * 100 / physsize);
272 totalmemsize_ = size;
276 // Allocates all memory available.
277 int64 OsLayer::AllocateAllMem() {
278 int64 length = FindFreeMemSize();
279 bool retval = AllocateTestMem(length, 0);
286 // Allocate the target memory. This may be from malloc, hugepage pool
287 // or other platform specific sources.
288 bool OsLayer::AllocateTestMem(int64 length, uint64 paddr_base) {
289 // Try hugepages first.
293 logprintf(0, "Process Error: non zero paddr_base %#llx is not supported,"
294 " ignore.\n", paddr_base);
296 { // Allocate hugepage mapped memory.
300 if ((shmid = shmget(2, length,
301 SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W)) < 0) {
303 char errtxt[256] = "";
304 strerror_r(err, errtxt, sizeof(errtxt));
305 logprintf(12, "Log: failed to allocate shared mem object - err %d (%s)\n",
307 goto hugepage_failover;
310 shmaddr = shmat(shmid, NULL, NULL);
311 if (shmaddr == reinterpret_cast<void*>(-1)) {
313 char errtxt[256] = "";
314 shmctl(shmid, IPC_RMID, NULL);
315 strerror_r(err, errtxt, sizeof(errtxt));
316 logprintf(0, "Log: failed to attach shared mem object - err %d (%s).\n",
318 goto hugepage_failover;
320 use_hugepages_ = true;
323 logprintf(0, "Log: Using hugepages 0x%x at %p.\n", shmid, shmaddr);
328 if (!use_hugepages_) {
329 // Use memalign to ensure that blocks are aligned enough for disk direct IO.
330 buf = static_cast<char*>(memalign(4096, length));
332 logprintf(0, "Log: Using memaligned allocation at %p.\n", buf);
334 logprintf(0, "Process Error: memalign returned 0\n");
339 testmemsize_ = length;
347 // Free the test memory.
348 void OsLayer::FreeTestMem() {
350 if (use_hugepages_) {
352 shmctl(shmid_, IPC_RMID, NULL);
362 // Prepare the target memory. It may require mapping in, or this may be a noop.
363 void *OsLayer::PrepareTestMem(uint64 offset, uint64 length) {
364 sat_assert((offset + length) <= testmemsize_);
365 return reinterpret_cast<void*>(reinterpret_cast<char*>(testmem_) + offset);
368 // Release the test memory resources, if any.
369 void OsLayer::ReleaseTestMem(void *addr, uint64 offset, uint64 length) {
372 // No error polling on unknown systems.
373 int OsLayer::ErrorPoll() {
377 // Generally, poll for errors once per second.
378 void OsLayer::ErrorWait() {
383 // Open a PCI bus-dev-func as a file and return its file descriptor.
384 // Error is indicated by return value less than zero.
385 int OsLayer::PciOpen(int bus, int device, int function) {
388 snprintf(dev_file, sizeof(dev_file), "/proc/bus/pci/%02x/%02x.%x",
389 bus, device, function);
391 int fd = open(dev_file, O_RDWR);
393 logprintf(0, "Process Error: Unable to open PCI bus %d, device %d, "
394 "function %d (errno %d).\n",
395 bus, device, function, errno);
403 // Read and write functions to access PCI config.
404 uint32 OsLayer::PciRead(int fd, uint32 offset, int width) {
405 // Strict aliasing rules lawyers will cause data corruption
406 // on cast pointers in some gccs.
413 uint32 size = width / 8;
415 sat_assert((width == 32) || (width == 16) || (width == 8));
416 sat_assert(offset <= (256 - size));
418 if (lseek(fd, offset, SEEK_SET) < 0) {
419 logprintf(0, "Process Error: Can't seek %x\n", offset);
422 if (read(fd, &datacast, size) != size) {
423 logprintf(0, "Process Error: Can't read %x\n", offset);
430 sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
433 sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
441 void OsLayer::PciWrite(int fd, uint32 offset, uint32 value, int width) {
442 // Strict aliasing rules lawyers will cause data corruption
443 // on cast pointers in some gccs.
450 uint32 size = width / 8;
452 sat_assert((width == 32) || (width == 16) || (width == 8));
453 sat_assert(offset <= (256 - size));
455 // Cram the data into the right alignment.
458 sat_assert(&(datacast.l8) == reinterpret_cast<uint8*>(&datacast));
461 sat_assert(&(datacast.l16) == reinterpret_cast<uint16*>(&datacast));
462 datacast.l16 = value;
464 datacast.l32 = value;
467 if (lseek(fd, offset, SEEK_SET) < 0) {
468 logprintf(0, "Process Error: Can't seek %x\n", offset);
471 if (write(fd, &datacast, size) != size) {
472 logprintf(0, "Process Error: Can't write %x to %x\n", datacast.l32, offset);
482 int OsLayer::OpenMSR(uint32 core, uint32 address) {
484 snprintf(buf, sizeof(buf), "/dev/cpu/%d/msr", core);
485 int fd = open(buf, O_RDWR);
489 uint32 pos = lseek(fd, address, SEEK_SET);
490 if (pos != address) {
492 logprintf(5, "Log: can't seek to msr %x, cpu %d\n", address, core);
499 bool OsLayer::ReadMSR(uint32 core, uint32 address, uint64 *data) {
500 int fd = OpenMSR(core, address);
504 // Read from the msr.
505 bool res = (sizeof(*data) == read(fd, data, sizeof(*data)));
508 logprintf(5, "Log: Failed to read msr %x core %d\n", address, core);
515 bool OsLayer::WriteMSR(uint32 core, uint32 address, uint64 *data) {
516 int fd = OpenMSR(core, address);
521 bool res = (sizeof(*data) == write(fd, data, sizeof(*data)));
524 logprintf(5, "Log: Failed to write msr %x core %d\n", address, core);
531 // Extract bits [n+len-1, n] from a 32 bit word,
532 // e.g. GetBitField(0x0f00, 8, 4) == 0xf.
533 uint32 OsLayer::GetBitField(uint32 val, uint32 n, uint32 len) {
534 return (val >> n) & ((1<<len) - 1);
537 // Generic CPU stress workload that would work on any CPU/Platform.
538 // Floating-point array moving average calculation.
539 bool OsLayer::CpuStressWorkload() {
540 double float_arr[100];
542 unsigned int seed = 12345;
544 // Initialize array with random numbers.
545 for (int i = 0; i < 100; i++) {
546 float_arr[i] = rand_r(&seed);
547 if (rand_r(&seed) % 2)
548 float_arr[i] *= -1.0;
551 // Calculate moving average.
552 for (int i = 0; i < 100000000; i++) {
554 (float_arr[i % 100] + float_arr[(i + 1) % 100] +
555 float_arr[(i + 99) % 100]) / 3;
556 sum += float_arr[i % 100];
559 // Artificial printf so the loops do not get optimized away.
561 logprintf(12, "Log: I'm Feeling Lucky!\n");
565 PCIDevices OsLayer::GetPCIDevices() {
566 PCIDevices device_list;
568 struct dirent *buf = new struct dirent();
569 struct dirent *entry;
570 dir = opendir(kSysfsPath);
572 logprintf(0, "Process Error: Cannot open %s", kSysfsPath);
573 while (readdir_r(dir, buf, &entry) == 0 && entry) {
575 unsigned int dev, func;
576 // ".", ".." or a special non-device perhaps.
577 if (entry->d_name[0] == '.')
580 device = new PCIDevice();
581 if (sscanf(entry->d_name, "%04x:%02hx:%02x.%d",
582 &device->domain, &device->bus, &dev, &func) < 4) {
583 logprintf(0, "Process Error: Couldn't parse %s", entry->d_name);
589 device->vendor_id = PCIGetValue(entry->d_name, "vendor");
590 device->device_id = PCIGetValue(entry->d_name, "device");
591 PCIGetResources(entry->d_name, device);
592 device_list.insert(device_list.end(), device);
599 int OsLayer::PCIGetValue(string name, string object) {
603 snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
604 name.c_str(), object.c_str());
605 fd = open(filename, O_RDONLY);
608 len = read(fd, buf, 256);
611 return strtol(buf, NULL, 0); // NOLINT
614 int OsLayer::PCIGetResources(string name, PCIDevice *device) {
622 snprintf(filename, sizeof(filename), "%s/%s/%s", kSysfsPath,
623 name.c_str(), "resource");
624 file = fopen(filename, "r");
626 logprintf(0, "Process Error: impossible to find resource file for %s",
630 for (i = 0; i < 6; i++) {
631 if (!fgets(buf, 256, file))
633 sscanf(buf, "%llx %llx", &start, &end); // NOLINT
636 size = end - start + 1;
637 device->base_addr[i] = start;
638 device->size[i] = size;