/**
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "hardware-counter.h"

#ifndef NO_HARDWARE_COUNTERS

#define _GNU_SOURCE 1
#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/prctl.h>
#include <asm/unistd.h>
#include <linux/perf_event.h>

#include <memory>
#include <string>

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

IMPLEMENT_THREAD_LOCAL_NO_CHECK(HardwareCounter,
    HardwareCounter::s_counter);

// Process-wide configuration, written by HardwareCounter::Init().
static bool s_recordSubprocessTimes = false;
static bool s_profileHWEnable;
static std::string s_profileHWEvents;

/*
 * Whether counters may be used at all: never when built with VALGRIND
 * defined, otherwise whatever Init() was told.
 */
static inline bool useCounters() {
#ifdef VALGRIND
  return false;
#else
  return s_profileHWEnable;
#endif
}

/*
 * One perf event, opened lazily on first use.
 *
 * The constructor only fills in the perf_event_attr; the descriptor is
 * opened by init_if_not() the first time the counter is read or reset.
 * Any failure sets m_err, after which every operation is a no-op.
 */
class HardwareCounterImpl {
public:
  HardwareCounterImpl(int type, unsigned long config,
                      const char* desc = nullptr)
    : m_desc(desc ? desc : ""), m_err(0), m_fd(-1), inited(false) {
    memset(&pe, 0, sizeof(struct perf_event_attr));
    pe.type = type;
    pe.size = sizeof(struct perf_event_attr);
    pe.config = config;
    pe.inherit = s_recordSubprocessTimes;
    pe.disabled = 1;
    pe.pinned = 0;
    pe.exclude_kernel = 0;
    pe.exclude_hv = 1;
    // Ask for enabled/running times so read() can scale the raw count.
    pe.read_format =
      PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
  }

  // The object owns m_fd; copying would close the descriptor twice.
  HardwareCounterImpl(const HardwareCounterImpl&) = delete;
  HardwareCounterImpl& operator=(const HardwareCounterImpl&) = delete;

  ~HardwareCounterImpl() {
    close();
  }

  /*
   * Open and enable the event once. Later calls return immediately, even
   * if the first attempt failed (m_err stays set in that case).
   */
  void init_if_not() {
    /*
     * perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid,
     *                 int cpu, int group_fd, unsigned long flags)
     */
    if (inited) return;
    inited = true;
    m_fd = static_cast<int>(
      syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0));
    if (m_fd < 0) {
      // Logger::Verbose("perf_event_open failed with: %s",
      //                 folly::errnoStr(errno).c_str());
      m_err = -1;
      return;
    }
    if (ioctl(m_fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
      // Logger::Warning("perf_event failed to enable: %s",
      //                 folly::errnoStr(errno).c_str());
      close();
      m_err = -1;
      return;
    }
    reset();
  }

  /*
   * Returns the count since the last reset(), scaled by
   * time_enabled / time_running, plus whatever incCount() has added.
   * Returns 0 when the counter cannot be read or has not run at all.
   */
  int64_t read() {
    uint64_t values[3];
    if (!readRaw(values)) return 0;
    if (!values[2]) return 0;
    int64_t value =
      static_cast<double>(values[0]) * values[1] / values[2];
    return value + extra;
  }

  // Adds a manual adjustment that read() includes in its result.
  void incCount(int64_t amount) {
    extra += amount;
  }

  /*
   * Fills values[0..2] with {count, time_enabled, time_running} relative to
   * the baseline captured by reset(). Returns false when counters are
   * disabled, the event is in error, or the read comes up short.
   */
  bool readRaw(uint64_t* values) {
    if (m_err || !useCounters()) return false;
    init_if_not();
    // The lazy init above may just have failed; without this check we could
    // report success against a baseline that was never captured.
    if (m_err) return false;

    if (m_fd >= 0) {
      /*
       * read the count + scaling values
       *
       * It is not necessary to stop an event to read its value
       */
      const size_t nbytes = sizeof(*values) * 3;
      auto ret = ::read(m_fd, values, nbytes);
      if (ret == static_cast<ssize_t>(nbytes)) {
        values[0] -= reset_values[0];
        values[1] -= reset_values[1];
        values[2] -= reset_values[2];
        return true;
      }
    }
    return false;
  }

  /*
   * Zeroes the manual adjustment, resets the kernel counter and records the
   * current raw values as the new baseline for readRaw().
   */
  void reset() {
    if (m_err || !useCounters()) return;
    init_if_not();
    extra = 0;
    if (m_fd >= 0) {
      if (ioctl(m_fd, PERF_EVENT_IOC_RESET, 0) < 0) {
        // Logger::Warning("perf_event failed to reset with: %s",
        //                 folly::errnoStr(errno).c_str());
        m_err = -1;
        return;
      }
      auto ret = ::read(m_fd, reset_values, sizeof(reset_values));
      if (ret != static_cast<ssize_t>(sizeof(reset_values))) {
        // Logger::Warning("perf_event failed to reset with: %s",
        //                 folly::errnoStr(errno).c_str());
        m_err = -1;
        return;
      }
    }
  }

public:
  std::string m_desc;  // event name as given to the constructor ("" if none)
  int m_err;           // 0 while healthy, -1 after any failure

private:
  int m_fd;                      // perf event descriptor, -1 when not open
  struct perf_event_attr pe;
  bool inited;                   // init_if_not() has already been attempted
  uint64_t reset_values[3]{};    // raw values captured by the last reset()
  uint64_t extra{0};             // sum of incCount() since the last reset()

  // 0 is a valid descriptor, so only negative values mean "not open".
  void close() {
    if (m_fd >= 0) {
      ::close(m_fd);
      m_fd = -1;
    }
  }
};

class InstructionCounter : public HardwareCounterImpl {
public:
  InstructionCounter() :
    HardwareCounterImpl(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS) {}
};

class LoadCounter : public HardwareCounterImpl {
public:
  LoadCounter() :
    HardwareCounterImpl(PERF_TYPE_HW_CACHE,
        (PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8))) {}
};

class StoreCounter : public HardwareCounterImpl {
public:
  StoreCounter() :
    HardwareCounterImpl(PERF_TYPE_HW_CACHE,
        PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_WRITE) << 8)) {}
};

/*
 * Always creates the instruction counter. The load/store counters are only
 * created when no custom event list was configured through Init();
 * otherwise the configured events are installed instead.
 */
HardwareCounter::HardwareCounter()
  : m_countersSet(false) {
  m_instructionCounter.reset(new InstructionCounter());
  if (s_profileHWEvents.empty()) {
    m_loadCounter.reset(new LoadCounter());
    m_storeCounter.reset(new StoreCounter());
  } else {
    m_countersSet = true;
    setPerfEvents(s_profileHWEvents);
  }
}

HardwareCounter::~HardwareCounter() {
}

// Stores the process-wide settings read by counters created afterwards.
void HardwareCounter::Init(bool enable, const std::string& events,
                           bool subProc) {
  s_profileHWEnable = enable;
  s_profileHWEvents = events;
  s_recordSubprocessTimes = subProc;
}

void HardwareCounter::Reset() {
  s_counter->reset();
}

void HardwareCounter::reset() {
  m_instructionCounter->reset();
  if (!m_countersSet) {
    m_storeCounter->reset();
    m_loadCounter->reset();
  }
  for (auto& counter : m_counters) {
    counter->reset();
  }
}

int64_t HardwareCounter::GetInstructionCount() {
  return s_counter->getInstructionCount();
}

int64_t HardwareCounter::getInstructionCount() {
  return m_instructionCounter->read();
}

int64_t HardwareCounter::GetLoadCount() {
  return s_counter->getLoadCount();
}

// The load counter does not exist once custom events are in use; report 0
// rather than dereferencing a null pointer.
int64_t HardwareCounter::getLoadCount() {
  return m_loadCounter ? m_loadCounter->read() : 0;
}

int64_t HardwareCounter::GetStoreCount() {
  return s_counter->getStoreCount();
}

// The store counter does not exist once custom events are in use; report 0
// rather than dereferencing a null pointer.
int64_t HardwareCounter::getStoreCount() {
  return m_storeCounter ? m_storeCounter->read() : 0;
}

void HardwareCounter::IncInstructionCount(int64_t amount) {
  s_counter->m_instructionCounter->incCount(amount);
}

void HardwareCounter::IncLoadCount(int64_t amount) {
  if (!s_counter->m_countersSet) {
    s_counter->m_loadCounter->incCount(amount);
  }
}

void HardwareCounter::IncStoreCount(int64_t amount) {
  if (!s_counter->m_countersSet) {
    s_counter->m_storeCounter->incCount(amount);
  }
}

/*
 * Event-name fragments understood by addPerfEvent(). A full event name is
 * matched as a concatenation of fragments ("L1-dcache-" + "load-misses");
 * the config values of the matched fragments are OR-ed together. Order
 * matters: findEvent() returns the first entry that is a prefix of the
 * input, so e.g. "branch-misses" has to precede "branch-".
 */
struct PerfTable perfTable[] = {
  /* PERF_TYPE_HARDWARE events */
#define PC(n)    PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## n
  { "cpu-cycles",              PC(CPU_CYCLES)              },
  { "cycles",                  PC(CPU_CYCLES)              },
  { "instructions",            PC(INSTRUCTIONS)            },
  { "cache-references",        PC(CACHE_REFERENCES)        },
  { "cache-misses",            PC(CACHE_MISSES)            },
  { "branch-instructions",     PC(BRANCH_INSTRUCTIONS)     },
  { "branches",                PC(BRANCH_INSTRUCTIONS)     },
  { "branch-misses",           PC(BRANCH_MISSES)           },
  { "bus-cycles",              PC(BUS_CYCLES)              },
  { "stalled-cycles-frontend", PC(STALLED_CYCLES_FRONTEND) },
  { "stalled-cycles-backend",  PC(STALLED_CYCLES_BACKEND)  },

  /* PERF_TYPE_HW_CACHE hw_cache_id */
#define PCC(n)   PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_ ## n
  { "L1-dcache-",              PCC(L1D)                    },
  { "L1-icache-",              PCC(L1I)                    },
  { "LLC-",                    PCC(LL)                     },
  { "dTLB-",                   PCC(DTLB)                   },
  { "iTLB-",                   PCC(ITLB)                   },
  { "branch-",                 PCC(BPU)                    },

  /* PERF_TYPE_HW_CACHE hw_cache_op, hw_cache_result */
#define PCCO(n, m)  PERF_TYPE_HW_CACHE, \
                    ((PERF_COUNT_HW_CACHE_OP_ ## n) << 8 | \
                    (PERF_COUNT_HW_CACHE_RESULT_ ## m) << 16)
  { "loads",                   PCCO(READ,     ACCESS)      },
  { "load-misses",             PCCO(READ,     MISS)        },
  { "stores",                  PCCO(WRITE,    ACCESS)      },
  { "store-misses",            PCCO(WRITE,    MISS)        },
  { "prefetches",              PCCO(PREFETCH, ACCESS)      },
  { "prefetch-misses",         PCCO(PREFETCH, MISS)        }
};

/*
 * Returns the index of the first of the `len` entries in `t` whose name is
 * a prefix of `event`, storing that name's length in *match_len, or -1
 * (leaving *match_len untouched) when nothing matches.
 */
static int findEvent(const char *event, struct PerfTable *t,
                     int len, int *match_len) {
  for (int i = 0; i < len; i++) {
    const size_t name_len = strlen(t[i].name);
    if (!strncmp(event, t[i].name, name_len)) {
      *match_len = static_cast<int>(name_len);
      return i;
    }
  }
  return -1;
}

// Bit fields extracted from the EAX value returned by CPUID leaf 1.
#define CPUID_STEPPING(x) ((x) & 0xf)
#define CPUID_MODEL(x) (((x) & 0xf0) >> 4)
#define CPUID_FAMILY(x) (((x) & 0xf00) >> 8)
#define CPUID_TYPE(x) (((x) & 0x3000) >> 12)

// hack to get LLC counters on perflab frc machines
static bool isIntelE5_2670() {
#ifdef __x86_64__
  unsigned long x;
  asm volatile ("cpuid" : "=a"(x): "a"(1) : "ebx", "ecx", "edx");
  return CPUID_STEPPING(x) == 6 && CPUID_MODEL(x) == 0xd
    && CPUID_FAMILY(x) == 6 && CPUID_TYPE(x) == 0;
#else
  return false;
#endif
}

/*
 * On the CPU detected by isIntelE5_2670(), switch "LLC-load*" events to
 * PERF_TYPE_RAW and, for "LLC-loads" / "LLC-load-misses", substitute a
 * hard-coded raw config. Other events and other CPUs are left untouched.
 */
static void checkLLCHack(const char* event, uint32_t& type, uint64_t& config) {
  if (!strncmp(event, "LLC-load", 8) && isIntelE5_2670()) {
    type = PERF_TYPE_RAW;
    if (!strncmp(&event[4], "loads", 5)) {
      config = 0x534f2e;
    } else if (!strncmp(&event[4], "load-misses", 11)) {
      config = 0x53412e;
    }
  }
}

/*
 * Parses one event name and appends a counter for it.
 *
 * The name is either a concatenation of perfTable fragments that all share
 * one perf type, or a raw spec of the form "r<hex>". Returns false when the
 * name cannot be fully parsed. The first successful call releases the
 * default load/store counters.
 */
bool HardwareCounter::addPerfEvent(const char* event) {
  uint32_t type = 0;
  uint64_t config = 0;
  int i = -1;
  int match_len = 0;
  bool found = false;
  const char* ev = event;

  while ((i = findEvent(ev, perfTable,
                        sizeof(perfTable)/sizeof(struct PerfTable),
                        &match_len))
       != -1) {
    if (!found) {
      found = true;
      type = perfTable[i].type;
    } else if (type != perfTable[i].type) {
      // Logger::Warning("failed to find perf event: %s", event);
      return false;
    }
    config |= perfTable[i].config;
    ev = &ev[match_len];
  }

  checkLLCHack(event, type, config);

  // Check if we have a raw spec.
  if (!found && event[0] == 'r' && event[1] != 0) {
    config = strtoull(event + 1, const_cast<char**>(&ev), 16);
    if (*ev == 0) {
      found = true;
      type = PERF_TYPE_RAW;
    }
  }

  // Reject names with no match or with unparsed trailing characters.
  if (!found || *ev) {
    // Logger::Warning("failed to find perf event: %s", event);
    return false;
  }

  std::unique_ptr<HardwareCounterImpl> hwc(
      new HardwareCounterImpl(type, config, event));
  if (hwc->m_err) {
    // Logger::Warning("failed to set perf event: %s", event);
    return false;
  }
  m_counters.emplace_back(std::move(hwc));
  if (!m_countersSet) {
    // reset load and store counters. This is because
    // perf does not seem to handle more than three counters
    // very well.
    m_loadCounter.reset();
    m_storeCounter.reset();
    m_countersSet = true;
  }
  return true;
}

// Returns true when a counter with exactly this event name is installed.
bool HardwareCounter::eventExists(const char *event) {
  // hopefully m_counters set is small, so a linear scan does not hurt
  for (const auto& counter : m_counters) {
    if (!strcmp(event, counter->m_desc.c_str())) {
      return true;
    }
  }
  return false;
}

/*
 * Installs every event of a comma-separated list that is not installed
 * yet. Stops at the first event that cannot be added and returns false;
 * events added before that point stay installed.
 */
bool HardwareCounter::setPerfEvents(std::string sevents) {
  // `sevents` is already a private copy (taken by value), so strtok_r can
  // tokenize it in place; no separate buffer has to be allocated or freed.
  char* strtok_buf = nullptr;
  for (char* s = strtok_r(&sevents[0], ",", &strtok_buf);
       s != nullptr;
       s = strtok_r(nullptr, ",", &strtok_buf)) {
    if (!eventExists(s) && !addPerfEvent(s)) {
      return false;
    }
  }
  return true;
}

bool HardwareCounter::SetPerfEvents(std::string events) {
  return s_counter->setPerfEvents(events);
}

// Drops the custom counters. m_countersSet is left as it was, so the
// default load/store counters are not brought back.
void HardwareCounter::clearPerfEvents() {
  m_counters.clear();
}

void HardwareCounter::ClearPerfEvents() {
  s_counter->clearPerfEvents();
}

const std::string
  s_instructions("instructions"),
  s_loads("loads"),
  s_stores("stores");

/*
 * Invokes f(name, value, data) for the instruction counter, then for the
 * default load/store counters (only while no custom events are set), then
 * for every custom counter in insertion order.
 */
void HardwareCounter::getPerfEvents(PerfEventCallback f, void* data) {
  f(s_instructions, getInstructionCount(), data);
  if (!m_countersSet) {
    f(s_loads, getLoadCount(), data);
    f(s_stores, getStoreCount(), data);
  }
  for (const auto& counter : m_counters) {
    f(counter->m_desc, counter->read(), data);
  }
}

void HardwareCounter::GetPerfEvents(PerfEventCallback f, void* data) {
  s_counter->getPerfEvents(f, data);
}

///////////////////////////////////////////////////////////////////////////////
}

#else // NO_HARDWARE_COUNTERS

namespace HPHP {
///////////////////////////////////////////////////////////////////////////////

HardwareCounter HardwareCounter::s_counter;

///////////////////////////////////////////////////////////////////////////////
}

#endif // NO_HARDWARE_COUNTERS