mirror of
https://github.com/facebook/react.git
synced 2026-02-25 23:34:59 +00:00
* Facebook -> Meta in copyright rg --files | xargs sed -i 's#Copyright (c) Facebook, Inc. and its affiliates.#Copyright (c) Meta Platforms, Inc. and affiliates.#g' * Manual tweaks
470 lines
12 KiB
C++
470 lines
12 KiB
C++
/**
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
|
|
|
|
#include "hardware-counter.h"
|
|
|
|
#ifndef NO_HARDWARE_COUNTERS
|
|
|
|
#define _GNU_SOURCE 1
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <unistd.h>
|
|
#include <fcntl.h>
|
|
#include <errno.h>
|
|
#include <assert.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/ioctl.h>
|
|
#include <asm/unistd.h>
|
|
#include <sys/prctl.h>
|
|
#include <linux/perf_event.h>
|
|
|
|
namespace HPHP {
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Defines the storage behind HardwareCounter::s_counter through the
// project's IMPLEMENT_THREAD_LOCAL_NO_CHECK macro (presumably one instance
// per thread -- confirm against the macro's definition).
IMPLEMENT_THREAD_LOCAL_NO_CHECK(HardwareCounter, HardwareCounter::s_counter);
|
|
|
|
// Process-wide settings, written by HardwareCounter::Init().

// Copied into perf_event_attr::inherit when a counter is configured.
static bool s_recordSubprocessTimes = false;
// Master switch consulted by useCounters().
static bool s_profileHWEnable = false;
// Comma-separated event list handed to setPerfEvents() by the constructor;
// when empty, the default load/store counters are created instead.
static std::string s_profileHWEvents;
|
|
|
|
static inline bool useCounters() {
|
|
#ifdef VALGRIND
|
|
return false;
|
|
#else
|
|
return s_profileHWEnable;
|
|
#endif
|
|
}
|
|
|
|
class HardwareCounterImpl {
|
|
public:
|
|
HardwareCounterImpl(int type, unsigned long config,
|
|
const char* desc = nullptr)
|
|
: m_desc(desc ? desc : ""), m_err(0), m_fd(-1), inited(false) {
|
|
memset (&pe, 0, sizeof (struct perf_event_attr));
|
|
pe.type = type;
|
|
pe.size = sizeof (struct perf_event_attr);
|
|
pe.config = config;
|
|
pe.inherit = s_recordSubprocessTimes;
|
|
pe.disabled = 1;
|
|
pe.pinned = 0;
|
|
pe.exclude_kernel = 0;
|
|
pe.exclude_hv = 1;
|
|
pe.read_format =
|
|
PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING;
|
|
}
|
|
|
|
~HardwareCounterImpl() {
|
|
close();
|
|
}
|
|
|
|
void init_if_not() {
|
|
/*
|
|
* perf_event_open(struct perf_event_attr *hw_event_uptr, pid_t pid,
|
|
* int cpu, int group_fd, unsigned long flags)
|
|
*/
|
|
if (inited) return;
|
|
inited = true;
|
|
m_fd = syscall(__NR_perf_event_open, &pe, 0, -1, -1, 0);
|
|
if (m_fd < 0) {
|
|
// Logger::Verbose("perf_event_open failed with: %s",
|
|
// folly::errnoStr(errno).c_str());
|
|
m_err = -1;
|
|
return;
|
|
}
|
|
if (ioctl(m_fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
|
|
// Logger::Warning("perf_event failed to enable: %s",
|
|
// folly::errnoStr(errno).c_str());
|
|
close();
|
|
m_err = -1;
|
|
return;
|
|
}
|
|
reset();
|
|
}
|
|
|
|
int64_t read() {
|
|
uint64_t values[3];
|
|
if (readRaw(values)) {
|
|
if (!values[2]) return 0;
|
|
int64_t value = (double)values[0] * values[1] / values[2];
|
|
return value + extra;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
void incCount(int64_t amount) {
|
|
extra += amount;
|
|
}
|
|
|
|
bool readRaw(uint64_t* values) {
|
|
if (m_err || !useCounters()) return false;
|
|
init_if_not();
|
|
|
|
if (m_fd > 0) {
|
|
/*
|
|
* read the count + scaling values
|
|
*
|
|
* It is not necessary to stop an event to read its value
|
|
*/
|
|
auto ret = ::read(m_fd, values, sizeof(*values) * 3);
|
|
if (ret == sizeof(*values) * 3) {
|
|
values[0] -= reset_values[0];
|
|
values[1] -= reset_values[1];
|
|
values[2] -= reset_values[2];
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void reset() {
|
|
if (m_err || !useCounters()) return;
|
|
init_if_not();
|
|
extra = 0;
|
|
if (m_fd > 0) {
|
|
if (ioctl (m_fd, PERF_EVENT_IOC_RESET, 0) < 0) {
|
|
// Logger::Warning("perf_event failed to reset with: %s",
|
|
// folly::errnoStr(errno).c_str());
|
|
m_err = -1;
|
|
return;
|
|
}
|
|
auto ret = ::read(m_fd, reset_values, sizeof(reset_values));
|
|
if (ret != sizeof(reset_values)) {
|
|
// Logger::Warning("perf_event failed to reset with: %s",
|
|
// folly::errnoStr(errno).c_str());
|
|
m_err = -1;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
public:
|
|
std::string m_desc;
|
|
int m_err;
|
|
private:
|
|
int m_fd;
|
|
struct perf_event_attr pe;
|
|
bool inited;
|
|
uint64_t reset_values[3];
|
|
uint64_t extra{0};
|
|
|
|
void close() {
|
|
if (m_fd > 0) {
|
|
::close(m_fd);
|
|
m_fd = -1;
|
|
}
|
|
}
|
|
};
|
|
|
|
class InstructionCounter : public HardwareCounterImpl {
|
|
public:
|
|
InstructionCounter() :
|
|
HardwareCounterImpl(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS) {}
|
|
};
|
|
|
|
class LoadCounter : public HardwareCounterImpl {
|
|
public:
|
|
LoadCounter() :
|
|
HardwareCounterImpl(PERF_TYPE_HW_CACHE,
|
|
(PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8))) {}
|
|
};
|
|
|
|
class StoreCounter : public HardwareCounterImpl {
|
|
public:
|
|
StoreCounter() :
|
|
HardwareCounterImpl(PERF_TYPE_HW_CACHE,
|
|
PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_WRITE) << 8)) {}
|
|
};
|
|
|
|
// Always creates the instruction counter. If Init() supplied an explicit
// event list, those events are installed and no load/store counters are
// created; otherwise the default load and store counters are created.
HardwareCounter::HardwareCounter()
  : m_countersSet(false) {
  m_instructionCounter.reset(new InstructionCounter());

  if (!s_profileHWEvents.empty()) {
    m_countersSet = true;
    setPerfEvents(s_profileHWEvents);
    return;
  }

  m_loadCounter.reset(new LoadCounter());
  m_storeCounter.reset(new StoreCounter());
}
|
|
|
|
// Nothing to do by hand: the members release the counters they hold.
HardwareCounter::~HardwareCounter() = default;
|
|
|
|
// Stores the process-wide configuration: `enable` gates all counter use,
// `events` is the event list read by the HardwareCounter constructor, and
// `subProc` is copied into perf_event_attr::inherit for new counters.
void HardwareCounter::Init(bool enable, const std::string& events,
                           bool subProc) {
  s_recordSubprocessTimes = subProc;
  s_profileHWEvents = events;
  s_profileHWEnable = enable;
}
|
|
|
|
// Static entry point: resets every counter held by s_counter.
void HardwareCounter::Reset() {
  s_counter->reset();
}
|
|
|
|
void HardwareCounter::reset() {
|
|
m_instructionCounter->reset();
|
|
if (!m_countersSet) {
|
|
m_storeCounter->reset();
|
|
m_loadCounter->reset();
|
|
}
|
|
for (unsigned i = 0; i < m_counters.size(); i++) {
|
|
m_counters[i]->reset();
|
|
}
|
|
}
|
|
|
|
int64_t HardwareCounter::GetInstructionCount() {
|
|
return s_counter->getInstructionCount();
|
|
}
|
|
|
|
int64_t HardwareCounter::getInstructionCount() {
|
|
return m_instructionCounter->read();
|
|
}
|
|
|
|
int64_t HardwareCounter::GetLoadCount() {
|
|
return s_counter->getLoadCount();
|
|
}
|
|
|
|
int64_t HardwareCounter::getLoadCount() {
|
|
return m_loadCounter->read();
|
|
}
|
|
|
|
int64_t HardwareCounter::GetStoreCount() {
|
|
return s_counter->getStoreCount();
|
|
}
|
|
|
|
int64_t HardwareCounter::getStoreCount() {
|
|
return m_storeCounter->read();
|
|
}
|
|
|
|
// Adds `amount` to the software adjustment of s_counter's instruction
// counter.
void HardwareCounter::IncInstructionCount(int64_t amount) {
  s_counter->m_instructionCounter->incCount(amount);
}
|
|
|
|
void HardwareCounter::IncLoadCount(int64_t amount) {
|
|
if (!s_counter->m_countersSet) {
|
|
s_counter->m_loadCounter->incCount(amount);
|
|
}
|
|
}
|
|
|
|
void HardwareCounter::IncStoreCount(int64_t amount) {
|
|
if (!s_counter->m_countersSet) {
|
|
s_counter->m_storeCounter->incCount(amount);
|
|
}
|
|
}
|
|
|
|
struct PerfTable perfTable[] = {
|
|
/* PERF_TYPE_HARDWARE events */
|
|
#define PC(n) PERF_TYPE_HARDWARE, PERF_COUNT_HW_ ## n
|
|
{ "cpu-cycles", PC(CPU_CYCLES) },
|
|
{ "cycles", PC(CPU_CYCLES) },
|
|
{ "instructions", PC(INSTRUCTIONS) },
|
|
{ "cache-references", PC(CACHE_REFERENCES) },
|
|
{ "cache-misses", PC(CACHE_MISSES) },
|
|
{ "branch-instructions", PC(BRANCH_INSTRUCTIONS) },
|
|
{ "branches", PC(BRANCH_INSTRUCTIONS) },
|
|
{ "branch-misses", PC(BRANCH_MISSES) },
|
|
{ "bus-cycles", PC(BUS_CYCLES) },
|
|
{ "stalled-cycles-frontend", PC(STALLED_CYCLES_FRONTEND) },
|
|
{ "stalled-cycles-backend", PC(STALLED_CYCLES_BACKEND) },
|
|
|
|
/* PERF_TYPE_HW_CACHE hw_cache_id */
|
|
#define PCC(n) PERF_TYPE_HW_CACHE, PERF_COUNT_HW_CACHE_ ## n
|
|
{ "L1-dcache-", PCC(L1D) },
|
|
{ "L1-icache-", PCC(L1I) },
|
|
{ "LLC-", PCC(LL) },
|
|
{ "dTLB-", PCC(DTLB) },
|
|
{ "iTLB-", PCC(ITLB) },
|
|
{ "branch-", PCC(BPU) },
|
|
|
|
/* PERF_TYPE_HW_CACHE hw_cache_op, hw_cache_result */
|
|
#define PCCO(n, m) PERF_TYPE_HW_CACHE, \
|
|
((PERF_COUNT_HW_CACHE_OP_ ## n) << 8 | \
|
|
(PERF_COUNT_HW_CACHE_RESULT_ ## m) << 16)
|
|
{ "loads", PCCO(READ, ACCESS) },
|
|
{ "load-misses", PCCO(READ, MISS) },
|
|
{ "stores", PCCO(WRITE, ACCESS) },
|
|
{ "store-misses", PCCO(WRITE, MISS) },
|
|
{ "prefetches", PCCO(PREFETCH, ACCESS) },
|
|
{ "prefetch-misses", PCCO(PREFETCH, MISS) }
|
|
};
|
|
|
|
static int findEvent(const char *event, struct PerfTable *t,
|
|
int len, int *match_len) {
|
|
int i;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
if (!strncmp(event, t[i].name, strlen(t[i].name))) {
|
|
*match_len = strlen(t[i].name);
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
// Field extractors for the value CPUID leaf 1 returns in EAX:
// stepping = bits 3:0, model = bits 7:4, family = bits 11:8,
// type = bits 13:12.
#define CPUID_STEPPING(x)  ((x) & 0xf)
#define CPUID_MODEL(x)     (((x) >> 4) & 0xf)
#define CPUID_FAMILY(x)    (((x) >> 8) & 0xf)
#define CPUID_TYPE(x)      (((x) >> 12) & 0x3)
|
|
|
|
// hack to get LLC counters on perflab frc machines
|
|
static bool isIntelE5_2670() {
|
|
#ifdef __x86_64__
|
|
unsigned long x;
|
|
asm volatile ("cpuid" : "=a"(x): "a"(1) : "ebx", "ecx", "edx");
|
|
return CPUID_STEPPING(x) == 6 && CPUID_MODEL(x) == 0xd
|
|
&& CPUID_FAMILY(x) == 6 && CPUID_TYPE(x) == 0;
|
|
#else
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
static void checkLLCHack(const char* event, uint32_t& type, uint64_t& config) {
|
|
if (!strncmp(event, "LLC-load", 8) && isIntelE5_2670()) {
|
|
type = PERF_TYPE_RAW;
|
|
if (!strncmp(&event[4], "loads", 5)) {
|
|
config = 0x534f2e;
|
|
} else if (!strncmp(&event[4], "load-misses", 11)) {
|
|
config = 0x53412e;
|
|
}
|
|
}
|
|
}
|
|
|
|
bool HardwareCounter::addPerfEvent(const char* event) {
|
|
uint32_t type = 0;
|
|
uint64_t config = 0;
|
|
int i, match_len;
|
|
bool found = false;
|
|
const char* ev = event;
|
|
|
|
while ((i = findEvent(ev, perfTable,
|
|
sizeof(perfTable)/sizeof(struct PerfTable),
|
|
&match_len))
|
|
!= -1) {
|
|
if (!found) {
|
|
found = true;
|
|
type = perfTable[i].type;
|
|
} else if (type != perfTable[i].type) {
|
|
// Logger::Warning("failed to find perf event: %s", event);
|
|
return false;
|
|
}
|
|
config |= perfTable[i].config;
|
|
ev = &ev[match_len];
|
|
}
|
|
|
|
checkLLCHack(event, type, config);
|
|
|
|
// Check if we have a raw spec.
|
|
if (!found && event[0] == 'r' && event[1] != 0) {
|
|
config = strtoull(event + 1, const_cast<char**>(&ev), 16);
|
|
if (*ev == 0) {
|
|
found = true;
|
|
type = PERF_TYPE_RAW;
|
|
}
|
|
}
|
|
|
|
if (!found || *ev) {
|
|
// Logger::Warning("failed to find perf event: %s", event);
|
|
return false;
|
|
}
|
|
std::unique_ptr<HardwareCounterImpl> hwc(
|
|
new HardwareCounterImpl(type, config, event));
|
|
if (hwc->m_err) {
|
|
// Logger::Warning("failed to set perf event: %s", event);
|
|
return false;
|
|
}
|
|
m_counters.emplace_back(std::move(hwc));
|
|
if (!m_countersSet) {
|
|
// reset load and store counters. This is because
|
|
// perf does not seem to handle more than three counters
|
|
// very well.
|
|
m_loadCounter.reset();
|
|
m_storeCounter.reset();
|
|
m_countersSet = true;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool HardwareCounter::eventExists(const char *event) {
|
|
// hopefully m_counters set is small, so a linear scan does not hurt
|
|
for(unsigned i = 0; i < m_counters.size(); i++) {
|
|
if (!strcmp(event, m_counters[i]->m_desc.c_str())) {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool HardwareCounter::setPerfEvents(std::string sevents) {
|
|
// Make a copy of the string for use with strtok.
|
|
auto const sevents_buf = static_cast<char*>(malloc(sevents.size() + 1));
|
|
memcpy(sevents_buf, sevents.data(), sevents.size());
|
|
sevents_buf[sevents.size()] = '\0';
|
|
|
|
char* strtok_buf = nullptr;
|
|
char* s = strtok_r(sevents_buf, ",", &strtok_buf);
|
|
bool success = true;
|
|
while (s) {
|
|
if (!eventExists(s) && !addPerfEvent(s)) {
|
|
success = false;
|
|
break;
|
|
}
|
|
s = strtok_r(nullptr, ",", &strtok_buf);
|
|
}
|
|
free(sevents_buf);
|
|
return success;
|
|
}
|
|
|
|
bool HardwareCounter::SetPerfEvents(std::string events) {
|
|
return s_counter->setPerfEvents(events);
|
|
}
|
|
|
|
// Drops every custom counter. m_countersSet and the default load/store
// counters are not touched here.
void HardwareCounter::clearPerfEvents() {
  m_counters.clear();
}
|
|
|
|
// Static entry point: drops every custom counter held by s_counter.
void HardwareCounter::ClearPerfEvents() {
  s_counter->clearPerfEvents();
}
|
|
|
|
// Names reported by getPerfEvents() for the three built-in counters.
const std::string s_instructions = "instructions";
const std::string s_loads = "loads";
const std::string s_stores = "stores";
|
|
|
|
/*
 * Reports every available counter through `f`, called as
 * f(name, value, data): first "instructions", then "loads" and "stores"
 * (only while no custom events are set), then each custom counter under the
 * name it was registered with.
 */
void HardwareCounter::getPerfEvents(PerfEventCallback f, void* data) {
  f(s_instructions, getInstructionCount(), data);

  if (!m_countersSet) {
    f(s_loads, getLoadCount(), data);
    f(s_stores, getStoreCount(), data);
  }

  for (auto& counter : m_counters) {
    f(counter->m_desc, counter->read(), data);
  }
}
|
|
|
|
// Static entry point: reports the counters of s_counter through `f`.
void HardwareCounter::GetPerfEvents(PerfEventCallback f, void* data) {
  s_counter->getPerfEvents(f, data);
}
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|
|
|
|
|
|
#else // NO_HARDWARE_COUNTERS
|
|
|
|
namespace HPHP {
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// NO_HARDWARE_COUNTERS build: s_counter is an ordinary static object and
// this is its one definition.
HardwareCounter HardwareCounter::s_counter;
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
}
|
|
|
|
#endif // NO_HARDWARE_COUNTERS
|