Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Workaround broken PCIe speeds on Intel Arc #344

Merged
merged 7 commits into from
Feb 20, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 13 additions & 12 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,28 +74,29 @@ if(AMDGPU_SUPPORT OR INTEL_SUPPORT OR V3D_SUPPORT)
target_sources(nvtop PRIVATE device_discovery_linux.c)
endif()

if(AMDGPU_SUPPORT OR MSM_SUPPORT OR PANFROST_SUPPORT OR PANTHOR_SUPPORT)
if(AMDGPU_SUPPORT OR INTEL_SUPPORT OR MSM_SUPPORT OR PANFROST_SUPPORT OR PANTHOR_SUPPORT)
# Search for libdrm, which is required for AMDGPU, INTEL, MSM, PANFROST and PANTHOR support
find_package(Libdrm)

if(Libdrm_FOUND)
message(STATUS "Found libdrm; Enabling support")
target_include_directories(nvtop PRIVATE ${Libdrm_INCLUDE_DIRS})
if (AMDGPU_SUPPORT)
target_sources(nvtop PRIVATE extract_gpuinfo_amdgpu.c)
target_sources(nvtop PRIVATE extract_gpuinfo_amdgpu_utils.c)
endif()

if (MSM_SUPPORT)
target_sources(nvtop PRIVATE extract_gpuinfo_msm.c)
target_sources(nvtop PRIVATE extract_gpuinfo_msm_utils.c)
endif()

else()
message(FATAL_ERROR "libdrm not found; This library is required for AMDGPU and MSM support")
message(FATAL_ERROR "libdrm not found; This library is required for AMDGPU, INTEL, MSM, PANFROST and PANTHOR support")
# CMake will exit if libdrm is not found
endif()
endif()

if (AMDGPU_SUPPORT)
target_sources(nvtop PRIVATE extract_gpuinfo_amdgpu.c)
target_sources(nvtop PRIVATE extract_gpuinfo_amdgpu_utils.c)
endif()

if (MSM_SUPPORT)
target_sources(nvtop PRIVATE extract_gpuinfo_msm.c)
target_sources(nvtop PRIVATE extract_gpuinfo_msm_utils.c)
endif()

if(INTEL_SUPPORT)
target_sources(nvtop PRIVATE extract_gpuinfo_intel.c)
target_sources(nvtop PRIVATE extract_gpuinfo_intel_i915.c)
Expand Down
58 changes: 42 additions & 16 deletions src/extract_gpuinfo_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void gpuinfo_intel_shutdown(void) {
for (unsigned i = 0; i < intel_gpu_count; ++i) {
struct gpu_info_intel *current = &gpu_infos[i];
if (current->card_fd)
close(current->card_fd);
close(current->card_fd);
nvtop_device_unref(current->card_device);
nvtop_device_unref(current->driver_device);
}
Expand All @@ -87,6 +87,7 @@ static bool parse_drm_fdinfo_intel(struct gpu_info *info, FILE *fdinfo_file, str
case DRIVER_XE:
return parse_drm_fdinfo_intel_xe(info, fdinfo_file, process_info);
}
return false;
}

static void add_intel_cards(struct nvtop_device *dev, struct list_head *devices, unsigned *count) {
Expand Down Expand Up @@ -180,18 +181,39 @@ void gpuinfo_intel_populate_static_info(struct gpu_info *_gpu_info) {
}
}

// Mark integrated GPUs
if (strcmp(gpu_info->base.pdev, INTEGRATED_I915_GPU_PCI_ID) == 0) {
static_info->integrated_graphics = true;
}

nvtop_pcie_link max_link_characteristics;
int ret = nvtop_device_maximum_pcie_link(gpu_info->driver_device, &max_link_characteristics);
if (ret >= 0) {
// Some cards report PCIe Gen 1 @ x1; attempt to detect this and get the link speeds from the card's bridge instead
gpu_info->bridge_device = gpu_info->driver_device;
struct nvtop_device *parent;
const char *vendor, *class;
unsigned attempts = 0;
while (ret >= 0 && static_info->integrated_graphics == false &&
// check likely incorrect speed
max_link_characteristics.width == 1 && max_link_characteristics.speed == 2 &&
// check vendor
nvtop_device_get_sysattr_value(gpu_info->bridge_device, "vendor", &vendor) == 0 &&
strcmp(vendor, VENDOR_INTEL_STR) == 0 &&
// check class is either VGA or (non-host) PCI Bridge
nvtop_device_get_sysattr_value(gpu_info->bridge_device, "class", &class) == 0 &&
(strcmp(class, "0x030000") == 0 || strcmp(class, "0x060400") == 0) &&
// don't go more than 2 levels up
attempts++ < 2) {
ret = nvtop_device_get_parent(gpu_info->bridge_device, &parent);
if (ret >= 0 && nvtop_device_maximum_pcie_link(parent, &max_link_characteristics) >= 0) {
gpu_info->bridge_device = parent;
}
}
SET_GPUINFO_STATIC(static_info, max_pcie_link_width, max_link_characteristics.width);
unsigned pcieGen = nvtop_pcie_gen_from_link_speed(max_link_characteristics.speed);
SET_GPUINFO_STATIC(static_info, max_pcie_gen, pcieGen);
}

// Mark integrated GPUs
if (strcmp(gpu_info->base.pdev, INTEGRATED_I915_GPU_PCI_ID) == 0) {
static_info->integrated_graphics = true;
}
}

void gpuinfo_intel_refresh_dynamic_info(struct gpu_info *_gpu_info) {
Expand All @@ -214,6 +236,13 @@ void gpuinfo_intel_refresh_dynamic_info(struct gpu_info *_gpu_info) {
if (nvtop_device_get_syspath(gpu_info->hwmon_device, &syspath) >= 0)
nvtop_device_new_from_syspath(&hwmon_dev_noncached, syspath);
}
nvtop_device *bridge_dev_noncached = NULL;
if (gpu_info->bridge_device) {
if (nvtop_device_get_syspath(gpu_info->bridge_device, &syspath) >= 0)
nvtop_device_new_from_syspath(&bridge_dev_noncached, syspath);
} else {
bridge_dev_noncached = driver_dev_noncached;
}

nvtop_device *clock_device = gpu_info->driver == DRIVER_XE ? driver_dev_noncached : card_dev_noncached;
// GPU clock
Expand All @@ -230,46 +259,43 @@ void gpuinfo_intel_refresh_dynamic_info(struct gpu_info *_gpu_info) {
SET_GPUINFO_DYNAMIC(dynamic_info, gpu_clock_speed_max, val);
}

// TODO: find how to extract global utilization
// gpu util will be computed as the sum of all the processes utilization for now

if (!static_info->integrated_graphics) {
nvtop_pcie_link curr_link_characteristics;
int ret = nvtop_device_current_pcie_link(driver_dev_noncached, &curr_link_characteristics);
int ret = nvtop_device_current_pcie_link(bridge_dev_noncached, &curr_link_characteristics);
if (ret >= 0) {
SET_GPUINFO_DYNAMIC(dynamic_info, pcie_link_width, curr_link_characteristics.width);
unsigned pcieGen = nvtop_pcie_gen_from_link_speed(curr_link_characteristics.speed);
SET_GPUINFO_DYNAMIC(dynamic_info, pcie_link_gen, pcieGen);
}
}

// TODO: Attributes such as memory, fan, temperature, power info should be available once the hwmon patch lands
if (hwmon_dev_noncached) {
const char *hwmon_fan;
// maxFanValue is just a guess, there is no way to get the max fan speed from hwmon
if (nvtop_device_get_sysattr_value(hwmon_dev_noncached, "fan1_input", &hwmon_fan) >= 0) {
unsigned val = strtoul(hwmon_fan, NULL, 10);
SET_GPUINFO_DYNAMIC(dynamic_info, fan_rpm, val);
}
const char *hwmon_temp;
if (nvtop_device_get_sysattr_value(hwmon_dev_noncached, "temp1_input", &hwmon_temp) >= 0) {
// temp1 is for i915, temp2 is for `pkg` on xe
if (nvtop_device_get_sysattr_value(hwmon_dev_noncached, "temp1_input", &hwmon_temp) >= 0 ||
nvtop_device_get_sysattr_value(hwmon_dev_noncached, "temp2_input", &hwmon_temp) >= 0) {
unsigned val = strtoul(hwmon_temp, NULL, 10);
SET_GPUINFO_DYNAMIC(dynamic_info, gpu_temp, val / 1000);
}

const char *hwmon_power_max;
// power1 is for i915, power2 is for xe
// power1 is for i915 and `card` on supported cards on xe, power2 is `pkg` on xe
if (nvtop_device_get_sysattr_value(hwmon_dev_noncached, "power1_max", &hwmon_power_max) >= 0 ||
nvtop_device_get_sysattr_value(hwmon_dev_noncached, "power2_max", &hwmon_power_max) >= 0) {
unsigned val = strtoul(hwmon_power_max, NULL, 10);
SET_GPUINFO_DYNAMIC(dynamic_info, power_draw_max, val / 1000);
}

const char *hwmon_energy;
// energy1 is for i915, energy2 is for xe
// energy1 is for i915 and `card` on supported cards on xe, energy2 is `pkg` on xe
if (nvtop_device_get_sysattr_value(hwmon_dev_noncached, "energy1_input", &hwmon_energy) >= 0 ||
nvtop_device_get_sysattr_value(hwmon_dev_noncached, "energy2_input", &hwmon_energy) >= 0) {
nvtop_time ts, ts_diff;
nvtop_time ts;
nvtop_get_current_time(&ts);
unsigned val = strtoul(hwmon_energy, NULL, 10);
unsigned old = gpu_info->energy.energy_uj;
Expand Down
2 changes: 2 additions & 0 deletions src/extract_gpuinfo_intel.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ struct gpu_info_intel {
struct nvtop_device *hwmon_device;
struct intel_process_info_cache *last_update_process_cache, *current_update_process_cache; // Cached processes info

struct nvtop_device *bridge_device;

struct {
unsigned energy_uj;
struct timespec time;
Expand Down
2 changes: 2 additions & 0 deletions src/extract_gpuinfo_intel_i915.c
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,8 @@ bool parse_drm_fdinfo_intel_i915(struct gpu_info *info, FILE *fdinfo_file, struc
struct intel_process_info_cache *cache_entry;
struct unique_cache_id ucid = {.client_id = cid, .pid = process_info->pid, .pdev = gpu_info->base.pdev};
HASH_FIND_CLIENT(gpu_info->last_update_process_cache, &ucid, cache_entry);
// TODO: find how to extract global utilization
// gpu util will be computed as the sum of all the processes utilization for now
if (cache_entry) {
uint64_t time_elapsed = nvtop_difftime_u64(cache_entry->last_measurement_tstamp, current_time);
HASH_DEL(gpu_info->last_update_process_cache, cache_entry);
Expand Down
6 changes: 4 additions & 2 deletions src/extract_gpuinfo_intel_xe.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void gpuinfo_intel_xe_refresh_dynamic_info(struct gpu_info *_gpu_info) {
struct gpuinfo_dynamic_info *dynamic_info = &gpu_info->base.dynamic_info;

if (gpu_info->card_fd) {
int32_t length = 0;
uint32_t length = 0;
struct drm_xe_query_mem_regions *regions =
xe_device_query_alloc_fetch(gpu_info->card_fd, DRM_XE_DEVICE_QUERY_MEM_REGIONS, &length);
if (regions) {
Expand All @@ -103,7 +103,7 @@ void gpuinfo_intel_xe_refresh_dynamic_info(struct gpu_info *_gpu_info) {
}

static const char xe_drm_intel_vram[] = "drm-total-vram0";
static const char xe_drm_intel_gtt[] = "drm-total-gtt";
// static const char xe_drm_intel_gtt[] = "drm-total-gtt";
// Render
static const char xe_drm_intel_cycles_rcs[] = "drm-cycles-rcs";
static const char xe_drm_intel_total_cycles_rcs[] = "drm-total-cycles-rcs";
Expand Down Expand Up @@ -222,6 +222,8 @@ bool parse_drm_fdinfo_intel_xe(struct gpu_info *info, FILE *fdinfo_file, struct
if (cache_entry) {
HASH_DEL(gpu_info->last_update_process_cache, cache_entry);

// TODO: find how to extract global utilization
// gpu util will be computed as the sum of all the processes utilization for now
{
uint64_t cycles_delta = gpu_cycles.rcs - cache_entry->gpu_cycles.rcs;
uint64_t total_cycles_delta = total_cycles.rcs - cache_entry->total_cycles.rcs;
Expand Down
Loading