Commit 41e0f08

Add a script to gather runner info when uploading benchmark results (#6425)
Implement the logic to gather runner info for GPU. I adopted this logic from https://github.com/pytorch/pytorch-integration-testing/blob/master/vllm-benchmarks/upload_benchmark_results.py#L102. This also cleans up the v2 logic, which is not used anymore.

cc @yangw-dev Please let me know if you have a better approach in mind from the utilization monitoring project. Essentially, I want to get the device name, i.e. CUDA or ROCm, and the device type, i.e. H100 or MI300X, so that they can be displayed on the dashboard. Before this change, these fields were set by the caller; now they can be set automatically by the GHA.
1 parent 46df44c commit 41e0f08
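The device detection described in the commit message leans on `torch.version.hip` (set on ROCm builds) versus `torch.version.cuda` (set on CUDA builds). A minimal standalone sketch of that check, written here for illustration; it degrades to empty strings when torch is unavailable or no GPU is present:

```python
from typing import Tuple


def detect_device() -> Tuple[str, str]:
    """Return (device_name, device_type), e.g. ("cuda", "NVIDIA H100")."""
    device_name, device_type = "", ""
    try:
        import torch  # optional dependency; absent on plain CPU runners

        if torch.cuda.is_available():
            # ROCm builds of torch set torch.version.hip, CUDA builds set
            # torch.version.cuda, so checking hip first tells the two apart.
            if torch.version.hip:
                device_name = "rocm"
            elif torch.version.cuda:
                device_name = "cuda"
            device_type = torch.cuda.get_device_name()
    except ImportError:
        pass  # no torch: report no accelerator
    return device_name, device_type
```

On a runner without torch or without a GPU, this simply returns `("", "")`, which mirrors how the committed script leaves the GPU fields unset in that case.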

File tree

6 files changed: +81 −12 lines changed
.github/actions/upload-benchmark-results/action.yml

Lines changed: 2 additions & 3 deletions

@@ -19,7 +19,7 @@ runs:
       shell: bash
       run: |
         set -eux
-        python3 -mpip install boto3==1.35.33
+        python3 -mpip install boto3==1.35.33 psutil==7.0.0 pynvml==12.0.0

   - name: Check that GITHUB_TOKEN is defined
     if: ${{ inputs.schema-version != 'v2' }}
@@ -72,8 +72,7 @@ runs:
     run: |
       set -eux
-      # TODO (huydhn): Implement this part
-      echo "runners=[]" >> "${GITHUB_OUTPUT}"
+      python3 "${GITHUB_ACTION_PATH}/../../scripts/benchmarks/gather_runners_info.py"

   - name: Gather the dependencies information
     id: gather-dependencies

.github/scripts/benchmark-results-dir-for-testing/v2/android-artifacts-31017223108.json

Lines changed: 0 additions & 1 deletion
This file was deleted.

.github/scripts/benchmark-results-dir-for-testing/v2/android-artifacts-31017223431.json

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 1 addition & 0 deletions

+{"benchmark": {"name": "ExecuTorch", "mode": "inference", "extra_info": {"app_type": "IOS_APP", "benchmark_config": "{\"model\": \"edsr\", \"config\": \"xnnpack_q8\", \"device_name\": \"apple_iphone_15\", \"device_arn\": \"arn:aws:devicefarm:us-west-2:308535385114:devicepool:02a2cf0f-6d9b-45ee-ba1a-a086587469e6/3b5acd2e-92e2-4778-b651-7726bafe129d\"}"}}, "model": {"name": "edsr", "type": "OSS model", "backend": "xnnpack_q8"}, "metric": {"name": "peak_inference_mem_usage(mb)", "benchmark_values": [333.2014794921875], "target_value": 0, "extra_info": {"method": "forward"}}, "runners": [{"name": "Apple iPhone 15", "type": "iOS 18.0", "avail_mem_in_gb": 0, "total_mem_in_gb": 0}]}
.github/scripts/benchmarks/gather_runners_info.py

Lines changed: 78 additions & 0 deletions

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import json
import logging
import os
import platform
import socket
from logging import info
from typing import Any, Dict

import psutil


logging.basicConfig(level=logging.INFO)


def set_output(name: str, val: Any) -> None:
    if os.getenv("GITHUB_OUTPUT"):
        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
            print(f"{name}={val}", file=env)
    else:
        print(f"::set-output name={name}::{val}")


def get_runner_info() -> Dict[str, Any]:
    device_name = ""
    device_type = ""

    try:
        import torch

        if torch.cuda.is_available():
            # TODO (huydhn): only support CUDA and ROCm for now
            if torch.version.hip:
                device_name = "rocm"
            elif torch.version.cuda:
                device_name = "cuda"

            device_type = torch.cuda.get_device_name()

    except ImportError:
        info("Fail to import torch to get the device name")

    runner_info = {
        "cpu_info": platform.processor(),
        "cpu_count": psutil.cpu_count(),
        "avail_mem_in_gb": int(psutil.virtual_memory().total / (1024 * 1024 * 1024)),
        "extra_info": {
            "hostname": socket.gethostname(),
        },
    }

    # TODO (huydhn): only support CUDA and ROCm for now
    if device_name and device_type:
        runner_info["name"] = device_name
        runner_info["type"] = device_type
        runner_info["gpu_count"] = torch.cuda.device_count()
        runner_info["avail_gpu_mem_in_gb"] = int(
            torch.cuda.get_device_properties(0).total_memory
            * torch.cuda.device_count()
            / (1024 * 1024 * 1024)
        )

    return runner_info


def main() -> None:
    runner_info = get_runner_info()
    set_output("runners", json.dumps([runner_info]))


if __name__ == "__main__":
    main()
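The script's `set_output` helper appends `name=value` pairs to whatever file `GITHUB_OUTPUT` points at, which is how the action step above picks up the `runners` output. A standalone sketch of that behavior (a re-implementation for illustration, using a temp file in place of the file GitHub Actions provides):

```python
import json
import os
import tempfile


def set_output(name, val):
    # Mirrors the helper above: append to the $GITHUB_OUTPUT file when set,
    # otherwise fall back to the deprecated ::set-output workflow command.
    if os.getenv("GITHUB_OUTPUT"):
        with open(str(os.getenv("GITHUB_OUTPUT")), "a") as env:
            print(f"{name}={val}", file=env)
    else:
        print(f"::set-output name={name}::{val}")


# Simulate a GitHub Actions run: a temp file stands in for GITHUB_OUTPUT.
tmp = tempfile.NamedTemporaryFile(mode="w", delete=False)
tmp.close()
os.environ["GITHUB_OUTPUT"] = tmp.name

# A trimmed-down, hypothetical runner record for demonstration only.
set_output("runners", json.dumps([{"cpu_count": 8}]))

with open(tmp.name) as f:
    line = f.read().strip()
os.unlink(tmp.name)
# line == 'runners=[{"cpu_count": 8}]'
```

Downstream steps then read `steps.<id>.outputs.runners` and `json.loads` it back into a list, which is why the script serializes the single runner record inside a list.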

.github/workflows/test_upload_benchmark_results.yml

Lines changed: 0 additions & 7 deletions
@@ -13,13 +13,6 @@ jobs:
     steps:
       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

-      - name: Test upload the benchmark results (v2)
-        uses: ./.github/actions/upload-benchmark-results
-        with:
-          benchmark-results-dir: .github/scripts/benchmark-results-dir-for-testing/v2
-          schema-version: v2
-          dry-run: true
-
       - name: Test upload the benchmark results (v3)
         uses: ./.github/actions/upload-benchmark-results
         with:
0 commit comments