meta-llama · JashG · Apr 7, 2025 · Apr 7, 2025 · Apr 7, 2025 · Apr 7, 2025
@@ -7,7 +7,7 @@ The `llamastack/distribution-nvidia` distribution consists of the following prov
 |-----|-------------|
 | agents | `inline::meta-reference` |
 | datasetio | `inline::localfs` |
-| eval | `inline::meta-reference` |
+| eval | `remote::nvidia` |
 | inference | `remote::nvidia` |
 | post_training | `remote::nvidia` |
 | safety | `remote::nvidia` |
@@ -29,6 +29,7 @@ The following environment variables can be configured:
 - `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`)
 - `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`)
 - `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`)
+- `NVIDIA_EVALUATOR_URL`: URL for the NeMo Evaluator Service (default: `http://0.0.0.0:7331`)
 - `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`)
 - `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`)
 

@@ -6,7 +6,7 @@
 
 from typing import List
 
-from llama_stack.providers.datatypes import Api, InlineProviderSpec, ProviderSpec
+from llama_stack.providers.datatypes import AdapterSpec, Api, InlineProviderSpec, ProviderSpec, remote_provider_spec
 
 
 def available_providers() -> List[ProviderSpec]:
@@ -25,4 +25,22 @@ def available_providers() -> List[ProviderSpec]:
                 Api.agents,
             ],
         ),
+        remote_provider_spec(
+            api=Api.eval,
+            adapter=AdapterSpec(
+                adapter_type="nvidia",
+                pip_packages=[
+                    "requests",
+                ],
+                module="llama_stack.providers.remote.eval.nvidia",
+                config_class="llama_stack.providers.remote.eval.nvidia.NVIDIAEvalConfig",
+            ),
+            api_dependencies=[
+                Api.datasetio,
+                Api.datasets,
+                Api.scoring,
+                Api.inference,
+                Api.agents,
+            ],
+        ),
     ]
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
@@ -0,0 +1,134 @@
+# NVIDIA NeMo Evaluator Eval Provider
+
+
+## Overview
+
+For the first integration, Benchmarks are mapped to Evaluation Configs in the NeMo Evaluator. The full evaluation config object is provided as part of the metadata. The `dataset_id` and `scoring_functions` are not used.
+
+Below are a few examples of how to register a benchmark (which in turn creates an evaluation config in NeMo Evaluator) and how to trigger an evaluation.
+
+### Example of registering an academic benchmark
+
+```
+POST /eval/benchmarks
+```
+```json
+{
+  "benchmark_id": "mmlu",
+  "dataset_id": "",
+  "scoring_functions": [],
+  "metadata": {
+    "type": "mmlu"
+  }
+}
+```
+
+### Example of registering a custom evaluation
+
+```
+POST /eval/benchmarks
+```
+```json
+{
+  "benchmark_id": "my-custom-benchmark",
+  "dataset_id": "",
+  "scoring_functions": [],
+  "metadata": {
+    "type": "custom",
+    "params": {
+      "parallelism": 8
+    },
+    "tasks": {
+      "qa": {
+        "type": "completion",
+        "params": {
+          "template": {
+            "prompt": "{{prompt}}",
+            "max_tokens": 200
+          }
+        },
+        "dataset": {
+          "files_url": "hf://datasets/default/sample-basic-test/testing/testing.jsonl"
+        },
+        "metrics": {
+          "bleu": {
+            "type": "bleu",
+            "params": {
+              "references": [
+                "{{ideal_response}}"
+              ]
+            }
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+### Example for triggering a benchmark/custom evaluation
+
+```
+POST /eval/benchmarks/{benchmark_id}/jobs
+```
+```json
+{
+  "benchmark_id": "my-custom-benchmark",
+  "benchmark_config": {
+    "eval_candidate": {
+      "type": "model",
+      "model": "meta-llama/Llama3.1-8B-Instruct",
+      "sampling_params": {
+        "max_tokens": 100,
+        "temperature": 0.7
+      }
+    },
+    "scoring_params": {}
+  }
+}
+```
+
+Response example:
+```json
+{
+    "job_id": "eval-1234",
+    "status": "in_progress"
+}
+```
+
+### Example for getting the status of a job
+```
+GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}
+```
+
+Response example:
+```json
+{
+  "job_id": "eval-1234",
+  "status": "in_progress"
+}
+```
+
+### Example for cancelling a job
+```
+POST /eval/benchmarks/{benchmark_id}/jobs/{job_id}/cancel
+```
+
+### Example for getting the results
+```
+GET /eval/benchmarks/{benchmark_id}/results
+```
+```json
+{
+  "generations": [],
+  "scores": {
+    "{benchmark_id}": {
+      "score_rows": [],
+      "aggregated_results": {
+        "tasks": {},
+        "groups": {}
+      }
+    }
+  }
+}
+```
@@ -0,0 +1,31 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Any, Dict
+
+from llama_stack.distribution.datatypes import Api
+
+from .config import NVIDIAEvalConfig
+
+
+async def get_adapter_impl(
+    config: NVIDIAEvalConfig,
+    deps: Dict[Api, Any],
+):
+    from .eval import NVIDIAEvalImpl
+
+    impl = NVIDIAEvalImpl(
+        config,
+        deps[Api.datasetio],
+        deps[Api.datasets],
+        deps[Api.scoring],
+        deps[Api.inference],
+        deps[Api.agents],
+    )
+    await impl.initialize()
+    return impl
+
+
+__all__ = ["get_adapter_impl", "NVIDIAEvalImpl"]
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+import os
+from typing import Any, Dict
+
+from pydantic import BaseModel, Field
+
+
+class NVIDIAEvalConfig(BaseModel):
+    """
+    Configuration for the NVIDIA NeMo Evaluator microservice endpoint.
+
+    Attributes:
+        evaluator_url (str): A base url for accessing the NVIDIA evaluation endpoint, e.g. http://localhost:8000.
+            When no value is supplied, the NVIDIA_EVALUATOR_URL environment variable is used,
+            falling back to http://0.0.0.0:7331 if that variable is unset.
+    """
+
+    # default_factory (rather than a plain default) means the environment is
+    # consulted when a default value is needed, not once at import time.
+    evaluator_url: str = Field(
+        default_factory=lambda: os.getenv("NVIDIA_EVALUATOR_URL", "http://0.0.0.0:7331"),
+        description="The url for accessing the evaluator service",
+    )
+
+    @classmethod
+    def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
+        """Return a sample run-config mapping for this provider.
+
+        Any keyword arguments are accepted but ignored. The value is an
+        ``${env.NAME:default}`` placeholder string, returned as-is; this
+        method does not read the environment itself.
+        """
+        # NOTE(review): the fallback host here is `localhost`, while the field
+        # default above uses `0.0.0.0` -- confirm whether the two should match.
+        return {
+            "evaluator_url": "${env.NVIDIA_EVALUATOR_URL:http://localhost:7331}",
+        }