From a4cfd158583e2103d69084013ed88186d80bf9af Mon Sep 17 00:00:00 2001 From: Asfiya Baig Date: Tue, 20 May 2025 10:08:33 -0700 Subject: [PATCH 1/6] TensorRT 10.11 release updates Signed-off-by: Asfiya Baig --- CHANGELOG.md | 17 + CMakeLists.txt | 24 +- README.md | 18 +- VERSION | 2 +- demo/BERT/README.md | 4 +- demo/BERT/builder_varseqlen.py | 6 +- demo/Diffusion/README.md | 22 +- demo/Diffusion/demo_diffusion/dd_argparse.py | 8 +- demo/Diffusion/demo_diffusion/engine.py | 5 +- .../demo_diffusion/model/__init__.py | 2 + demo/Diffusion/demo_diffusion/model/clip.py | 20 +- .../model/diffusion_transformer.py | 204 ++- demo/Diffusion/demo_diffusion/model/load.py | 7 +- .../demo_diffusion/path/resolve_path.py | 2 +- .../demo_diffusion/pipeline/__init__.py | 4 + .../pipeline/diffusion_pipeline.py | 34 +- .../demo_diffusion/pipeline/flux_pipeline.py | 48 +- .../pipeline/model_memory_manager.py | 73 ++ .../pipeline/stable_diffusion_35_pipeline.py | 770 ++++++++++++ demo/Diffusion/demo_txt2img_sd35.py | 131 ++ demo/Diffusion/docs/support_matrix.md | 2 + docker/rockylinux8.Dockerfile | 18 +- docker/rockylinux9.Dockerfile | 18 +- docker/ubuntu-20.04.Dockerfile | 18 +- docker/ubuntu-22.04-aarch64.Dockerfile | 2 +- docker/ubuntu-22.04.Dockerfile | 18 +- docker/ubuntu-cross-aarch64.Dockerfile | 2 +- include/NvInfer.h | 64 +- include/NvInferImpl.h | 23 + include/NvInferRuntime.h | 194 ++- include/NvInferRuntimeBase.h | 14 + include/NvInferVersion.h | 12 +- plugin/CMakeLists.txt | 135 +- plugin/README.md | 6 +- plugin/api/inferPlugin.cpp | 8 +- plugin/bertQKVToContextPlugin/CMakeLists.txt | 29 +- plugin/common/bertCommon.h | 4 +- plugin/common/cublasLtWrapper.cpp | 4 +- plugin/common/cublasWrapper.cpp | 14 +- plugin/common/cudaDriverWrapper.cpp | 6 +- plugin/common/cudnnWrapper.cpp | 14 +- plugin/common/plugin.h | 6 - .../CMakeLists.txt | 3 + ...entangledAttentionPlugin_PluginConfig.yaml | 170 +++ .../DisentangledAttention_PluginGoldenIO.json | 86 ++ plugin/disentangledAttentionPlugin/README.md | 19 +- .../disentangledAttentionCommon.h | 48 + .../disentangledAttentionPlugin.cpp | 388 +++--- .../disentangledAttentionPlugin.h | 108 +- .../disentangledAttentionPluginLegacy.cpp | 376 ++++++ .../disentangledAttentionPluginLegacy.h | 144 +++ .../disentangledKernel.cu | 5 +- .../tftrt/efficientNMSImplicitTFTRTPlugin.h | 2 +- .../modulatedDeformConvPlugin/CMakeLists.txt | 2 + ...tomModulatedDeformConv2d_PluginConfig.yaml | 151 ++- plugin/modulatedDeformConvPlugin/README.md | 7 +- .../modulatedDeformConvCudaHelper.cu | 14 +- .../modulatedDeformConvCudaHelper.h | 8 +- .../modulatedDeformConvPlugin.cpp | 491 +++++--- .../modulatedDeformConvPlugin.h | 93 +- .../modulatedDeformConvPluginKernel.cu | 4 +- .../modulatedDeformConvPluginLegacy.cpp | 441 +++++++ .../modulatedDeformConvPluginLegacy.h | 130 ++ .../CMakeLists.txt | 2 + .../multiscaleDeformableAttnPlugin/README.md | 14 +- .../multiscaleDeformableAttnPlugin.cpp | 440 ++++--- .../multiscaleDeformableAttnPlugin.h | 153 ++- .../multiscaleDeformableAttnPluginLegacy.cpp | 287 +++++ .../multiscaleDeformableAttnPluginLegacy.h | 121 ++ python/CMakeLists.txt | 17 +- python/build.sh | 12 +- python/docstrings/infer/pyCoreDoc.h | 43 +- python/docstrings/infer/pyGraphDoc.h | 4 +- python/docstrings/infer/pyPluginDoc.h | 11 +- python/docstrings/parsers/pyOnnxDoc.h | 7 +- python/include/impl/NvInferPythonPlugin.h | 2 +- .../bindings_wheel/tensorrt/__init__.py | 13 +- python/scripts/process_wheel_template.py | 8 +- python/src/infer/pyCore.cpp | 55 +- 
python/src/infer/pyPlugin.cpp | 2 +- python/src/parsers/pyOnnx.cpp | 5 +- samples/CMakeLists.txt | 37 + samples/common/CMakeLists.txt | 80 ++ samples/common/common.h | 4 +- samples/common/sampleDevice.cpp | 2 - samples/common/sampleDevice.h | 2 - samples/common/sampleEngines.cpp | 18 +- samples/common/sampleInference.cpp | 47 +- samples/common/sampleOptions.cpp | 38 +- samples/common/sampleOptions.h | 2 + samples/common/sampleUtils.h | 2 - samples/python/downloader.py | 20 +- samples/python/plugin_utils.py | 3 - samples/sampleCharRNN/CMakeLists.txt | 17 +- samples/sampleDynamicReshape/CMakeLists.txt | 17 +- .../sampleEditableTimingCache/CMakeLists.txt | 15 + samples/sampleINT8API/CMakeLists.txt | 18 +- samples/sampleIOFormats/CMakeLists.txt | 17 +- samples/sampleNamedDimensions/CMakeLists.txt | 17 +- samples/sampleNonZeroPlugin/CMakeLists.txt | 20 +- samples/sampleOnnxMNIST/CMakeLists.txt | 17 +- .../sampleOnnxMnistCoordConvAC/CMakeLists.txt | 27 +- .../sampleOnnxMnistCoordConvAC.cpp | 1 + samples/sampleProgressMonitor/CMakeLists.txt | 16 +- samples/trtexec/CMakeLists.txt | 17 +- samples/trtexec/trtexec.cpp | 48 +- shared/CMakeLists.txt | 4 +- tools/Polygraphy/CHANGELOG.md | 16 +- .../api/00_inference_with_tensorrt/README.md | 2 +- .../api/01_comparing_frameworks/README.md | 18 +- .../api/01_comparing_frameworks/example.py | 24 + .../04_int8_calibration_in_tensorrt/README.md | 2 +- .../05_using_tensorrt_network_api/README.md | 2 +- .../08_adding_precision_constraints/README.md | 4 +- tools/Polygraphy/polygraphy/README.md | 2 +- tools/Polygraphy/polygraphy/__init__.py | 2 +- .../Polygraphy/polygraphy/backend/trt/util.py | 7 +- .../polygraphy/comparator/comparator.py | 9 +- .../polygraphy/comparator/compare.py | 1118 +++++++++++++++++ .../polygraphy/comparator/data_loader.py | 4 + tools/Polygraphy/polygraphy/util/array.py | 90 ++ tools/Polygraphy/tests/cuda/test_cuda.py | 4 +- tools/onnx-graphsurgeon/CHANGELOG.md | 6 + .../onnx_graphsurgeon/__init__.py | 2 +- 124 files changed, 6494 insertions(+), 1222 deletions(-) create mode 100644 demo/Diffusion/demo_diffusion/pipeline/model_memory_manager.py create mode 100644 demo/Diffusion/demo_diffusion/pipeline/stable_diffusion_35_pipeline.py create mode 100644 demo/Diffusion/demo_txt2img_sd35.py create mode 100644 plugin/disentangledAttentionPlugin/DisentangledAttentionPlugin_PluginConfig.yaml create mode 100644 plugin/disentangledAttentionPlugin/DisentangledAttention_PluginGoldenIO.json create mode 100644 plugin/disentangledAttentionPlugin/disentangledAttentionCommon.h create mode 100644 plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp create mode 100644 plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h create mode 100644 plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp create mode 100644 plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h create mode 100644 plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp create mode 100644 plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h create mode 100644 samples/common/CMakeLists.txt diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ca735da4..93062dc53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,22 @@ # TensorRT OSS Release Changelog +## 10.11.0 GA - 2025-5-16 + +Key Features and Updates: + +- Plugin changes + - Migrated `IPluginV2`-descendent version 1 of `cropAndResizePluginDynamic`, to version 2, which implements `IPluginV3`. 
+ - Migrated `IPluginV2`-descendent version 1 of `DisentangledAttention_TRT`, to version 2, which implements `IPluginV3`. + - Migrated `IPluginV2`-descendent version 1 of `MultiscaleDeformableAttnPlugin_TRT`, to version 2, which implements `IPluginV3`. + - Note: The newer versions preserve the attributes and I/O of the corresponding older plugin version. The older plugin versions are deprecated and will be removed in a future release. +- Demo changes + - demoDiffusion + - Added support for Stable Diffusion 3.5-medium and 3.5-large pipelines in BF16 and FP16 precisions. +- Parser changes + - Added `kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA` parser flag to enable UINT8 asymmetric quantization on engines targeting DLA. + - Removed restriction that inputs to `RandomNormalLike` and `RandomUniformLike` must be tensors. + - Clarified limitations of scan outputs for `Loop` nodes. + ## 10.10.0 GA - 2025-4-28 Key Features and Updates: diff --git a/CMakeLists.txt b/CMakeLists.txt index cadfcd174..bf1e80722 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,6 +18,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) include(cmake/modules/set_ifndef.cmake) include(cmake/modules/find_library_create_target.cmake) +list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules) set_ifndef(TRT_LIB_DIR ${CMAKE_BINARY_DIR}) set_ifndef(TRT_OUT_DIR ${CMAKE_BINARY_DIR}) @@ -47,10 +48,10 @@ else() set(STATIC_LIB_EXT "a") endif() -file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/NvInferVersion.h" VERSION_STRINGS REGEX "#define NV_TENSORRT_.*") +file(STRINGS "${CMAKE_CURRENT_SOURCE_DIR}/include/NvInferVersion.h" VERSION_STRINGS REGEX "#define TRT_.*_ENTERPRISE") foreach(TYPE MAJOR MINOR PATCH BUILD) - string(REGEX MATCH "NV_TENSORRT_${TYPE} [0-9]+" TRT_TYPE_STRING ${VERSION_STRINGS}) + string(REGEX MATCH "TRT_${TYPE}_ENTERPRISE [0-9]+" TRT_TYPE_STRING ${VERSION_STRINGS}) string(REGEX MATCH "[0-9]+" TRT_${TYPE} ${TRT_TYPE_STRING}) endforeach(TYPE) @@ -143,20 +144,25 @@ if(BUILD_PARSERS) configure_protobuf(${PROTOBUF_VERSION}) endif() +# Define library names +set(TRT_NVINFER_NAME "nvinfer") +set(TRT_ONNXPARSER_NAME "nvonnxparser") + # Windows library names have major version appended. 
if (MSVC) - set(nvinfer_lib_name "nvinfer_${TRT_SOVERSION}") + set(nvinfer_lib_name "${TRT_NVINFER_NAME}_${TRT_SOVERSION}${TRT_LIB_SUFFIX}") set(nvinfer_plugin_lib_name "nvinfer_plugin_${TRT_SOVERSION}") set(nvinfer_vc_plugin_lib_name "nvinfer_vc_plugin_${TRT_SOVERSION}") - set(nvonnxparser_lib_name "nvonnxparser_${TRT_SOVERSION}") + set(nvonnxparser_lib_name "${TRT_ONNXPARSER_NAME}_${TRT_SOVERSION}${TRT_LIB_SUFFIX}") + else() - set(nvinfer_lib_name "nvinfer") + set(nvinfer_lib_name ${TRT_NVINFER_NAME}) set(nvinfer_plugin_lib_name "nvinfer_plugin") set(nvinfer_vc_plugin_lib_name "nvinfer_vc_plugin") - set(nvonnxparser_lib_name "nvonnxparser") + set(nvonnxparser_lib_name ${TRT_ONNXPARSER_NAME}) endif() -find_library_create_target(nvinfer ${nvinfer_lib_name} SHARED ${TRT_LIB_DIR}) +find_library_create_target(nvinfer ${nvinfer_lib_name} SHARED "${TRT_LIB_DIR}") if (DEFINED USE_CUGFX) find_library(CUDART_LIB cugfx_dll HINTS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib/x64 lib64) @@ -217,13 +223,13 @@ endif() if(BUILD_PLUGINS) add_subdirectory(plugin) else() - find_library_create_target(nvinfer_plugin ${nvinfer_plugin_lib_name} SHARED ${TRT_OUT_DIR} ${TRT_LIB_DIR}) + find_library_create_target(nvinfer_plugin ${nvinfer_plugin_lib_name} SHARED "${TRT_OUT_DIR}" "${TRT_LIB_DIR}") endif() if(BUILD_PARSERS) add_subdirectory(parsers) else() - find_library_create_target(nvonnxparser ${nvonnxparser_lib_name} SHARED ${TRT_OUT_DIR} ${TRT_LIB_DIR}) + find_library_create_target(nvonnxparser ${nvonnxparser_lib_name} SHARED "${TRT_OUT_DIR}" "${TRT_LIB_DIR}") endif() if(BUILD_SAMPLES) diff --git a/README.md b/README.md index f2e4bb5e9..699468ba4 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ To build the TensorRT-OSS components, you will first need the following software **TensorRT GA build** -- TensorRT v10.10.0.31 +- TensorRT v10.11.0.33 - Available from direct download links listed below **System Packages** @@ -86,24 +86,24 @@ To build the TensorRT-OSS components, you will first need the following software Else download and extract the TensorRT GA build from [NVIDIA Developer Zone](https://developer.nvidia.com) with the direct links below: - - [TensorRT 10.10.0.31 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz) - - [TensorRT 10.10.0.31 for CUDA 12.9, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz) - - [TensorRT 10.10.0.31 for CUDA 11.8, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/zip/TensorRT-10.10.0.31.Windows.win10.cuda-11.8.zip) - - [TensorRT 10.10.0.31 for CUDA 12.9, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/zip/TensorRT-10.10.0.31.Windows.win10.cuda-12.9.zip) + - [TensorRT 10.11.0.33 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz) + - [TensorRT 10.11.0.33 for CUDA 12.9, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz) + - [TensorRT 10.11.0.33 for CUDA 11.8, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/zip/TensorRT-10.11.0.33.Windows.win10.cuda-11.8.zip) + - [TensorRT 10.11.0.33 for 
CUDA 12.9, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/zip/TensorRT-10.11.0.33.Windows.win10.cuda-12.9.zip) **Example: Ubuntu 20.04 on x86-64 with cuda-12.9** ```bash cd ~/Downloads - tar -xvzf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz - export TRT_LIBPATH=`pwd`/TensorRT-10.10.0.31 + tar -xvzf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz + export TRT_LIBPATH=`pwd`/TensorRT-10.11.0.33 ``` **Example: Windows on x86-64 with cuda-12.9** ```powershell - Expand-Archive -Path TensorRT-10.10.0.31.Windows.win10.cuda-12.9.zip - $env:TRT_LIBPATH="$pwd\TensorRT-10.10.0.31\lib" + Expand-Archive -Path TensorRT-10.11.0.33.Windows.win10.cuda-12.9.zip + $env:TRT_LIBPATH="$pwd\TensorRT-10.11.0.33\lib" ``` ## Setting Up The Build Environment diff --git a/VERSION b/VERSION index 90c12b3ae..44de1b9bd 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -10.10.0.31 +10.11.0.33 diff --git a/demo/BERT/README.md b/demo/BERT/README.md index d63ab16a1..eee0fe241 100755 --- a/demo/BERT/README.md +++ b/demo/BERT/README.md @@ -73,8 +73,8 @@ The following software version configuration has been tested: | Software | Version | | -------- | ------- | | Python | >=3.8 | -| TensorRT | 10.9 | -| CUDA | 12.8 | +| TensorRT | 10.11 | +| CUDA | 12.9 | ## Setup diff --git a/demo/BERT/builder_varseqlen.py b/demo/BERT/builder_varseqlen.py index b7328cd3e..7e0070163 100755 --- a/demo/BERT/builder_varseqlen.py +++ b/demo/BERT/builder_varseqlen.py @@ -431,7 +431,8 @@ def build_engine(batch_sizes, workspace_size, sequence_length, config, weights_d network_creation_flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) with trt.Builder(TRT_LOGGER) as builder, builder.create_network(network_creation_flag) as network, builder.create_builder_config() as builder_config: - builder_config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_size * (1024 * 1024)) + if workspace_size is not None: + builder_config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace_size * (1024 * 1024)) builder_config.avg_timing_iterations = 8 if config.use_fp16: builder_config.set_flag(trt.BuilderFlag.FP16) @@ -571,8 +572,7 @@ def main(): parser.add_argument( "-w", "--workspace-size", - default=2500, - help="Workspace size in MiB for building the BERT engine (default: 2500)", + help="Workspace size in MiB for building the BERT engine (default: unlimited)", type=int, ) parser.add_argument( diff --git a/demo/Diffusion/README.md b/demo/Diffusion/README.md index a33b42ae7..93f82a185 100755 --- a/demo/Diffusion/README.md +++ b/demo/Diffusion/README.md @@ -7,7 +7,7 @@ This demo application ("demoDiffusion") showcases the acceleration of Stable Dif ### Clone the TensorRT OSS repository ```bash -git clone git@github.com:NVIDIA/TensorRT.git -b release/10.9 --single-branch +git clone git@github.com:NVIDIA/TensorRT.git -b release/10.11 --single-branch cd TensorRT ``` @@ -49,7 +49,7 @@ onnx 1.15.0 onnx-graphsurgeon 0.5.2 onnxruntime 1.16.3 polygraphy 0.49.9 -tensorrt 10.9.0.34 +tensorrt 10.11.0.33 tokenizers 0.13.3 torch 2.2.0 transformers 4.42.2 @@ -199,12 +199,19 @@ Even faster image generation than LCM, producing coherent images in just 1 step. 
python3 demo_txt2img_xl.py "Einstein" --version xl-turbo --onnx-dir onnx-sdxl-turbo --engine-dir engine-sdxl-turbo --denoising-steps 1 --scheduler EulerA --guidance-scale 0.0 --width 512 --height 512 ``` -### Generate an image guided by a text prompt using Stable Diffusion 3 +### Generate an image guided by a text prompt using Stable Diffusion 3 and its variants -Run the command below to generate an image using Stable Diffusion 3 +Run the command below to generate an image using Stable Diffusion 3 and Stable Diffusion 3.5 ```bash +# Stable Diffusion 3 python3 demo_txt2img_sd3.py "A vibrant street wall covered in colorful graffiti, the centerpiece spells \"SD3 MEDIUM\", in a storm of colors" --version sd3 --hf-token=$HF_TOKEN + +# Stable Diffusion 3.5-medium +python3 demo_txt2img_sd35.py "a beautiful photograph of Mt. Fuji during cherry blossom" --version=3.5-medium --denoising-steps=30 --guidance-scale 3.5 --hf-token=$HF_TOKEN + +# Stable Diffusion 3.5-large +python3 demo_txt2img_sd35.py "a beautiful photograph of Mt. Fuji during cherry blossom" --version=3.5-large --denoising-steps=30 --guidance-scale 3.5 --hf-token=$HF_TOKEN ``` You can also specify an input image conditioning as shown below @@ -212,6 +219,7 @@ You can also specify an input image conditioning as shown below ```bash wget https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png -O dog-on-bench.png +# Stable Diffusion 3 python3 demo_txt2img_sd3.py "dog wearing a sweater and a blue collar" --version sd3 --input-image dog-on-bench.png --hf-token=$HF_TOKEN ``` @@ -352,7 +360,7 @@ You can use the `--calibraton-dataset` flag to specify the path, which is set to python3 demo_img2img_flux.py "A robot made of exotic candies and chocolates of different kinds. The background is filled with confetti and celebratory gifts." 
--version="flux.1-dev-depth" --hf-token=$HF_TOKEN --guidance-scale 10 --control-image robot.png --bf16 --denoising-steps 30 --download-onnx-models # FP8 using pre-exported ONNX models -python3 demo_img2img_flux.py "A robot made of exotic candies" --version="flux.1-dev-depth" --hf-token=$HF_TOKEN --guidance-scale 10 --control-image robot.png --fp8 --denoising-steps 30 --download-onnx-models --build-static-batch +python3 demo_img2img_flux.py "A robot made of exotic candies" --version="flux.1-dev-depth" --hf-token=$HF_TOKEN --guidance-scale 10 --control-image robot.png --fp8 --denoising-steps 30 --download-onnx-models --build-static-batch --quantization-level 4 # FP8 using native ONNX export rm -rf onnx/* engine/* && python3 demo_img2img_flux.py "A robot made of exotic candies" --version="flux.1-dev-depth" --hf-token=$HF_TOKEN --guidance-scale 10 --control-image robot.png --quantization-level 4 --fp8 --denoising-steps 30 @@ -368,13 +376,13 @@ python3 demo_img2img_flux.py "A robot made of exotic candies" --version="flux.1- python3 demo_img2img_flux.py "a robot made out of gold" --version="flux.1-dev-canny" --hf-token=$HF_TOKEN --guidance-scale 30 --control-image robot.png --bf16 --denoising-steps 30 --download-onnx-models # FP8 using pre-exported ONNX models -python3 demo_img2img_flux.py "a robot made out of gold" --version="flux.1-dev-canny" --hf-token=$HF_TOKEN --guidance-scale 30 --control-image robot.png --fp8 --denoising-steps 30 --download-onnx-models --build-static-batch +python3 demo_img2img_flux.py "a robot made out of gold" --version="flux.1-dev-canny" --hf-token=$HF_TOKEN --guidance-scale 30 --control-image robot.png --fp8 --denoising-steps 30 --download-onnx-models --build-static-batch --quantization-level 4 # FP8 using native ONNX export rm -rf onnx/* engine/* && python3 demo_img2img_flux.py "a robot made out of gold" --version="flux.1-dev-canny" --hf-token=$HF_TOKEN --guidance-scale 30 --control-image robot.png --quantization-level 4 --fp8 --denoising-steps 30 --calibration-dataset {custom/dataset/path} # FP4 -python3 demo_img2img_flux.py "a robot made out of gold" --version="flux.1-dev-canny" --hf-token=$HF_TOKEN --guidance-scale 30 --control-image robot.png --fp4 --denoising-steps 30 --download-onnx-models +python3 demo_img2img_flux.py "a robot made out of gold" --version="flux.1-dev-canny" --hf-token=$HF_TOKEN --guidance-scale 30 --control-image robot.png --fp4 --denoising-steps 30 --download-onnx-models --build-static-batch ``` #### 4. Generate an Image Using Flux LoRA diff --git a/demo/Diffusion/demo_diffusion/dd_argparse.py b/demo/Diffusion/demo_diffusion/dd_argparse.py index adc6c43ef..174b8069e 100644 --- a/demo/Diffusion/demo_diffusion/dd_argparse.py +++ b/demo/Diffusion/demo_diffusion/dd_argparse.py @@ -71,6 +71,8 @@ def add_arguments(parser): "xl-turbo", "svd-xt-1.1", "sd3", + "3.5-medium", + "3.5-large", "cascade", "flux.1-dev", "flux.1-schnell", @@ -274,6 +276,7 @@ def process_pipeline_args(args: argparse.Namespace) -> Tuple[Dict[str, Any], Dic sm_version = device_info.major * 10 + device_info.minor is_flux = args.version.startswith("flux") + is_sd35 = args.version.startswith("3.5") if args.height % 8 != 0 or args.width % 8 != 0: raise ValueError( @@ -336,7 +339,6 @@ def override_quant_level(level: float, dtype_str: str): elif args.int8: override_quant_level(3.0, "INT8") - if args.quantization_level == 3.0 and args.download_onnx_models: raise ValueError( "Transformer ONNX model for Quantization level 3 is not available for download. 
Please export the quantized Transformer model natively with the removal of --download-onnx-models." @@ -366,7 +368,7 @@ def override_quant_level(level: float, dtype_str: str): # Torch-fallback and Torch-inference if args.torch_fallback and not args.torch_inference: - assert is_flux, "PyTorch Fallback is only supported for Flux pipelines" + assert is_flux or is_sd35, "PyTorch Fallback is only supported for Flux and Stable Diffusion 3.5 pipelines." args.torch_fallback = args.torch_fallback.split(",") if args.torch_fallback and args.torch_inference: @@ -377,7 +379,7 @@ def override_quant_level(level: float, dtype_str: str): # low-vram if args.low_vram: - assert is_flux, "low-vram mode is only supported for Flux pipelines" + assert is_flux or is_sd35, "low-vram mode is only supported for Flux and Stable Diffusion 3.5 pipelines." # Pack arguments kwargs_init_pipeline = { diff --git a/demo/Diffusion/demo_diffusion/engine.py b/demo/Diffusion/demo_diffusion/engine.py index bdfe50ffd..1591b6d50 100644 --- a/demo/Diffusion/demo_diffusion/engine.py +++ b/demo/Diffusion/demo_diffusion/engine.py @@ -22,16 +22,17 @@ from collections import OrderedDict, defaultdict import numpy as np -import onnx import tensorrt as trt import torch from cuda import cudart -from onnx import numpy_helper from polygraphy.backend.common import bytes_from_path from polygraphy.backend.trt import ( engine_from_bytes, ) +import onnx +from onnx import numpy_helper + TRT_LOGGER = trt.Logger(trt.Logger.ERROR) diff --git a/demo/Diffusion/demo_diffusion/model/__init__.py b/demo/Diffusion/demo_diffusion/model/__init__.py index 22d4e38dd..077f163fa 100644 --- a/demo/Diffusion/demo_diffusion/model/__init__.py +++ b/demo/Diffusion/demo_diffusion/model/__init__.py @@ -29,6 +29,7 @@ from demo_diffusion.model.diffusion_transformer import ( FluxTransformerModel, SD3_MMDiTModel, + SD3TransformerModel, ) from demo_diffusion.model.gan import VQGANModel from demo_diffusion.model.load import unload_torch_model @@ -67,6 +68,7 @@ # diffusion_transformer "SD3_MMDiTModel", "FluxTransformerModel", + "SD3TransformerModel", # gan "VQGANModel", # lora diff --git a/demo/Diffusion/demo_diffusion/model/clip.py b/demo/Diffusion/demo_diffusion/model/clip.py index 711397d68..7cefcc7d4 100644 --- a/demo/Diffusion/demo_diffusion/model/clip.py +++ b/demo/Diffusion/demo_diffusion/model/clip.py @@ -36,12 +36,16 @@ ) -def get_clipwithproj_embedding_dim(version: str, pipeline: str) -> int: +def get_clipwithproj_embedding_dim(version: str, subfolder: str) -> int: """Return the embedding dimension of a CLIP with projection model.""" if version in ("xl-1.0", "xl-turbo", "cascade"): return 1280 + elif version in {"3.5-medium", "3.5-large"} and subfolder == "text_encoder": + return 768 + elif version in {"3.5-medium", "3.5-large"} and subfolder == "text_encoder_2": + return 1280 else: - raise ValueError(f"Invalid version {version} + pipeline {pipeline}") + raise ValueError(f"Invalid version {version} + subfolder {subfolder}") def get_clip_embedding_dim(version, pipeline): @@ -186,7 +190,6 @@ def optimize(self, onnx_graph): opt.info(self.name + ": finished") return opt_onnx_graph - class CLIPWithProjModel(CLIPModel): def __init__( self, @@ -213,13 +216,13 @@ def __init__( fp16=fp16, bf16=bf16, max_batch_size=max_batch_size, - embedding_dim=get_clipwithproj_embedding_dim(version, pipeline), + embedding_dim=get_clipwithproj_embedding_dim(version, subfolder), output_hidden_states=output_hidden_states, ) self.subfolder = subfolder def get_model(self, torch_inference=""): - 
model_opts = {"variant": "bf16", "torch_dtype": torch.bfloat16} if self.bf16 else {} + model_opts = {"variant": "fp16", "torch_dtype": torch.float16} if self.fp16 else {"torch_dtype": torch.bfloat16} clip_model_dir = load.get_checkpoint_dir(self.framework_model_dir, self.version, self.pipeline, self.subfolder) if not load.is_model_cached(clip_model_dir, model_opts, self.hf_safetensor, model_name="model"): model = CLIPTextModelWithProjection.from_pretrained( @@ -243,7 +246,11 @@ def get_output_names(self): return ["text_embeddings"] def get_dynamic_axes(self): - return {"input_ids": {0: "B"}, "attention_mask": {0: "B"}, "text_embeddings": {0: "B"}} + return { + "input_ids": {0: "B"}, + "attention_mask": {0: "B"}, + "text_embeddings": {0: "B"}, + } def get_input_profile(self, batch_size, image_height, image_width, static_batch, static_shape): self.check_dims(batch_size, image_height, image_width) @@ -277,7 +284,6 @@ def get_sample_input(self, batch_size, image_height, image_width, static_shape): torch.zeros(batch_size, self.text_maxlen, dtype=torch.int32, device=self.device), ) - class SD3_CLIPGModel(CLIPModel): def __init__( self, diff --git a/demo/Diffusion/demo_diffusion/model/diffusion_transformer.py b/demo/Diffusion/demo_diffusion/model/diffusion_transformer.py index 36e0d44d3..00b601a36 100644 --- a/demo/Diffusion/demo_diffusion/model/diffusion_transformer.py +++ b/demo/Diffusion/demo_diffusion/model/diffusion_transformer.py @@ -27,9 +27,7 @@ from demo_diffusion.utils_sd3.sd3_impls import BaseModel as BaseModelSD3 # List of models to import from diffusers.models -models_to_import = [ - "FluxTransformer2DModel", -] +models_to_import = ["FluxTransformer2DModel", "SD3Transformer2DModel"] for model in models_to_import: globals()[model] = import_from_diffusers(model, "diffusers.models") @@ -324,3 +322,203 @@ def optimize(self, onnx_graph): if self.int8: return super().optimize(onnx_graph, fuse_mha_qkv_int8=True) return super().optimize(onnx_graph) + + +class UpcastLayer(torch.nn.Module): + def __init__(self, base_layer: torch.nn.Module, upcast_to: torch.dtype): + super().__init__() + self.output_dtype = next(base_layer.parameters()).dtype + self.upcast_to = upcast_to + + base_layer = base_layer.to(dtype=self.upcast_to) + self.base_layer = base_layer + + def forward(self, *inputs, **kwargs): + casted_inputs = tuple( + in_val.to(self.upcast_to) if isinstance(in_val, torch.Tensor) else in_val for in_val in inputs + ) + + kwarg_casted = {} + for name, val in kwargs.items(): + kwarg_casted[name] = val.to(dtype=self.upcast_to) if isinstance(val, torch.Tensor) else val + + output = self.base_layer(*casted_inputs, **kwarg_casted) + if isinstance(output, tuple): + output = tuple(out.to(self.output_dtype) if isinstance(out, torch.Tensor) else out for out in output) + else: + output = output.to(dtype=self.output_dtype) + return output + + +class SD3TransformerModel(base_model.BaseModel): + + def __init__( + self, + version, + pipeline, + device, + hf_token, + verbose, + framework_model_dir, + fp16=False, + tf32=False, + bf16=False, + max_batch_size=16, + text_maxlen=256, + build_strongly_typed=False, + weight_streaming=False, + weight_streaming_budget_percentage=None, + do_classifier_free_guidance=False, + ): + super(SD3TransformerModel, self).__init__( + version, + pipeline, + device=device, + hf_token=hf_token, + verbose=verbose, + framework_model_dir=framework_model_dir, + fp16=fp16, + tf32=tf32, + bf16=bf16, + max_batch_size=max_batch_size, + text_maxlen=text_maxlen, + ) + self.subfolder = 
"transformer" + self.transformer_model_dir = load.get_checkpoint_dir( + self.framework_model_dir, self.version, self.pipeline, self.subfolder + ) + if not os.path.exists(self.transformer_model_dir): + self.config = SD3Transformer2DModel.load_config(self.path, subfolder=self.subfolder, token=self.hf_token) + else: + print(f"[I] Load SD3Transformer2DModel config from: {self.transformer_model_dir}") + self.config = SD3Transformer2DModel.load_config(self.transformer_model_dir) + self.build_strongly_typed = build_strongly_typed + self.weight_streaming = weight_streaming + self.weight_streaming_budget_percentage = weight_streaming_budget_percentage + self.out_channels = self.config.get("out_channels") + self.xB = 2 if do_classifier_free_guidance else 1 # batch multiplier + + def get_model(self, torch_inference=""): + model_opts = ( + {"torch_dtype": torch.float16} if self.fp16 else {"torch_dtype": torch.bfloat16} if self.bf16 else {} + ) + if not load.is_model_cached(self.transformer_model_dir, model_opts, self.hf_safetensor): + model = SD3Transformer2DModel.from_pretrained( + self.path, + subfolder=self.subfolder, + use_safetensors=self.hf_safetensor, + token=self.hf_token, + **model_opts, + ).to(self.device) + model.save_pretrained(self.transformer_model_dir, **model_opts) + else: + print(f"[I] Load SD3Transformer2DModel model from: {self.transformer_model_dir}") + model = SD3Transformer2DModel.from_pretrained(self.transformer_model_dir, **model_opts).to(self.device) + + if self.version == "3.5-large": + model.transformer_blocks[35] = UpcastLayer(model.transformer_blocks[35], torch.float32) + + if torch_inference: + model.to(memory_format=torch.channels_last) + model = optimizer.optimize_checkpoint(model, torch_inference) + return model + + def get_input_names(self): + return [ + "hidden_states", + "encoder_hidden_states", + "pooled_projections", + "timestep", + ] + + def get_output_names(self): + return ["latent"] + + def get_dynamic_axes(self): + xB = "2B" if self.xB == 2 else "B" + dynamic_axes = { + "hidden_states": {0: xB, 2: "H", 3: "W"}, + "encoder_hidden_states": {0: xB}, + "pooled_projections": {0: xB}, + "timestep": {0: xB}, + "latent": {0: xB, 2: "H", 3: "W"}, + } + return dynamic_axes + + def get_input_profile( + self, + batch_size: int, + image_height: int, + image_width: int, + static_batch: bool, + static_shape: bool, + ): + latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) + ( + min_batch, + max_batch, + _, + _, + _, + _, + min_latent_height, + max_latent_height, + min_latent_width, + max_latent_width, + ) = self.get_minmax_dims(batch_size, image_height, image_width, static_batch, static_shape) + + input_profile = { + "hidden_states": [ + (self.xB * min_batch, self.config["in_channels"], min_latent_height, min_latent_width), + (self.xB * batch_size, self.config["in_channels"], latent_height, latent_width), + (self.xB * max_batch, self.config["in_channels"], max_latent_height, max_latent_width), + ], + "encoder_hidden_states": [ + (self.xB * min_batch, self.text_maxlen, self.config["joint_attention_dim"]), + (self.xB * batch_size, self.text_maxlen, self.config["joint_attention_dim"]), + (self.xB * max_batch, self.text_maxlen, self.config["joint_attention_dim"]), + ], + "pooled_projections": [ + (self.xB * min_batch, self.config["pooled_projection_dim"]), + (self.xB * batch_size, self.config["pooled_projection_dim"]), + (self.xB * max_batch, self.config["pooled_projection_dim"]), + ], + "timestep": [(self.xB * min_batch,), (self.xB * 
batch_size,), (self.xB * max_batch,)], + } + return input_profile + + def get_shape_dict(self, batch_size, image_height, image_width): + latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) + shape_dict = { + "hidden_states": (self.xB * batch_size, self.config["in_channels"], latent_height, latent_width), + "encoder_hidden_states": (self.xB * batch_size, self.text_maxlen, self.config["joint_attention_dim"]), + "pooled_projections": (self.xB * batch_size, self.config["pooled_projection_dim"]), + "timestep": (self.xB * batch_size,), + "latent": (self.xB * batch_size, self.out_channels, latent_height, latent_width), + } + return shape_dict + + def get_sample_input(self, batch_size, image_height, image_width, static_shape): + assert not (self.fp16 and self.bf16), "fp16 and bf16 cannot be enabled simultaneously" + dtype = torch.float16 if self.fp16 else torch.bfloat16 if self.bf16 else torch.float32 + latent_height, latent_width = self.check_dims(batch_size, image_height, image_width) + sample_input = ( + torch.randn( + self.xB * batch_size, + self.config["in_channels"], + latent_height, + latent_width, + dtype=dtype, + device=self.device, + ), + torch.randn( + self.xB * batch_size, + self.text_maxlen, + self.config["joint_attention_dim"], + dtype=dtype, + device=self.device, + ), + torch.randn(self.xB * batch_size, self.config["pooled_projection_dim"], dtype=dtype, device=self.device), + torch.randn(self.xB * batch_size, dtype=torch.float32, device=self.device), + ) + return sample_input diff --git a/demo/Diffusion/demo_diffusion/model/load.py b/demo/Diffusion/demo_diffusion/model/load.py index 23346ea81..e33fdd28d 100644 --- a/demo/Diffusion/demo_diffusion/model/load.py +++ b/demo/Diffusion/demo_diffusion/model/load.py @@ -25,9 +25,8 @@ import sys from typing import List, Optional -import torch - import onnx +import torch def onnx_graph_needs_external_data(onnx_graph: onnx.ModelProto) -> bool: @@ -74,6 +73,10 @@ def get_path(version: str, pipeline: "pipeline.DiffusionPipeline", controlnets: return "stabilityai/sdxl-turbo" elif version == "sd3": return "stabilityai/stable-diffusion-3-medium" + elif version == "3.5-medium": + return "stabilityai/stable-diffusion-3.5-medium" + elif version == "3.5-large": + return "stabilityai/stable-diffusion-3.5-large" elif version == "svd-xt-1.1" and pipeline.is_img2vid(): return "stabilityai/stable-video-diffusion-img2vid-xt-1-1" elif version == "cascade": diff --git a/demo/Diffusion/demo_diffusion/path/resolve_path.py b/demo/Diffusion/demo_diffusion/path/resolve_path.py index b1807f8cc..292344259 100644 --- a/demo/Diffusion/demo_diffusion/path/resolve_path.py +++ b/demo/Diffusion/demo_diffusion/path/resolve_path.py @@ -102,7 +102,7 @@ def _is_quantized() -> bool: if _is_quantized(): if args.int8 or args.fp8: quantization_config_uid = ( - f"{'int8' if args.int8 else 'fp8'}.l{args.quantization_level}.bs2.s{args.denoising_steps}" + f"{'int8' if args.int8 else 'fp8'}.l{args.quantization_level}.bs2" f".c{args.calibration_size}.p{args.quantization_percentile}.a{args.quantization_alpha}" ) else: diff --git a/demo/Diffusion/demo_diffusion/pipeline/__init__.py b/demo/Diffusion/demo_diffusion/pipeline/__init__.py index 6a1d7bb00..c77059f08 100644 --- a/demo/Diffusion/demo_diffusion/pipeline/__init__.py +++ b/demo/Diffusion/demo_diffusion/pipeline/__init__.py @@ -19,6 +19,9 @@ from demo_diffusion.pipeline.flux_pipeline import FluxPipeline from demo_diffusion.pipeline.stable_cascade_pipeline import StableCascadePipeline from 
demo_diffusion.pipeline.stable_diffusion_3_pipeline import StableDiffusion3Pipeline +from demo_diffusion.pipeline.stable_diffusion_35_pipeline import ( + StableDiffusion35Pipeline, +) from demo_diffusion.pipeline.stable_diffusion_pipeline import StableDiffusionPipeline from demo_diffusion.pipeline.stable_video_diffusion_pipeline import ( StableVideoDiffusionPipeline, @@ -30,6 +33,7 @@ "FluxPipeline", "StableCascadePipeline", "StableDiffusion3Pipeline", + "StableDiffusion35Pipeline", "StableDiffusionPipeline", "StableVideoDiffusionPipeline", "PIPELINE_TYPE", diff --git a/demo/Diffusion/demo_diffusion/pipeline/diffusion_pipeline.py b/demo/Diffusion/demo_diffusion/pipeline/diffusion_pipeline.py index d9b44d1c5..18fc146b3 100755 --- a/demo/Diffusion/demo_diffusion/pipeline/diffusion_pipeline.py +++ b/demo/Diffusion/demo_diffusion/pipeline/diffusion_pipeline.py @@ -54,6 +54,7 @@ unload_torch_model, ) from demo_diffusion.pipeline.calibrate import load_calib_prompts +from demo_diffusion.pipeline.model_memory_manager import ModelMemoryManager from demo_diffusion.pipeline.type import PIPELINE_TYPE from demo_diffusion.utils_modelopt import ( SD_FP8_BF16_FLUX_MMDIT_BMM2_FP8_OUTPUT_CONFIG, @@ -91,11 +92,13 @@ class DiffusionPipeline(ABC): "xl-turbo", "svd-xt-1.1", "sd3", + "3.5-medium", + "3.5-large", "cascade", "flux.1-dev", "flux.1-dev-canny", "flux.1-dev-depth", - "flux.1-schnell" + "flux.1-schnell", ) SCHEDULER_DEFAULTS = { "1.4": "PNDM", @@ -105,14 +108,16 @@ class DiffusionPipeline(ABC): "2.0": "DDIM", "2.1-base": "PNDM", "2.1": "DDIM", - "xl-1.0" : "Euler", + "xl-1.0": "Euler", "xl-turbo": "EulerA", + "3.5-large": "FlowMatchEuler", + "3.5-medium": "FlowMatchEuler", "svd-xt-1.1": "Euler", "cascade": "DDPMWuerstchen", "flux.1-dev": "FlowMatchEuler", "flux.1-dev-canny": "FlowMatchEuler", "flux.1-dev-depth": "FlowMatchEuler", - "flux.1-schnell": "FlowMatchEuler" + "flux.1-schnell": "FlowMatchEuler", } def __init__( @@ -266,6 +271,7 @@ def __init__( self.engine = {} self.shape_dicts = {} self.shared_device_memory = None + self.lora_loader = None # initialized in load_resources() self.events = {} @@ -275,6 +281,9 @@ def __init__( self.stream = None self.tokenizer = None + def model_memory_manager(self, model_names, low_vram=False): + return ModelMemoryManager(self, model_names, low_vram) + @classmethod @abc.abstractmethod def FromArgs(cls, args: argparse.Namespace, pipeline_type: PIPELINE_TYPE) -> DiffusionPipeline: @@ -288,16 +297,16 @@ def get_model_names(cls, pipeline_type: PIPELINE_TYPE) -> List[str]: raise NotImplementedError("get_model_names cannot be called from the abstract base class.") @classmethod - def _get_pipeline_uid(cls, pipeline_type: PIPELINE_TYPE, version: str) -> str: + def _get_pipeline_uid(cls, version: str) -> str: """Return the unique ID of this pipeline. This is typically used to determine the default path for things like engine files, artifacts caches, etc. 
""" - return f"{cls.__name__}_{pipeline_type.name}_{version}" + return f"{cls.__name__}_{version}" - def profile_start(self, name, color='blue'): + def profile_start(self, name, color="blue", domain=None): if self.nvtx_profile: - self.markers[name] = nvtx.start_range(message=name, color=color) + self.markers[name] = nvtx.start_range(message=name, color=color, domain=domain) if name in self.events: cudart.cudaEventRecord(self.events[name][0], 0) @@ -658,24 +667,24 @@ def _build_engine(self, obj, engine, model_config, opt_batch_size, opt_image_hei weight_streaming = getattr(obj, 'weight_streaming', False) int8amp = model_config.get('use_int8', False) precision_constraints = 'prefer' if int8amp else 'none' - engine.build(model_config['onnx_opt_path'], + engine.build( + model_config["onnx_opt_path"], strongly_typed=strongly_typed, fp16=fp16amp, tf32=tf32amp, bf16=bf16amp, int8=int8amp, input_profile=obj.get_input_profile( - opt_batch_size, opt_image_height, opt_image_width, - static_batch=static_batch, static_shape=static_shape + opt_batch_size, opt_image_height, opt_image_width, static_batch=static_batch, static_shape=static_shape ), - enable_refit=model_config['do_engine_refit'], + enable_refit=model_config["do_engine_refit"], enable_all_tactics=enable_all_tactics, timing_cache=timing_cache, update_output_names=update_output_names, weight_streaming=weight_streaming, verbose=self.verbose, builder_optimization_level=optimization_level, - precision_constraints=precision_constraints + precision_constraints=precision_constraints, ) def _refit_engine(self, obj, model_name, model_config): @@ -903,7 +912,6 @@ def teardown(self): del self.stream def initialize_latents(self, batch_size, unet_channels, latent_height, latent_width, latents_dtype=torch.float32): - latents_dtype = latents_dtype # text_embeddings.dtype latents_shape = (batch_size, unet_channels, latent_height, latent_width) latents = torch.randn(latents_shape, device=self.device, dtype=latents_dtype, generator=self.generator) # Scale the initial noise by the standard deviation required by the scheduler diff --git a/demo/Diffusion/demo_diffusion/pipeline/flux_pipeline.py b/demo/Diffusion/demo_diffusion/pipeline/flux_pipeline.py index 256a8a5d5..57241c613 100644 --- a/demo/Diffusion/demo_diffusion/pipeline/flux_pipeline.py +++ b/demo/Diffusion/demo_diffusion/pipeline/flux_pipeline.py @@ -129,7 +129,7 @@ def FromArgs(cls, args: argparse.Namespace, pipeline_type: PIPELINE_TYPE) -> Flu # Resolve all paths. 
dd_path = path_module.resolve_path( - cls.get_model_names(pipeline_type), args, pipeline_type, cls._get_pipeline_uid(pipeline_type, args.version) + cls.get_model_names(pipeline_type), args, pipeline_type, cls._get_pipeline_uid(args.version) ) return cls( @@ -704,42 +704,6 @@ def infer( torch.cuda.synchronize() e2e_tic = time.perf_counter() - class LoadModelContext: - def __init__(ctx, model_names, low_vram=False): - ctx.model_names = model_names - ctx.low_vram = low_vram - def __enter__(ctx): - if not ctx.low_vram: - return - for model_name in ctx.model_names: - if not self.torch_fallback[model_name]: - # creating engine object (load from plan file) - self.engine[model_name].load() - # allocate device memory - _, shared_device_memory = cudart.cudaMalloc(self.device_memory_sizes[model_name]) - self.shared_device_memory = shared_device_memory - # creating context - self.engine[model_name].activate(device_memory=self.shared_device_memory) - # creating input and output buffer - self.engine[model_name].allocate_buffers(shape_dict=self.shape_dicts[model_name], device=self.device) - else: - print(f"[I] Reloading torch model {model_name} from cpu.") - self.torch_models[model_name] = self.torch_models[model_name].to('cuda') - - def __exit__(ctx, exc_type, exc_val, exc_tb): - if not ctx.low_vram: - return - for model_name in ctx.model_names: - if not self.torch_fallback[model_name]: - self.engine[model_name].deallocate_buffers() - self.engine[model_name].deactivate() - self.engine[model_name].unload() - cudart.cudaFree(self.shared_device_memory) - else: - print(f"[I] Offloading torch model {model_name} to cpu.") - self.torch_models[model_name] = self.torch_models[model_name].to('cpu') - torch.cuda.empty_cache() - num_channels_latents = self.models["transformer"].config["in_channels"] // 4 if control_image: num_channels_latents = self.models["transformer"].config["in_channels"] // 8 @@ -756,7 +720,7 @@ def __exit__(ctx, exc_type, exc_val, exc_tb): ) if control_image.ndim == 4: - with LoadModelContext(["vae_encoder"], low_vram=self.low_vram): + with self.model_memory_manager(["vae_encoder"], low_vram=self.low_vram): control_image = self.encode_image(control_image) height_control_image, width_control_image = control_image.shape[2:] @@ -769,7 +733,7 @@ def __exit__(ctx, exc_type, exc_val, exc_tb): ) # CLIP and T5 text encoder(s) - with LoadModelContext(["clip","t5"], low_vram=self.low_vram): + with self.model_memory_manager(["clip", "t5"], low_vram=self.low_vram): pooled_embeddings = self.encode_prompt(prompt, pooled_output=True) text_embeddings = self.encode_prompt( prompt2, encoder="t5", max_sequence_length=self.max_sequence_length @@ -809,7 +773,7 @@ def __exit__(ctx, exc_type, exc_val, exc_tb): # Pre-process input image and timestep for the img2img pipeline if input_image: input_image = self.image_processor.preprocess(input_image, height=image_height, width=image_width).to(self.device) - with LoadModelContext(["vae_encoder"], low_vram=self.low_vram): + with self.model_memory_manager(["vae_encoder"], low_vram=self.low_vram): image_latents = self.encode_image(input_image) timesteps, num_inference_steps = self.get_timesteps(self.denoising_steps, image_strength) @@ -833,7 +797,7 @@ def __exit__(ctx, exc_type, exc_val, exc_tb): ) # DiT denoiser - with LoadModelContext(["transformer"], low_vram=self.low_vram): + with self.model_memory_manager(["transformer"], low_vram=self.low_vram): latents = self.denoise_latent( latents, timesteps, @@ -845,7 +809,7 @@ def __exit__(ctx, exc_type, exc_val, exc_tb): ) # 
VAE decode latent - with LoadModelContext(["vae"], low_vram=self.low_vram): + with self.model_memory_manager(["vae"], low_vram=self.low_vram): latents = self._unpack_latents( latents, image_height, image_width, self.vae_scale_factor ) diff --git a/demo/Diffusion/demo_diffusion/pipeline/model_memory_manager.py b/demo/Diffusion/demo_diffusion/pipeline/model_memory_manager.py new file mode 100644 index 000000000..664447f4a --- /dev/null +++ b/demo/Diffusion/demo_diffusion/pipeline/model_memory_manager.py @@ -0,0 +1,73 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import torch +from cuda import cudart + + +class ModelMemoryManager: + """ + Context manager for efficiently loading and unloading models to optimize VRAM usage. + + This class provides a context to temporarily load models into GPU memory for inference + and automatically unload them afterward. It's especially useful in low VRAM environments + where models need to be swapped in and out of GPU memory. + + Args: + parent: The parent class instance that contains the model references and resources. + model_names (list): List of model names to load and unload. + low_vram (bool, optional): If True, enables VRAM optimization. If False, the context manager does nothing. Defaults to False. 
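+
+    Example (illustrative only; mirrors how the Flux and Stable Diffusion 3.5 pipelines in this
+    change use the manager via ``DiffusionPipeline.model_memory_manager``):
+
+        # Only the named engines / torch models are resident on the GPU inside the block;
+        # when low_vram is True they are deallocated (or offloaded back to CPU) on exit.
+        with self.model_memory_manager(["clip", "t5"], low_vram=self.low_vram):
+            pooled_embeddings = self.encode_prompt(prompt, pooled_output=True)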
+ """ + + def __init__(self, parent, model_names, low_vram=False): + self.parent = parent + self.model_names = model_names + self.low_vram = low_vram + + def __enter__(self): + if not self.low_vram: + return + for model_name in self.model_names: + if not self.parent.torch_fallback[model_name]: + # creating engine object (load from plan file) + self.parent.engine[model_name].load() + # allocate device memory + _, shared_device_memory = cudart.cudaMalloc(self.parent.device_memory_sizes[model_name]) + self.parent.shared_device_memory = shared_device_memory + # creating context + self.parent.engine[model_name].activate(device_memory=self.parent.shared_device_memory) + # creating input and output buffer + self.parent.engine[model_name].allocate_buffers( + shape_dict=self.parent.shape_dicts[model_name], device=self.parent.device + ) + else: + print(f"[I] Reloading torch model {model_name} from cpu.") + self.parent.torch_models[model_name] = self.parent.torch_models[model_name].to("cuda") + + def __exit__(self, exc_type, exc_val, exc_tb): + if not self.low_vram: + return + for model_name in self.model_names: + if not self.parent.torch_fallback[model_name]: + self.parent.engine[model_name].deallocate_buffers() + self.parent.engine[model_name].deactivate() + self.parent.engine[model_name].unload() + cudart.cudaFree(self.parent.shared_device_memory) + else: + print(f"[I] Offloading torch model {model_name} to cpu.") + self.parent.torch_models[model_name] = self.parent.torch_models[model_name].to("cpu") + torch.cuda.empty_cache() diff --git a/demo/Diffusion/demo_diffusion/pipeline/stable_diffusion_35_pipeline.py b/demo/Diffusion/demo_diffusion/pipeline/stable_diffusion_35_pipeline.py new file mode 100644 index 000000000..b5e692d0e --- /dev/null +++ b/demo/Diffusion/demo_diffusion/pipeline/stable_diffusion_35_pipeline.py @@ -0,0 +1,770 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import argparse +import inspect +import time +from typing import Any, List + +import tensorrt as trt +import torch +from cuda import cudart +from transformers import PreTrainedTokenizerBase + +from demo_diffusion import path as path_module +from demo_diffusion.model import ( + CLIPWithProjModel, + SD3TransformerModel, + T5Model, + VAEEncoderModel, + VAEModel, + make_tokenizer, +) +from demo_diffusion.pipeline.diffusion_pipeline import DiffusionPipeline +from demo_diffusion.pipeline.type import PIPELINE_TYPE + +TRT_LOGGER = trt.Logger(trt.Logger.ERROR) + +class StableDiffusion35Pipeline(DiffusionPipeline): + """ + Application showcasing the acceleration of Stable Diffusion 3.5 pipelines using Nvidia TensorRT. + """ + + def __init__( + self, + version: str, + pipeline_type=PIPELINE_TYPE.TXT2IMG, + guidance_scale: float = 7.0, + max_sequence_length: int = 256, + **kwargs, + ): + """ + Initializes the Stable Diffusion 3.5 pipeline. 
+ + Args: + version (str): + The version of the pipeline. Should be one of ['3.5-medium', '3.5-large'] + pipeline_type (PIPELINE_TYPE): + Type of current pipeline. + guidance_scale (`float`, defaults to 7.0): + Guidance scale is enabled by setting as > 1. + Higher guidance scale encourages to generate images that are closely linked to the text prompt, usually at the expense of lower image quality. + max_sequence_length (`int`, defaults to 256): + Maximum sequence length to use with the `prompt`. + """ + super().__init__( + version=version, + pipeline_type=pipeline_type, + **kwargs + ) + + self.fp16 = True if not self.bf16 else False + + self.force_weakly_typed_t5 = False + self.config["clip_hidden_states"] = True + + self.guidance_scale = guidance_scale + self.do_classifier_free_guidance = self.guidance_scale > 1 + self.max_sequence_length = max_sequence_length + + @classmethod + def FromArgs(cls, args: argparse.Namespace, pipeline_type: PIPELINE_TYPE) -> StableDiffusion35Pipeline: + """Factory method to construct a `StableDiffusion35Pipeline` object from parsed arguments. + + Overrides: + DiffusionPipeline.FromArgs + """ + MAX_BATCH_SIZE = 4 + DEVICE = "cuda" + DO_RETURN_LATENTS = False + + # Resolve all paths. + dd_path = path_module.resolve_path( + cls.get_model_names(pipeline_type), args, pipeline_type, cls._get_pipeline_uid(args.version) + ) + + return cls( + dd_path=dd_path, + version=args.version, + pipeline_type=pipeline_type, + guidance_scale=args.guidance_scale, + max_sequence_length=args.max_sequence_length, + bf16=args.bf16, + low_vram=args.low_vram, + torch_fallback=args.torch_fallback, + weight_streaming=args.ws, + max_batch_size=MAX_BATCH_SIZE, + denoising_steps=args.denoising_steps, + scheduler=args.scheduler, + device=DEVICE, + output_dir=args.output_dir, + hf_token=args.hf_token, + verbose=args.verbose, + nvtx_profile=args.nvtx_profile, + use_cuda_graph=args.use_cuda_graph, + framework_model_dir=args.framework_model_dir, + return_latents=DO_RETURN_LATENTS, + torch_inference=args.torch_inference, + ) + + @classmethod + def get_model_names(cls, pipeline_type: PIPELINE_TYPE) -> List[str]: + """Return a list of model names used by this pipeline. 
+ + Overrides: + DiffusionPipeline.get_model_names + """ + return ["clip_l", "clip_g", "t5", "transformer", "vae"] + + def download_onnx_models(self, model_name: str, model_config: dict[str, Any]) -> None: + raise ValueError("ONNX models download is not supported for the Stable Diffusion 3.5 pipeline") + + def load_resources( + self, + image_height: int, + image_width: int, + batch_size: int, + seed: int, + ): + super().load_resources(image_height, image_width, batch_size, seed) + + def _initialize_models(self, framework_model_dir, int8, fp8, fp4): + # Load text tokenizer(s) + self.tokenizer = make_tokenizer( + self.version, + self.pipeline_type, + self.hf_token, + framework_model_dir, + ) + self.tokenizer2 = make_tokenizer( + self.version, + self.pipeline_type, + self.hf_token, + framework_model_dir, + subfolder="tokenizer_2", + ) + self.tokenizer3 = make_tokenizer( + self.version, + self.pipeline_type, + self.hf_token, + framework_model_dir, + subfolder="tokenizer_3", + tokenizer_type="t5", + ) + + # Load pipeline models + models_args = { + "version": self.version, + "pipeline": self.pipeline_type, + "device": self.device, + "hf_token": self.hf_token, + "verbose": self.verbose, + "framework_model_dir": framework_model_dir, + "max_batch_size": self.max_batch_size, + } + + self.bf16 = True if int8 or fp8 or fp4 else self.bf16 + self.fp16 = True if not self.bf16 else False + if "clip_l" in self.stages: + self.models["clip_l"] = CLIPWithProjModel( + **models_args, + fp16=self.fp16, + bf16=self.bf16, + subfolder="text_encoder", + output_hidden_states=self.config.get("clip_hidden_states", False), + ) + + if "clip_g" in self.stages: + self.models["clip_g"] = CLIPWithProjModel( + **models_args, + fp16=self.fp16, + bf16=self.bf16, + subfolder="text_encoder_2", + output_hidden_states=self.config.get("clip_hidden_states", False), + ) + + if "t5" in self.stages: + # Known accuracy issues with FP16 + self.models["t5"] = T5Model( + **models_args, + fp16=self.fp16, + bf16=self.bf16, + subfolder="text_encoder_3", + text_maxlen=self.max_sequence_length, + build_strongly_typed=True, + weight_streaming=self.weight_streaming, + weight_streaming_budget_percentage=self.text_encoder_weight_streaming_budget_percentage, + ) + + if "transformer" in self.stages: + self.models["transformer"] = SD3TransformerModel( + **models_args, + bf16=self.bf16, + fp16=self.fp16, + text_maxlen=self.models["t5"].text_maxlen + self.models["clip_g"].text_maxlen, + build_strongly_typed=True, + weight_streaming=self.weight_streaming, + weight_streaming_budget_percentage=self.denoiser_weight_streaming_budget_percentage, + do_classifier_free_guidance=self.do_classifier_free_guidance, + ) + + if "vae" in self.stages: + self.models["vae"] = VAEModel(**models_args, fp16=self.fp16, tf32=True, bf16=self.bf16) + + self.vae_scale_factor = ( + 2 ** (len(self.models["vae"].config["block_out_channels"]) - 1) if "vae" in self.models else 8 + ) + self.patch_size = ( + self.models["transformer"].config["patch_size"] + if "transformer" in self.stages and self.models["transformer"] is not None + else 2 + ) + + if "vae_encoder" in self.stages: + self.models["vae_encoder"] = VAEEncoderModel(**models_args, fp16=False, tf32=self.tf32, bf16=self.bf16) + self.vae_latent_channels = ( + self.models["vae"].config["latent_channels"] + if "vae" in self.stages and self.models["vae"] is not None + else 16 + ) + + def print_summary(self, denoising_steps, walltime_ms): + print("|-----------------|--------------|") + print("| {:^15} | {:^12} |".format("Module", 
"Latency")) + print("|-----------------|--------------|") + if "vae_encoder" in self.stages: + print( + "| {:^15} | {:>9.2f} ms |".format( + "VAE Encoder", + cudart.cudaEventElapsedTime(self.events["vae_encode"][0], self.events["vae_encode"][1])[1], + ) + ) + print( + "| {:^15} | {:>9.2f} ms |".format( + "CLIP-G", cudart.cudaEventElapsedTime(self.events["clip_g"][0], self.events["clip_g"][1])[1] + ) + ) + print( + "| {:^15} | {:>9.2f} ms |".format( + "CLIP-L", cudart.cudaEventElapsedTime(self.events["clip_l"][0], self.events["clip_l"][1])[1] + ) + ) + print( + "| {:^15} | {:>9.2f} ms |".format( + "T5", cudart.cudaEventElapsedTime(self.events["t5"][0], self.events["t5"][1])[1] + ) + ) + print( + "| {:^15} | {:>9.2f} ms |".format( + "MMDiT" + " x " + str(denoising_steps), + cudart.cudaEventElapsedTime(self.events["transformer"][0], self.events["transformer"][1])[1], + ) + ) + print( + "| {:^15} | {:>9.2f} ms |".format( + "VAE Decoder", + cudart.cudaEventElapsedTime(self.events["vae"][0], self.events["vae"][1])[1], + ) + ) + print("|-----------------|--------------|") + print("| {:^15} | {:>9.2f} ms |".format("Pipeline", walltime_ms)) + print("|-----------------|--------------|") + print("Throughput: {:.2f} image/s".format(self.batch_size * 1000.0 / walltime_ms)) + + @staticmethod + def _tokenize( + tokenizer: PreTrainedTokenizerBase, + prompt: list[str], + max_sequence_length: int, + device: torch.device, + ): + text_input_ids = tokenizer( + prompt, + padding="max_length", + max_length=max_sequence_length, + truncation=True, + add_special_tokens=True, + return_tensors="pt", + ).input_ids + text_input_ids = text_input_ids.type(torch.int32) + + untruncated_ids = tokenizer( + prompt, + padding="longest", + return_tensors="pt", + ).input_ids.type(torch.int32) + + if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids): + removed_text = tokenizer.batch_decode(untruncated_ids[:, max_sequence_length - 1 : -1]) + TRT_LOGGER.warning( + "The following part of your input was truncated because `max_sequence_length` is set to " + f" {max_sequence_length} tokens: {removed_text}" + ) + text_input_ids = text_input_ids.to(device) + return text_input_ids + + def _get_prompt_embed( + self, + prompt: list[str], + encoder_name: str, + domain="positive_prompt", + ): + if encoder_name == "clip_l": + tokenizer = self.tokenizer + max_sequence_length = tokenizer.model_max_length + output_hidden_states = True + elif encoder_name == "clip_g": + tokenizer = self.tokenizer2 + max_sequence_length = tokenizer.model_max_length + output_hidden_states = True + elif encoder_name == "t5": + tokenizer = self.tokenizer3 + max_sequence_length = self.max_sequence_length + output_hidden_states = False + else: + raise NotImplementedError(f"encoder not found: {encoder_name}") + + self.profile_start(encoder_name, color="green", domain=domain) + + text_input_ids = self._tokenize( + tokenizer=tokenizer, + prompt=prompt, + device=self.device, + max_sequence_length=max_sequence_length, + ) + + text_hidden_states = None + if self.torch_inference or self.torch_fallback[encoder_name]: + outputs = self.torch_models[encoder_name]( + text_input_ids, + output_hidden_states=output_hidden_states, + ) + text_embeddings = outputs[0].clone() + if output_hidden_states: + text_hidden_states = outputs["hidden_states"][-2].clone() + else: + # NOTE: output tensor for the encoder must be cloned because it will be overwritten when called again for prompt2 + outputs = self.run_engine(encoder_name, 
{"input_ids": text_input_ids}) + text_embeddings = outputs["text_embeddings"].clone() + if output_hidden_states: + text_hidden_states = outputs["hidden_states"].clone() + + self.profile_stop(encoder_name) + return text_hidden_states, text_embeddings + + @staticmethod + def _duplicate_text_embed( + prompt_embed: torch.Tensor, + batch_size: int, + num_images_per_prompt: int, + pooled_prompt_embed: torch.Tensor | None = None, + ): + _, seq_len, _ = prompt_embed.shape + # duplicate text embeddings for each generation per prompt, using mps friendly method + prompt_embed = prompt_embed.repeat(1, num_images_per_prompt, 1) + prompt_embed = prompt_embed.view(batch_size * num_images_per_prompt, seq_len, -1) + + if pooled_prompt_embed is not None: + pooled_prompt_embed = pooled_prompt_embed.repeat(1, num_images_per_prompt, 1) + pooled_prompt_embed = pooled_prompt_embed.view(batch_size * num_images_per_prompt, -1) + + return prompt_embed, pooled_prompt_embed + + def encode_prompt( + self, + prompt: list[str], + negative_prompt: list[str] | None = None, + num_images_per_prompt: int = 1, + ): + clip_l_prompt_embed, clip_l_pooled_embed = self._get_prompt_embed( + prompt=prompt, + encoder_name="clip_l", + ) + prompt_embed, pooled_prompt_embed = self._duplicate_text_embed( + prompt_embed=clip_l_prompt_embed.clone(), + pooled_prompt_embed=clip_l_pooled_embed.clone(), + num_images_per_prompt=num_images_per_prompt, + batch_size=self.batch_size, + ) + + clip_g_prompt_embed, clip_g_pooled_embed = self._get_prompt_embed( + prompt=prompt, + encoder_name="clip_g", + ) + prompt_2_embed, pooled_prompt_2_embed = self._duplicate_text_embed( + prompt_embed=clip_g_prompt_embed.clone(), + pooled_prompt_embed=clip_g_pooled_embed.clone(), + batch_size=self.batch_size, + num_images_per_prompt=num_images_per_prompt, + ) + + _, t5_prompt_embed = self._get_prompt_embed( + prompt=prompt, + encoder_name="t5", + ) + + t5_prompt_embed, _ = self._duplicate_text_embed( + prompt_embed=t5_prompt_embed.clone(), + batch_size=self.batch_size, + num_images_per_prompt=num_images_per_prompt, + ) + + clip_prompt_embeds = torch.cat([prompt_embed, prompt_2_embed], dim=-1) + clip_prompt_embeds = torch.nn.functional.pad( + clip_prompt_embeds, (0, t5_prompt_embed.shape[-1] - clip_prompt_embeds.shape[-1]) + ) + prompt_embeds = torch.cat([clip_prompt_embeds, t5_prompt_embed], dim=-2) + pooled_prompt_embeds = torch.cat([pooled_prompt_embed, pooled_prompt_2_embed], dim=-1) + + if negative_prompt is None: + negative_prompt = "" + + clip_l_negative_prompt_embed, clip_l_negative_pooled_embed = self._get_prompt_embed( + prompt=negative_prompt, + encoder_name="clip_l", + ) + negative_prompt_embed, negative_pooled_prompt_embed = self._duplicate_text_embed( + prompt_embed=clip_l_negative_prompt_embed.clone(), + pooled_prompt_embed=clip_l_negative_pooled_embed.clone(), + batch_size=self.batch_size, + num_images_per_prompt=num_images_per_prompt, + ) + + clip_g_negative_prompt_embed, clip_g_negative_pooled_embed = self._get_prompt_embed( + prompt=negative_prompt, + encoder_name="clip_g", + ) + negative_prompt_2_embed, negative_pooled_prompt_2_embed = self._duplicate_text_embed( + prompt_embed=clip_g_negative_prompt_embed.clone(), + pooled_prompt_embed=clip_g_negative_pooled_embed.clone(), + batch_size=self.batch_size, + num_images_per_prompt=num_images_per_prompt, + ) + + _, t5_negative_prompt_embed = self._get_prompt_embed( + prompt=negative_prompt, + encoder_name="t5", + ) + + t5_negative_prompt_embed, _ = self._duplicate_text_embed( + 
prompt_embed=t5_negative_prompt_embed.clone(), + batch_size=self.batch_size, + num_images_per_prompt=num_images_per_prompt, + ) + + negative_clip_prompt_embeds = torch.cat([negative_prompt_embed, negative_prompt_2_embed], dim=-1) + negative_clip_prompt_embeds = torch.nn.functional.pad( + negative_clip_prompt_embeds, + (0, t5_negative_prompt_embed.shape[-1] - negative_clip_prompt_embeds.shape[-1]), + ) + negative_prompt_embeds = torch.cat([negative_clip_prompt_embeds, t5_negative_prompt_embed], dim=-2) + negative_pooled_prompt_embeds = torch.cat( + [negative_pooled_prompt_embed, negative_pooled_prompt_2_embed], dim=-1 + ) + + return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds + + @staticmethod + def initialize_latents( + batch_size: int, + num_channels_latents: int, + latent_height: int, + latent_width: int, + device: torch.device, + generator: torch.Generator, + dtype=torch.float32, + layout=torch.strided, + ) -> torch.Tensor: + latents_shape = (batch_size, num_channels_latents, latent_height, latent_width) + latents = torch.randn( + latents_shape, + dtype=dtype, + device="cuda", + generator=generator, + layout=layout, + ).to(device) + return latents + + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.retrieve_timesteps + @staticmethod + def retrieve_timesteps( + scheduler, + num_inference_steps: int | None = None, + device: str | torch.device | None = None, + timesteps: list[int] | None = None, + sigmas: list[float] | None = None, + **kwargs, + ): + r""" + Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles + custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`. + + Args: + scheduler (`SchedulerMixin`): + The scheduler to get timesteps from. + num_inference_steps (`int`): + The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps` + must be `None`. + device (`str` or `torch.device`, *optional*): + The device to which the timesteps should be moved to. If `None`, the timesteps are not moved. + timesteps (`List[int]`, *optional*): + Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed, + `num_inference_steps` and `sigmas` must be `None`. + sigmas (`List[float]`, *optional*): + Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed, + `num_inference_steps` and `timesteps` must be `None`. + + Returns: + `Tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the + second element is the number of inference steps. + """ + if timesteps is not None and sigmas is not None: + raise ValueError( + "Only one of `timesteps` or `sigmas` can be passed. Please choose one to set custom values" + ) + if timesteps is not None: + accepts_timesteps = "timesteps" in set(inspect.signature(scheduler.set_timesteps).parameters.keys()) + if not accepts_timesteps: + raise ValueError( + f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom" + f" timestep schedules. Please check whether you are using the correct scheduler." 
+ ) + scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs) + timesteps = scheduler.timesteps + num_inference_steps = len(timesteps) + elif sigmas is not None: + accept_sigmas = "sigmas" in set(inspect.signature(scheduler.set_timesteps).parameters.keys()) + if not accept_sigmas: + raise ValueError( + f"The current scheduler class {scheduler.__class__}'s `set_timesteps` does not support custom" + f" sigmas schedules. Please check whether you are using the correct scheduler." + ) + scheduler.set_timesteps(sigmas=sigmas, device=device, **kwargs) + timesteps = scheduler.timesteps + num_inference_steps = len(timesteps) + else: + scheduler.set_timesteps(num_inference_steps, device=device, **kwargs) + timesteps = scheduler.timesteps + return timesteps, num_inference_steps + + def denoise_latents( + self, + latents: torch.Tensor, + prompt_embeds: torch.Tensor, + pooled_prompt_embeds: torch.Tensor, + timesteps: torch.FloatTensor, + guidance_scale: float, + denoiser="transformer", + ) -> torch.Tensor: + do_autocast = self.torch_inference != "" and self.models[denoiser].fp16 + with torch.autocast("cuda", enabled=do_autocast): + self.profile_start(denoiser, color="blue") + + for step_index, timestep in enumerate(timesteps): + # expand the latents as we are doing classifier free guidance + latents_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents + # broadcast to batch dimension in a way that's compatible with ONNX/Core ML + timestep_inp = timestep.expand(latents_model_input.shape[0]) + + params = { + "hidden_states": latents_model_input, + "timestep": timestep_inp, + "encoder_hidden_states": prompt_embeds, + "pooled_projections": pooled_prompt_embeds, + } + # Predict the noise residual + if self.torch_inference or self.torch_fallback[denoiser]: + noise_pred = self.torch_models[denoiser](**params)["sample"] + else: + noise_pred = self.run_engine(denoiser, params)["latent"] + + # perform guidance + if self.do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, timestep, latents, return_dict=False)[0] + + self.profile_stop(denoiser) + return latents + + def decode_latents(self, latents: torch.Tensor, decoder="vae") -> torch.Tensor: + cast_to = ( + torch.float16 + if self.models[decoder].fp16 + else torch.bfloat16 + if self.models[decoder].bf16 + else torch.float32 + ) + latents = latents.to(dtype=cast_to) + self.profile_start(decoder, color="red") + if self.torch_inference or self.torch_fallback[decoder]: + images = self.torch_models[decoder](latents, return_dict=False)[0] + else: + images = self.run_engine(decoder, {"latent": latents})["images"] + self.profile_stop(decoder) + return images + + def infer( + self, + prompt: list[str], + negative_prompt: list[str], + image_height: int, + image_width: int, + warmup=False, + save_image=True, + ): + """ + Run the diffusion pipeline. + + Args: + prompt (list[str]): + The text prompt to guide image generation. + negative_prompt (list[str]): + The prompt not to guide the image generation. + image_height (int): + Height (in pixels) of the image to be generated. Must be a multiple of 8. + image_width (int): + Width (in pixels) of the image to be generated. Must be a multiple of 8. + warmup (bool): + Indicate if this is a warmup run. 
+ save_image (bool): + Save the generated image (if applicable) + """ + assert len(prompt) == len(negative_prompt) + self.batch_size = len(prompt) + + # Spatial dimensions of latent tensor + assert image_height % (self.vae_scale_factor * self.patch_size) == 0, ( + f"image height not supported {image_height}" + ) + assert image_width % (self.vae_scale_factor * self.patch_size) == 0, f"image width not supported {image_width}" + latent_height = int(image_height) // self.vae_scale_factor + latent_width = int(image_width) // self.vae_scale_factor + + if self.generator and self.seed: + self.generator.manual_seed(self.seed) + + with torch.inference_mode(), trt.Runtime(TRT_LOGGER): + torch.cuda.synchronize() + e2e_tic = time.perf_counter() + + # 3. encode inputs + with self.model_memory_manager(["clip_g", "clip_l", "t5"], low_vram=self.low_vram): + ( + prompt_embeds, + negative_prompt_embeds, + pooled_prompt_embeds, + negative_pooled_prompt_embeds, + ) = self.encode_prompt( + prompt=prompt, + negative_prompt=negative_prompt, + num_images_per_prompt=1, + ) + # do classifier free guidance + if self.do_classifier_free_guidance: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) + pooled_prompt_embeds = torch.cat([negative_pooled_prompt_embeds, pooled_prompt_embeds], dim=0) + + # 4. Prepare latent variables + num_channels_latents = self.models["transformer"].config["in_channels"] + latents = self.initialize_latents( + batch_size=self.batch_size, + num_channels_latents=num_channels_latents, + latent_height=latent_height, + latent_width=latent_width, + device=prompt_embeds.device, + generator=self.generator, + dtype=torch.float16 if self.fp16 else torch.bfloat16 if self.bf16 else torch.float32, + ) + + # 5. Prepare timesteps + timesteps, num_inference_steps = self.retrieve_timesteps( + scheduler=self.scheduler, + num_inference_steps=self.denoising_steps, + device=self.device, + sigmas=None, + ) + + # 7 Denoise + with self.model_memory_manager(["transformer"], low_vram=self.low_vram): + latents = self.denoise_latents( + latents=latents, + prompt_embeds=prompt_embeds, + pooled_prompt_embeds=pooled_prompt_embeds, + timesteps=timesteps, + guidance_scale=self.guidance_scale, + ) + + # Decode Latents + latents = (latents / self.models["vae"].config["scaling_factor"]) + self.models["vae"].config[ + "shift_factor" + ] + with self.model_memory_manager(["vae"], low_vram=self.low_vram): + images = self.decode_latents(latents) + + torch.cuda.synchronize() + e2e_toc = time.perf_counter() + + walltime_ms = (e2e_toc - e2e_tic) * 1000.0 + if not warmup: + self.print_summary( + num_inference_steps, + walltime_ms, + ) + if save_image: + # post-process images + images = ( + ((images + 1) * 255 / 2) + .clamp(0, 255) + .detach() + .permute(0, 2, 3, 1) + .round() + .type(torch.uint8) + .cpu() + .numpy() + ) + self.save_image(images, self.pipeline_type.name.lower(), prompt, self.seed) + + return images, walltime_ms + + def run( + self, + prompt: list[str], + negative_prompt: list[str], + height: int, + width: int, + batch_count: int, + num_warmup_runs: int, + use_cuda_graph: bool, + **kwargs, + ): + num_warmup_runs = max(1, num_warmup_runs) if use_cuda_graph else num_warmup_runs + if num_warmup_runs > 0: + print("[I] Warming up ..") + for _ in range(num_warmup_runs): + self.infer(prompt, negative_prompt, height, width, warmup=True, **kwargs) + + for _ in range(batch_count): + print("[I] Running StableDiffusion 3.5 pipeline") + if self.nvtx_profile: + cudart.cudaProfilerStart() + self.infer(prompt, 
negative_prompt, height, width, warmup=False, **kwargs)
+            if self.nvtx_profile:
+                cudart.cudaProfilerStop()
diff --git a/demo/Diffusion/demo_txt2img_sd35.py b/demo/Diffusion/demo_txt2img_sd35.py
new file mode 100644
index 000000000..6fbf9c91d
--- /dev/null
+++ b/demo/Diffusion/demo_txt2img_sd35.py
@@ -0,0 +1,131 @@
+#
+# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import argparse
+
+from cuda import cudart
+
+from demo_diffusion import dd_argparse
+from demo_diffusion import pipeline as pipeline_module
+
+
+def parseArgs():
+    # Stable Diffusion 3.5 configuration
+    parser = argparse.ArgumentParser(
+        description="Options for Stable Diffusion 3.5 Txt2Img Demo", conflict_handler="resolve"
+    )
+    parser = dd_argparse.add_arguments(parser)
+    parser.add_argument(
+        "--version",
+        type=str,
+        default="3.5-medium",
+        choices={"3.5-medium", "3.5-large"},
+        help="Version of Stable Diffusion 3.5",
+    )
+    parser.add_argument("--height", type=int, default=1024, help="Height of image to generate (must be multiple of 8)")
+    parser.add_argument("--width", type=int, default=1024, help="Width of image to generate (must be multiple of 8)")
+    parser.add_argument(
+        "--guidance-scale",
+        type=float,
+        default=7.0,
+        help="Value of classifier-free guidance scale (must be greater than 1)",
+    )
+    parser.add_argument(
+        "--max-sequence-length",
+        type=int,
+        default=256,
+        help="Maximum sequence length to use with the prompt.",
+    )
+    parser.add_argument("--denoising-steps", type=int, default=50, help="Number of denoising steps")
+
+    return parser.parse_args()
+
+def process_demo_args(args):
+    batch_size = args.batch_size
+    prompt = args.prompt
+    negative_prompt = args.negative_prompt
+    # Process prompt
+    if not isinstance(prompt, list):
+        raise ValueError(f"`prompt` must be of type `str` list, but is {type(prompt)}")
+    prompt = prompt * batch_size
+
+    if not isinstance(negative_prompt, list):
+        raise ValueError(f"`--negative-prompt` must be of type `str` list, but is {type(negative_prompt)}")
+    if len(negative_prompt) == 1:
+        negative_prompt = negative_prompt * batch_size
+
+    if args.height % 8 != 0 or args.width % 8 != 0:
+        raise ValueError(
+            f"Image height and width have to be divisible by 8 but specified as: {args.height} and {args.width}."
+        )
+
+    max_batch_size = 4
+    if args.batch_size > max_batch_size:
+        raise ValueError(f"Batch size {args.batch_size} is larger than allowed {max_batch_size}.")
+
+    if args.use_cuda_graph and (not args.build_static_batch or args.build_dynamic_shape):
+        raise ValueError(
+            "Using CUDA graph requires static dimensions. Enable `--build-static-batch` and do not specify `--build-dynamic-shape`"
+        )
+
+    kwargs_run_demo = {
+        "prompt": prompt,
+        "negative_prompt": negative_prompt,
+        "height": args.height,
+        "width": args.width,
+        "batch_count": args.batch_count,
+        "num_warmup_runs": args.num_warmup_runs,
+        "use_cuda_graph": args.use_cuda_graph,
+    }
+
+    return kwargs_run_demo
+
+
+if __name__ == "__main__":
+    print("[I] Initializing Stable Diffusion 3.5 demo using TensorRT")
+    args = parseArgs()
+
+    _, kwargs_load_engine, _ = dd_argparse.process_pipeline_args(args)
+    kwargs_run_demo = process_demo_args(args)
+
+    # Initialize demo
+    demo = pipeline_module.StableDiffusion35Pipeline.FromArgs(args, pipeline_type=pipeline_module.PIPELINE_TYPE.TXT2IMG)
+
+    # Load TensorRT engines and pytorch modules
+    demo.load_engines(
+        framework_model_dir=args.framework_model_dir,
+        **kwargs_load_engine,
+    )
+
+    if demo.low_vram:
+        demo.device_memory_sizes = demo.get_device_memory_sizes()
+    else:
+        _, shared_device_memory = cudart.cudaMalloc(demo.calculate_max_device_memory())
+        demo.activate_engines(shared_device_memory)
+
+    # Load resources
+    demo.load_resources(
+        image_height=args.height,
+        image_width=args.width,
+        batch_size=args.batch_size,
+        seed=args.seed,
+    )
+
+    # Run inference
+    demo.run(**kwargs_run_demo)
+
+    demo.teardown()
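As a usage sketch (a hypothetical invocation, assuming the positional `prompt` argument and shared engine-build flags such as `--hf-token` are registered by `dd_argparse.add_arguments`; exact flag names should be checked against dd_argparse.py), the new demo would be launched from demo/Diffusion roughly as:

    python3 demo_txt2img_sd35.py "a beautiful photograph of Mt. Fuji during cherry blossom" --version 3.5-medium --denoising-steps 30 --hf-token $HF_TOKEN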
diff --git a/demo/Diffusion/docs/support_matrix.md b/demo/Diffusion/docs/support_matrix.md
index 8044e381c..1d1a313ba 100644
--- a/demo/Diffusion/docs/support_matrix.md
+++ b/demo/Diffusion/docs/support_matrix.md
@@ -21,6 +21,8 @@ This demo supports Diffusion models that are popular in the Generative AI commun
 | Stable Diffusion | [XL 1.0-refiner](../README.md#generate-an-image-with-stable-diffusion-xl-guided-by-a-single-text-prompt) | | FP16 | N/A | [stabilityai/stable-diffusion-xl-refiner-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-refiner-1.0) |
 | Stable Diffusion | [XL-Turbo](../README.md#faster-text-to-image-using-sdxl-turbo) | | FP16 | N/A | [stabilityai/sdxl-turbo](https://huggingface.co/stabilityai/sdxl-turbo) |
 | Stable Diffusion | [3](../README.md#generate-an-image-guided-by-a-text-prompt-using-stable-diffusion-3) | | FP16 | N/A | [stabilityai/stable-diffusion-3-medium](https://huggingface.co/stabilityai/stable-diffusion-3-medium) |
+| Stable Diffusion | [3.5-medium](../README.md#generate-an-image-guided-by-a-text-prompt-using-stable-diffusion-3) | | FP16, BF16 | N/A | [stabilityai/stable-diffusion-3.5-medium](https://huggingface.co/stabilityai/stable-diffusion-3.5-medium) |
+| Stable Diffusion | [3.5-large](../README.md#generate-an-image-guided-by-a-text-prompt-using-stable-diffusion-3) | | FP16, BF16 | N/A | [stabilityai/stable-diffusion-3.5-large](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) |
 | ControlNet | [1.5](../README.md#generate-an-image-with-controlnet-guided-by-images-and-text-prompts) | | FP16 | N/A | |
 | ControlNet | [XL 1.0-base](../README.md#generate-an-image-with-stable-diffusion-xl-guided-by-a-single-text-prompt) | | FP16 | N/A | [diffusers/controlnet-canny-sdxl-1.0](https://huggingface.co/diffusers/controlnet-canny-sdxl-1.0) |
 | Stable Video Diffusion | [XT-1.1](../README.md#generate-a-video-guided-by-an-initial-image-using-stable-video-diffusion) | | FP16, FP8 | N/A | [stabilityai/stable-video-diffusion-img2vid-xt-1-1](https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt-1-1) |
diff --git a/docker/rockylinux8.Dockerfile b/docker/rockylinux8.Dockerfile
index d9ebab001..8995f88ef 100644
--- a/docker/rockylinux8.Dockerfile
+++
b/docker/rockylinux8.Dockerfile @@ -20,7 +20,7 @@ ARG CUDA_VERSION=12.9.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-rockylinux8 LABEL maintainer="NVIDIA CORPORATION" -ENV TRT_VERSION 10.10.0.31 +ENV TRT_VERSION 10.11.0.33 SHELL ["/bin/bash", "-c"] # Setup user account @@ -51,15 +51,15 @@ RUN dnf install -y python38 python38-devel &&\ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.10.0.31/lib/*.so* /usr/lib64 \ - && pip install TensorRT-10.10.0.31/python/tensorrt-10.10.0.31-cp38-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.11.0.33/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.11.0.33/python/tensorrt-10.11.0.33-cp38-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz \ - && tar -xf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz \ - && cp -a TensorRT-10.10.0.31/lib/*.so* /usr/lib64 \ - && pip install TensorRT-10.10.0.31/python/tensorrt-10.10.0.31-cp38-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz \ + && tar -xf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz \ + && cp -a TensorRT-10.11.0.33/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.11.0.33/python/tensorrt-10.11.0.33-cp38-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/rockylinux9.Dockerfile b/docker/rockylinux9.Dockerfile index cbb8f36df..7100e4c2a 100644 --- a/docker/rockylinux9.Dockerfile +++ b/docker/rockylinux9.Dockerfile @@ -20,7 +20,7 @@ ARG CUDA_VERSION=12.9.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-rockylinux9 LABEL maintainer="NVIDIA CORPORATION" -ENV TRT_VERSION 10.10.0.31 +ENV TRT_VERSION 10.11.0.33 SHELL ["/bin/bash", "-c"] # Setup user account @@ -56,15 +56,15 @@ RUN dnf -y install \ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.10.0.31/lib/*.so* /usr/lib64 \ - && pip install TensorRT-10.10.0.31/python/tensorrt-10.10.0.31-cp39-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.11.0.33/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.11.0.33/python/tensorrt-10.11.0.33-cp39-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz \ - && tar -xf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz \ - && cp -a TensorRT-10.10.0.31/lib/*.so* /usr/lib64 \ - && pip install 
TensorRT-10.10.0.31/python/tensorrt-10.10.0.31-cp39-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz \ + && tar -xf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz \ + && cp -a TensorRT-10.11.0.33/lib/*.so* /usr/lib64 \ + && pip install TensorRT-10.11.0.33/python/tensorrt-10.11.0.33-cp39-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/ubuntu-20.04.Dockerfile b/docker/ubuntu-20.04.Dockerfile index f06356f2b..940106d16 100644 --- a/docker/ubuntu-20.04.Dockerfile +++ b/docker/ubuntu-20.04.Dockerfile @@ -20,7 +20,7 @@ ARG CUDA_VERSION=12.9.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu20.04 LABEL maintainer="NVIDIA CORPORATION" -ENV TRT_VERSION 10.10.0.31 +ENV TRT_VERSION 10.11.0.33 SHELL ["/bin/bash", "-c"] # Setup user account @@ -70,15 +70,15 @@ RUN apt-get install -y --no-install-recommends \ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.10.0.31/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.10.0.31/python/tensorrt-10.10.0.31-cp38-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.11.0.33/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.11.0.33/python/tensorrt-10.11.0.33-cp38-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz \ - && tar -xf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz \ - && cp -a TensorRT-10.10.0.31/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.10.0.31/python/tensorrt-10.10.0.31-cp38-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz \ + && tar -xf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz \ + && cp -a TensorRT-10.11.0.33/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.11.0.33/python/tensorrt-10.11.0.33-cp38-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/ubuntu-22.04-aarch64.Dockerfile b/docker/ubuntu-22.04-aarch64.Dockerfile index 3f2196214..970b3a987 100644 --- a/docker/ubuntu-22.04-aarch64.Dockerfile +++ b/docker/ubuntu-22.04-aarch64.Dockerfile @@ -20,7 +20,7 @@ ARG CUDA_VERSION=12.9.0 # Multi-arch container support available in non-cudnn containers. 
FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 -ENV TRT_VERSION 10.10.0.31 +ENV TRT_VERSION 10.11.0.33 SHELL ["/bin/bash", "-c"] # Setup user account diff --git a/docker/ubuntu-22.04.Dockerfile b/docker/ubuntu-22.04.Dockerfile index 152c5a7b5..83760e75c 100644 --- a/docker/ubuntu-22.04.Dockerfile +++ b/docker/ubuntu-22.04.Dockerfile @@ -20,7 +20,7 @@ ARG CUDA_VERSION=12.9.0 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 LABEL maintainer="NVIDIA CORPORATION" -ENV TRT_VERSION 10.10.0.31 +ENV TRT_VERSION 10.11.0.33 SHELL ["/bin/bash", "-c"] # Setup user account @@ -70,15 +70,15 @@ RUN apt-get install -y --no-install-recommends \ # Install TensorRT RUN if [ "${CUDA_VERSION:0:2}" = "11" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && tar -xf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-11.8.tar.gz \ - && cp -a TensorRT-10.10.0.31/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.10.0.31/python/tensorrt-10.10.0.31-cp310-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && tar -xf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-11.8.tar.gz \ + && cp -a TensorRT-10.11.0.33/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.11.0.33/python/tensorrt-10.11.0.33-cp310-none-linux_x86_64.whl ;\ elif [ "${CUDA_VERSION:0:2}" = "12" ]; then \ - wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.10.0/tars/TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz \ - && tar -xf TensorRT-10.10.0.31.Linux.x86_64-gnu.cuda-12.9.tar.gz \ - && cp -a TensorRT-10.10.0.31/lib/*.so* /usr/lib/x86_64-linux-gnu \ - && pip install TensorRT-10.10.0.31/python/tensorrt-10.10.0.31-cp310-none-linux_x86_64.whl ;\ + wget https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.11.0/tars/TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz \ + && tar -xf TensorRT-10.11.0.33.Linux.x86_64-gnu.cuda-12.9.tar.gz \ + && cp -a TensorRT-10.11.0.33/lib/*.so* /usr/lib/x86_64-linux-gnu \ + && pip install TensorRT-10.11.0.33/python/tensorrt-10.11.0.33-cp310-none-linux_x86_64.whl ;\ else \ echo "Invalid CUDA_VERSION"; \ exit 1; \ diff --git a/docker/ubuntu-cross-aarch64.Dockerfile b/docker/ubuntu-cross-aarch64.Dockerfile index f5b361361..bc225d6a8 100644 --- a/docker/ubuntu-cross-aarch64.Dockerfile +++ b/docker/ubuntu-cross-aarch64.Dockerfile @@ -21,7 +21,7 @@ ARG OS_VERSION=22.04 FROM nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${OS_VERSION} LABEL maintainer="NVIDIA CORPORATION" -ENV TRT_VERSION 10.10.0.31 +ENV TRT_VERSION 10.11.0.33 ENV DEBIAN_FRONTEND=noninteractive ARG uid=1000 diff --git a/include/NvInfer.h b/include/NvInfer.h index cfcd75e9c..c85aad73f 100644 --- a/include/NvInfer.h +++ b/include/NvInfer.h @@ -176,8 +176,6 @@ struct EnumMaxImpl //! must be less than 1GB in size to fit into a single subgraph. If the build option kGPU_FALLBACK is specified, then //! multiple subgraphs can be created, with each subgraph limited to less than 1GB of internal tensors data. //! -//! \warning The volume of the tensor must be less than 2^31 elements. If the tensor is a shape tensor, -//! its volume must not exceed 64. //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and //! ABI. //! @@ -224,7 +222,7 @@ class ITensor : public INoCopy //! 
in the network, the dimensions of all dependent tensors will be recomputed. //! //! This call is only legal for network input tensors, since the dimensions of layer output tensors are inferred - //! based on layer inputs and parameters. The volume must be less than 2^31 elements. + //! based on layer inputs and parameters. //! //! \param dimensions The dimensions of the tensor. //! @@ -252,13 +250,32 @@ class ITensor : public INoCopy //! //! \brief Set the data type of a tensor. //! - //! \param type The data type of the tensor. + //! \param type The data type of the tensor when the type is not inferred. //! - //! The type is unchanged if the tensor is not a network input tensor, or marked as an output tensor or shape - //! output tensor. + //! For strongly typed networks, this method should be used only for network inputs, + //! since the types of all other tensors are inferred. Setting the type of a network + //! output is tolerated if the type equals the inferred type, otherwise an error occurs + //! and the type is not updated. + //! + //! For weakly typed networks, this method can be used for network outputs too, but + //! the type merely has to be implicitly convertible from the inferred type to the + //! specified type. In this case it does not matter whether the type is set first + //! or the tensor is marked as an output first (via `INetworkDefinition::markOutput` + //! or `INetworkDefinition::markOutputForShapes`). + //! + //! However, marking it first has two advantages: + //! + //! * It avoids warnings that the tensor is not yet a network I/O tensor. + //! * It causes method `getType()` to return the type that was set instead of the inferred type. //! //! \see getType() //! + //! \note This function does more than just set the type, so `t.setType(t.getType())` is not necessarily a no-op, + //! particularly for input and output tensors! + //! + //! \note Repeated consecutive applications of `t.setType(t.getType())` + //! would be idempotent, provided the state of the `ITensor` isn't changed between calls. + //! void setType(DataType type) noexcept { mImpl->setType(type); @@ -269,6 +286,9 @@ class ITensor : public INoCopy //! //! \return The data type of the tensor. //! + //! The type is the type set by `setType` if the tensor is a network input or output. + //! Otherwise the type is the inferred type. + //! //! \see setType() //! DataType getType() const noexcept @@ -3768,6 +3788,15 @@ class IRaggedSoftMaxLayer : public ILayer //! //! \brief A layer that represents the identity function. //! +//! For a strongly typed network, the layer is an identity function, i.e. the output +//! tensor elements are identical to the input tensor elements, possibly with a change +//! in layout. For example, if a network consists of a single IIdentityLayer, the network +//! input and output must have the same type, but the input can have NCHW layout and +//! the output can have NHWC layout. +//! +//! If the network is weakly typed, the layer is additionally permitted some type conversions +//! as described below. +//! //! If the output type is explicitly specified via setOutputType, IIdentityLayer can be //! used to convert from one type to another. Other than conversions between the same //! type (kFLOAT -> kFLOAT for example), the only valid conversions are: @@ -3783,10 +3812,18 @@ class IRaggedSoftMaxLayer : public ILayer //! //! Two types are compatible if they are identical, or are both in {kFLOAT, kHALF}. //! Implicit conversion between incompatible types, i.e. 
without using setOutputType, -//! is recognized as incorrect as of TensorRT 8.4, but is retained for API compatibility -//! within TensorRT 8.x releases. TensorRT 10.0 onwards it is an error if the network output tensor type is incompatible -//! with the layer output type. E.g., implicit conversion from kFLOAT to kINT32 is not allowed, Use -//! setOutputType(DataType::kINT32) to explict convert kFLOAT to kINT32. +//! was recognized as incorrect as of TensorRT 8.4, but was retained for API compatibility +//! within TensorRT 8.x releases. In TensorRT 10.0 onwards it is an error if the network +//! output tensor type is incompatible with the layer output type. E.g., implicit conversion +//! from kFLOAT to kINT32 is not allowed. +//! +//! To explicitly convert kFLOAT to kINT32: +//! +//! * Preferred: use ICastLayer. +//! +//! * Legacy alternative: use IIdentityLayer and setOutputType(DataType::kINT32). +//! +//! Similar advice applies for explicit conversion in the other direction. //! //! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI. //! @@ -4525,8 +4562,9 @@ class IIfConditionalInputLayer : public IIfConditionalBoundaryLayer //! The following constraints apply to If-conditionals: //! - Both the trueSubgraph and falseSubgraph must be defined. //! - The number of output tensors in both subgraphs is the same. -//! - Corresponding output tensors from the true/false subgraphs have the same type and shape. +//! - Corresponding output tensors from the true/false subgraphs have the same type and rank. //! +//! The subgraphs may directly use tensors defined outside of the IIfConditional. class IIfConditional : public INoCopy { public: @@ -4553,7 +4591,7 @@ class IIfConditional : public INoCopy //! Each output layer of an IIfConditional represents a single output of either the true-subgraph or the //! false-subgraph of an IIfConditional, depending on which subgraph was executed. //! - //! The shapes of the two tensors must be equal unless the condition is a build-time constant. + //! The ranks of the two tensors must be equal unless the condition is a build-time constant. //! //! \see IIfConditionalOutputLayer //! @@ -4815,6 +4853,7 @@ class IIteratorLayer : public ILoopBoundaryLayer //! which are crucial for iterative computations, such as RNNs for natural language processing and //! time-series analysis. //! +//! The subgraph may directly use tensors defined outside of the ILoop. class ILoop : public INoCopy { public: @@ -6639,7 +6678,6 @@ class INetworkDefinition : public INoCopy //! \brief Add an input tensor to the network. //! //! Each input and output tensor must have a unique name. - //! The volume must be less than 2^31 elements. //! //! For networks with wildcard dimensions, the volume //! is based on the maxima specified by an IOptimizationProfile.Dimensions are normally non-negative integers. 
The diff --git a/include/NvInferImpl.h b/include/NvInferImpl.h index 4eec6e809..4a0049f45 100644 --- a/include/NvInferImpl.h +++ b/include/NvInferImpl.h @@ -110,6 +110,7 @@ class IPluginFactory; class IPluginLayer; class IPluginRegistry; class IPluginV2Layer; +class IRuntimeConfig; namespace v_1_0 { @@ -208,6 +209,7 @@ enum class ExecutionContextAllocationStrategy : int32_t; enum class RuntimePlatform : int32_t; enum class TilingOptimizationLevel : int32_t; + using TacticSources = uint32_t; using TensorFormats = uint32_t; using BuilderFlags = uint32_t; @@ -331,6 +333,11 @@ class VOptimizationProfile : public VRoot virtual bool setExtraMemoryTarget(float target) noexcept = 0; virtual float getExtraMemoryTarget() const noexcept = 0; virtual bool isValid() const noexcept = 0; + // Added in TensorRT 10.11 + TRT_NODISCARD virtual bool setShapeValuesV2( + char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept = 0; + TRT_NODISCARD virtual int64_t const* getShapeValuesV2( + char const* inputName, OptProfileSelector select) const noexcept = 0; }; class VCudaEngine : public VRoot @@ -397,6 +404,12 @@ class VCudaEngine : public VRoot virtual int64_t getWeightStreamingScratchMemorySize() const noexcept = 0; virtual int64_t getDeviceMemorySizeV2() const noexcept = 0; virtual int64_t getDeviceMemorySizeForProfileV2(int32_t profileIndex) const noexcept = 0; + // Added in TensorRT 10.11 + TRT_NODISCARD virtual int64_t const* getProfileTensorValuesV2( + char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept = 0; + TRT_NODISCARD virtual IExecutionContext* createExecutionContextWithRuntimeConfig( + IRuntimeConfig* runtimeConfig) noexcept = 0; + TRT_NODISCARD virtual IRuntimeConfig* createRuntimeConfig() noexcept = 0; }; class VExecutionContext : public VRoot @@ -452,6 +465,7 @@ class VExecutionContext : public VRoot // Added in TensorRT 10.1 virtual void setDeviceMemoryV2(void* memory, int64_t size) noexcept = 0; + TRT_NODISCARD virtual IRuntimeConfig* getRuntimeConfig() const noexcept = 0; }; class VEngineInspector : public VRoot @@ -1284,6 +1298,15 @@ class VBuilder : public VRoot virtual ICudaEngine* buildEngineWithConfig(INetworkDefinition& network, IBuilderConfig& config) noexcept = 0; }; +class VRuntimeConfig : public VRoot +{ +public: + virtual IRuntimeConfig* getPImpl() noexcept = 0; + virtual void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept = 0; + virtual ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept = 0; +}; + + } // namespace apiv } // namespace nvinfer1 diff --git a/include/NvInferRuntime.h b/include/NvInferRuntime.h index dd14c9f59..73dd97584 100644 --- a/include/NvInferRuntime.h +++ b/include/NvInferRuntime.h @@ -983,7 +983,7 @@ class IPluginV3OneBuild : public IPluginCapability //! For each format combination provided through configurePlugin(), up to a maximum of getFormatCombinationLimit(), //! the plugin will be timed for each tactic advertised through this method for that format combination. i.e. The //! plugin will be timed \f$N = \sum_{i=0}^{i using AllocatorFlags = uint32_t; //! DO NOT REFER TO namespace v_1_0 IN CODE. ALWAYS USE nvinfer1 INSTEAD. -//! The name v_1_0 may change in future versions of TensoRT. +//! The name v_1_0 may change in future versions of TensorRT. //! //! \class ILogger @@ -2572,7 +2572,7 @@ class IRefitter : public INoCopy //! 
The minimum and maximum specify the permitted range that is supported at runtime, while the optimum value //! is used for the kernel selection. This should be the "typical" value that is expected to occur at runtime. //! -//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValues() +//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValuesV2(), IOptimizationProfile::setShapeValues() //! enum class OptProfileSelector : int32_t { @@ -2674,7 +2674,7 @@ class IOptimizationProfile : public INoCopy //! i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals. //! //! Shape tensors are tensors that contribute to shape calculations in some way. While input shape tensors can be - //! type kINT32 or kINT64, the values used to set the minimum, optimium, and maximum values must fit in int32_t. + //! type kINT32 or kINT64, the values used to set the minimum, optimum, and maximum values must fit in int32_t. //! //! Examples: //! @@ -2703,7 +2703,12 @@ class IOptimizationProfile : public INoCopy //! //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator. //! - bool setShapeValues( + //! \warning When setShapeValuesV2 is called after setShapeValues, a following call to getShapeValues will + //! return nullptr. Vice versa, a call to setShapeValues undoes the effects of setShapeValuesV2. + //! + //! \deprecated Deprecated in TensorRT 10.11. Superseded by setShapeValuesV2(). + //! + TRT_DEPRECATED bool setShapeValues( char const* inputName, OptProfileSelector select, int32_t const* values, int32_t nbValues) noexcept { return mImpl->setShapeValues(inputName, select, values, nbValues); @@ -2729,7 +2734,9 @@ class IOptimizationProfile : public INoCopy //! //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator. //! - int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept + //! \deprecated Deprecated in TensorRT 10.11. Superseded by getShapeValuesV2(). + //! + TRT_DEPRECATED int32_t const* getShapeValues(char const* inputName, OptProfileSelector select) const noexcept { return mImpl->getShapeValues(inputName, select); } @@ -2781,6 +2788,69 @@ class IOptimizationProfile : public INoCopy return mImpl->isValid(); } + //! + //! \brief Set the minimum / optimum / maximum values for an input shape tensor. + //! + //! This function must be called three times for every input tensor t that is a shape tensor (t.isShape() == true). + //! This implies that the dimensions of t are fixed at network definition time and the volume does not exceed 64. + //! This function must not be called for any input tensor that is not a shape tensor. + //! + //! Each time this function is called for the same input tensor, the same nbValues must be supplied (either 1 + //! if the tensor rank is 0, or dims.d[0] if the rank is 1). Furthermore, if minVals, optVals, maxVals are the + //! minimum, optimum, and maximum values, it must be true that minVals[i] <= optVals[i] <= maxVals[i] for + //! i = 0, ..., nbValues - 1. Execution of the network must be valid for the optVals. + //! + //! Shape tensors are tensors that contribute to shape calculations in some way. While input shape tensors can be + //! type kINT32 or kINT64, the values used to set the minimum, optimum, and maximum values must fit in int64_t. + //! + //! Examples: + //! + //! * A shape tensor used as the second input to IShuffleLayer can contain a -1 wildcard. 
+    //!   The corresponding minVal[i] should be -1.
+    //!
+    //! * A shape tensor used as the stride input to ISliceLayer can contain any valid strides.
+    //!   The values could be positive, negative, or zero.
+    //!
+    //! * A shape tensor subtracted from zero to compute the size input of an ISliceLayer can
+    //!   contain any non-positive values that yield a valid slice operation.
+    //!
+    //! Tightening the minVals and maxVals bounds to cover only values that are necessary may help optimization.
+    //!
+    //! \param inputName The input tensor name
+    //! \param select Whether to set the minimum, optimum, or maximum input values.
+    //! \param values An array of length nbValues containing the minimum, optimum, or maximum shape tensor elements.
+    //!               For multidimensional tensors, the array is in row-major order.
+    //! \param nbValues The length of the value array, which must equal the number of shape tensor elements (>= 1)
+    //!
+    //! \return false if an inconsistency was detected (e.g. nbValues does not match a previous call for the same
+    //!         tensor), else true. As for setDimensions(), a full validation can only be performed at engine build
+    //!         time.
+    //!
+    //! \warning If run on DLA, minimum, optimum, and maximum shape values must be the same.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    //! \warning When setShapeValues is called after setShapeValuesV2, the input shape values are overwritten as
+    //!          32-bit values and getShapeValuesV2 will return nullptr.
+    //!
+    bool setShapeValuesV2(
+        char const* inputName, OptProfileSelector select, int64_t const* values, int32_t nbValues) noexcept
+    {
+        return mImpl->setShapeValuesV2(inputName, select, values, nbValues);
+    }
+
+    //!
+    //! \brief Get the minimum / optimum / maximum values for an input shape tensor.
+    //!
+    //! If the shape values have not been set previously with setShapeValuesV2(), this returns nullptr.
+    //!
+    //! \warning The string inputName must be null-terminated, and be at most 4096 bytes including the terminator.
+    //!
+    int64_t const* getShapeValuesV2(char const* inputName, OptProfileSelector select) const noexcept
+    {
+        return mImpl->getShapeValuesV2(inputName, select);
+    }
+
 protected:
     apiv::VOptimizationProfile* mImpl;
     virtual ~IOptimizationProfile() noexcept = default;
@@ -2993,6 +3063,43 @@ constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
     return 3;
 }
+
+//! \class IRuntimeConfig
+//!
+//! \brief A class for runtime configuration. This class is used during execution context creation.
+//!
+//! \see IRuntime, IBuilderConfig
+//!
+class IRuntimeConfig : public INoCopy
+{
+public:
+    virtual ~IRuntimeConfig() noexcept = default;
+
+    //!
+    //! \brief Set the execution context allocation strategy. Default value is kSTATIC.
+    //!
+    //! \param strategy The execution context allocation strategy.
+    //!
+    void setExecutionContextAllocationStrategy(ExecutionContextAllocationStrategy strategy) noexcept
+    {
+        return mImpl->setExecutionContextAllocationStrategy(strategy);
+    }
+
+    //!
+    //! \brief Get the execution context allocation strategy.
+    //!
+    //! \return The execution context allocation strategy.
+    //!
+    ExecutionContextAllocationStrategy getExecutionContextAllocationStrategy() const noexcept
+    {
+        return mImpl->getExecutionContextAllocationStrategy();
+    }
+
+
+protected:
+    apiv::VRuntimeConfig* mImpl;
+}; // class IRuntimeConfig
+
 //!
 //! \class ICudaEngine
 //!
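As an illustration of the 64-bit shape-value path documented above, a minimal build-time sketch could look as follows. The `builder` and `config` objects and the tensor name "dims" are illustrative placeholders, not taken from this patch:

    // Describe an INT64 shape-tensor input with the V2 API instead of the deprecated
    // 32-bit setShapeValues(). Here "dims" is assumed to be a rank-1 shape tensor with two elements.
    nvinfer1::IOptimizationProfile* profile = builder->createOptimizationProfile();
    int64_t const minVals[2] = {1, 1};
    int64_t const optVals[2] = {4, 256};
    int64_t const maxVals[2] = {8, 512};
    profile->setShapeValuesV2("dims", nvinfer1::OptProfileSelector::kMIN, minVals, 2);
    profile->setShapeValuesV2("dims", nvinfer1::OptProfileSelector::kOPT, optVals, 2);
    profile->setShapeValuesV2("dims", nvinfer1::OptProfileSelector::kMAX, maxVals, 2);
    config->addOptimizationProfile(profile);
    // Values set through the V2 setter are read back with getShapeValuesV2() (or, on the
    // built engine, with getProfileTensorValuesV2() further down in this patch); per the
    // warnings above, mixing the V1 and V2 setters on the same input makes the other
    // getter return nullptr.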
@@ -3144,6 +3251,31 @@ class ICudaEngine : public INoCopy return mImpl->createExecutionContextWithoutDeviceMemory(); } + //! + //! \brief Create an execution context with TensorRT JIT runtime config. + //! + //! \param runtimeConfig The runtime config for TensorRT JIT. + //! + //! \see IRuntimeConfig + //! + IExecutionContext* createExecutionContext(IRuntimeConfig* runtimeConfig) noexcept + { + return mImpl->createExecutionContextWithRuntimeConfig(runtimeConfig); + } + + //! + //! \brief Create a runtime config for TensorRT JIT. + //! The caller is responsible for ownership of the returned IRuntimeConfig object. + //! + //! \return A IRuntimeConfig object. + //! + //! \see IRuntimeConfig + //! + IRuntimeConfig* createRuntimeConfig() noexcept + { + return mImpl->createRuntimeConfig(); + } + //! //! \brief Return the maximum device memory required by the context over all profiles. //! @@ -3460,8 +3592,11 @@ class ICudaEngine : public INoCopy //! //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. //! - int32_t const* getProfileTensorValues(char const* tensorName, int32_t profileIndex, OptProfileSelector select) const - noexcept + //! \deprecated Deprecated in TensorRT 10.11. Superseded by getProfileTensorValuesV2(). + //! \warning If input shapes are set with setShapeValuesV2, getProfileTensorValues will return nullptr + //! + TRT_DEPRECATED int32_t const* getProfileTensorValues( + char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept { return mImpl->getProfileTensorValues(tensorName, profileIndex, select); } @@ -3677,7 +3812,7 @@ class ICudaEngine : public INoCopy //! //! \return true if the memory limit is valid and the call was successful, false otherwise. //! - //! \deprecated Deprecated in TensorRT 10.1. Superceded by setWeightStreamingBudgetV2(). + //! \deprecated Deprecated in TensorRT 10.1. Superseded by setWeightStreamingBudgetV2(). //! //! \see BuilderFlag::kWEIGHT_STREAMING //! \see getWeightStreamingBudget() @@ -3697,7 +3832,7 @@ class ICudaEngine : public INoCopy //! \returns The weight streaming budget in bytes. Please see setWeightStreamingBudget() for the possible //! values. //! - //! \deprecated Deprecated in TensorRT 10.1. Superceded by getWeightStreamingBudgetV2(). + //! \deprecated Deprecated in TensorRT 10.1. Superseded by getWeightStreamingBudgetV2(). //! //! \see BuilderFlag::kWEIGHT_STREAMING, //! \see setWeightStreamingBudget() @@ -3875,6 +4010,31 @@ class ICudaEngine : public INoCopy return mImpl->isDebugTensor(name); } + //! + //! \brief Get the minimum / optimum / maximum values (not dimensions) for an input tensor given + //! its name under an optimization profile. These correspond to the values set using + //! IOptimizationProfile::setShapeValuesV2 when the engine was built. + //! + //! \param tensorName The name of an input tensor. + //! + //! \param profileIndex The profile index, which must be between 0 and getNbOptimizationProfiles()-1. + //! + //! \param select Whether to query the minimum, optimum, or maximum values for this input tensor. + //! + //! \return The minimum / optimum / maximum values for an input tensor in this profile. If the profileIndex is + //! invalid or the provided name does not map to an input tensor, or the tensor is not a shape binding, return + //! nullptr. + //! + //! \warning The string tensorName must be null-terminated, and be at most 4096 bytes including the terminator. + //! + //! 
\warning If input shapes are set with setShapeValues, getProfileTensorValuesV2 will return nullptr + //! + int64_t const* getProfileTensorValuesV2( + char const* tensorName, int32_t profileIndex, OptProfileSelector select) const noexcept + { + return mImpl->getProfileTensorValuesV2(tensorName, profileIndex, select); + } + protected: apiv::VCudaEngine* mImpl; }; @@ -3898,7 +4058,7 @@ class IOutputAllocator : public IVersionedInterface //! If currentMemory is known to be big enough, one option is to return currentMemory. //! //! \param tensorName name of the output tensor. - //! \param currentMemory points to the address set by IExectionContext::setTensorAddress. + //! \param currentMemory points to the address set by IExecutionContext::setTensorAddress. //! \param size number of bytes required. Always positive, even for an empty tensor. //! \param alignment required alignment of the allocation. //! @@ -4605,7 +4765,7 @@ class IExecutionContext : public INoCopy //! //! \param event The CUDA event that is triggered after all input tensors have been consumed. //! - //! \warning The set event must be valid during the inferece. + //! \warning The set event must be valid during the inference. //! //! \return True on success, false if error occurred. //! @@ -4888,6 +5048,16 @@ class IExecutionContext : public INoCopy return mImpl->getDebugState(name); } + //! + //! \brief Get the runtime config object used during execution context creation. + //! + //! \return The runtime config object. + //! + IRuntimeConfig* getRuntimeConfig() const noexcept + { + return mImpl->getRuntimeConfig(); + } + protected: apiv::VExecutionContext* mImpl; }; // class IExecutionContext diff --git a/include/NvInferRuntimeBase.h b/include/NvInferRuntimeBase.h index c4a768bb0..e653dc03a 100644 --- a/include/NvInferRuntimeBase.h +++ b/include/NvInferRuntimeBase.h @@ -26,6 +26,7 @@ // Items that are marked as deprecated will be removed in a future release. #if __cplusplus >= 201402L #define TRT_DEPRECATED [[deprecated]] +#define TRT_DEPRECATED_BECAUSE(REASON) [[deprecated(REASON)]] #define TRT_DEPRECATED_ENUM TRT_DEPRECATED #ifdef _MSC_VER #define TRT_DEPRECATED_API __declspec(dllexport) @@ -42,6 +43,19 @@ #define TRT_DEPRECATED_ENUM #define TRT_DEPRECATED_API __attribute__((deprecated, visibility("default"))) #endif +#define TRT_DEPRECATED_BECAUSE(REASON) TRT_DEPRECATED +#endif + +//! A stand-in for `[[nodiscard]]` and `[[nodiscard(REASON)]]` that works with older compilers. +#if __cplusplus >= 201907L +#define TRT_NODISCARD [[nodiscard]] +#define TRT_NODISCARD_BECAUSE(REASON) [[nodiscard(REASON)]] +#elif __cplusplus >= 201603L +#define TRT_NODISCARD [[nodiscard]] +#define TRT_NODISCARD_BECAUSE(REASON) [[nodiscard]] +#else +#define TRT_NODISCARD +#define TRT_NODISCARD_BECAUSE(REASON) #endif // Defines which symbols are exported diff --git a/include/NvInferVersion.h b/include/NvInferVersion.h index 7676704a9..9ccebf5dd 100644 --- a/include/NvInferVersion.h +++ b/include/NvInferVersion.h @@ -23,10 +23,14 @@ #ifndef NV_INFER_VERSION_H #define NV_INFER_VERSION_H -#define NV_TENSORRT_MAJOR 10 //!< TensorRT major version. -#define NV_TENSORRT_MINOR 10 //!< TensorRT minor version. -#define NV_TENSORRT_PATCH 0 //!< TensorRT patch version. -#define NV_TENSORRT_BUILD 31 //!< TensorRT build number. +#define TRT_MAJOR_ENTERPRISE 10 +#define TRT_MINOR_ENTERPRISE 11 +#define TRT_PATCH_ENTERPRISE 0 +#define TRT_BUILD_ENTERPRISE 33 +#define NV_TENSORRT_MAJOR TRT_MAJOR_ENTERPRISE //!< TensorRT major version. 
+#define NV_TENSORRT_MINOR TRT_MINOR_ENTERPRISE //!< TensorRT minor version. +#define NV_TENSORRT_PATCH TRT_PATCH_ENTERPRISE //!< TensorRT patch version. +#define NV_TENSORRT_BUILD TRT_BUILD_ENTERPRISE //!< TensorRT build number. #define NV_TENSORRT_LWS_MAJOR 0 //!< TensorRT LWS major version. #define NV_TENSORRT_LWS_MINOR 0 //!< TensorRT LWS minor version. diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index d15256c01..853c78073 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -19,6 +19,18 @@ if (${TRT_BUILD_ENABLE_NEW_PLUGIN_FLOW}) option(TRT_BUILD_INCLUDE_BERT_QKV_PLUGIN "Build the BERT QKV to Context Plugin and related plugins." ON) +# Create the main object library, which is shared between plugin, plugin_internal, and plugin_static. +add_library(trt_plugins OBJECT) +function(add_plugin_source) + target_sources(trt_plugins PRIVATE ${ARGN}) +endfunction() + +# Create the VC object lib, used by vc and vc_static. +add_library(trt_vc_plugins OBJECT) +function(add_vc_plugin_source) + target_sources(trt_vc_plugins PRIVATE ${ARGN}) +endfunction() + set(TRT_PLUGIN_NAMES batchedNMSPlugin batchTilePlugin @@ -69,20 +81,6 @@ if(${TRT_BUILD_INCLUDE_BERT_QKV_PLUGIN}) ) endif() -add_library(tensorrt_plugins SHARED) -add_library(tensorrt_plugins_internal SHARED) - -function(add_plugin_source) - target_sources(tensorrt_plugins PRIVATE ${ARGN}) - target_sources(tensorrt_plugins_internal PRIVATE ${ARGN}) -endfunction() - -add_library(tensorrt_vc_plugins SHARED) - -function(add_vc_plugin_source) - target_sources(tensorrt_vc_plugins PRIVATE ${ARGN}) -endfunction() - add_subdirectory(api) add_subdirectory(vc) add_subdirectory(common) @@ -91,6 +89,46 @@ foreach(PLUGIN_NAME IN LISTS TRT_PLUGIN_NAMES) add_subdirectory(${PLUGIN_NAME}) endforeach() +set(trt_plugin_include_dirs + ${TensorRT_SOURCE_DIR}/externals + ${CMAKE_CURRENT_LIST_DIR} +) + +target_include_directories(trt_plugins PUBLIC ${trt_plugin_include_dirs}) +target_include_directories(trt_vc_plugins PUBLIC ${trt_plugin_include_dirs}) + +# Use the compile-time dependencies of TRT when compiling the objects before the link stage. +# The final targets will be responsible for selecting the target TRT distribution to use. +target_link_libraries(trt_plugins PRIVATE $) +target_link_libraries(trt_vc_plugins PRIVATE $) + +# Use true link dependencies on the global definitions and cudart_static. +target_link_libraries(trt_plugins PUBLIC trt_global_definitions CUDA::cudart_static) +target_link_libraries(trt_vc_plugins PUBLIC trt_global_definitions CUDA::cudart_static) + +foreach(SM IN LISTS CMAKE_CUDA_ARCHITECTURES) + target_compile_definitions(trt_plugins PUBLIC "ENABLE_SM${SM}") + target_compile_definitions(trt_vc_plugins PUBLIC "ENABLE_SM${SM}") +endforeach() + +target_compile_options(trt_plugins PUBLIC $<$:--expt-relaxed-constexpr>) +target_compile_options(trt_vc_plugins PUBLIC $<$:--expt-relaxed-constexpr>) + +# Create all the library targets, reusing the objects we've compiled in the first step. 
+add_library(tensorrt_plugins SHARED $) +add_library(tensorrt_plugins_internal SHARED $) +add_library(tensorrt_plugins_static STATIC $) +add_library(tensorrt_vc_plugins SHARED $) +add_library(tensorrt_vc_plugins_static STATIC $) + +target_compile_definitions(tensorrt_vc_plugins PRIVATE + COMPILE_VFC_PLUGIN=1 +) + +target_compile_definitions(tensorrt_vc_plugins_static PRIVATE + COMPILE_VFC_PLUGIN=1 +) + if (NOT MSVC) set(trt_plugins_link_options "LINKER:--version-script=${CMAKE_CURRENT_LIST_DIR}/exports.map" @@ -117,20 +155,9 @@ if(NOT MSVC) ) endif() -set(trt_plugin_include_dirs - ${TensorRT_SOURCE_DIR}/externals - ${CMAKE_CURRENT_LIST_DIR} -) - -set(trt_plugin_compile_options - $<$:--expt-relaxed-constexpr> -) - -# Target properties for tensorrt_plugins -target_include_directories(tensorrt_plugins PRIVATE ${trt_plugin_include_dirs}) +### TRT Plugin Setup target_link_libraries(tensorrt_plugins PRIVATE ${trt_plugin_dependencies}) target_link_options(tensorrt_plugins PRIVATE ${trt_plugins_link_options}) -target_compile_options(tensorrt_plugins PRIVATE ${trt_plugin_compile_options}) set_target_properties( tensorrt_plugins @@ -155,12 +182,11 @@ else() set(trt_plugins_internal_link_options) endif() -# Target properties for tensorrt_plugins_internal +### Internal Plugin Setup # This library is effectively the same as tensorrt_plugins, but without stripped symbols. target_include_directories(tensorrt_plugins_internal PUBLIC ${trt_plugin_include_dirs}) target_link_libraries(tensorrt_plugins_internal PRIVATE ${trt_plugin_dependencies}) target_link_options(tensorrt_plugins_internal PRIVATE ${trt_plugins_internal_link_options}) -target_compile_options(tensorrt_plugins_internal PRIVATE ${trt_plugin_compile_options}) set_target_properties( tensorrt_plugins_internal @@ -172,6 +198,33 @@ set_target_properties( SOVERSION ${TRT_MAJOR} LINK_DEPENDS ${TensorRT_SOURCE_DIR}/Exports-plugin_internal.map) + +### Static Plugin Setup +set(trt_plugin_static_dependencies + tensorrt_static + CUDA::cudart_static + trt_global_definitions +) + +target_include_directories(tensorrt_plugins_static PRIVATE ${trt_plugin_include_dirs}) +target_link_libraries(tensorrt_plugins_static PRIVATE ${trt_plugin_static_dependencies}) + +set_target_properties( + tensorrt_plugins_static + PROPERTIES CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON + OUTPUT_NAME nvinfer_plugin_static + VERSION ${TensorRT_VERSION} + SOVERSION ${TRT_MAJOR} + LINK_DEPENDS ${CMAKE_CURRENT_LIST_DIR}/exports.map) + +if(NOT ${TRT_BUILD_ENABLE_STATIC_LIBS}) + set_target_properties(tensorrt_plugins_static + PROPERTIES EXCLUDE_FROM_ALL ON + ) +endif() + +### VC Plugin Setup if (NOT MSVC) set(trt_vc_plugins_link_options "LINKER:--version-script=${CMAKE_CURRENT_LIST_DIR}/exports-vfc_plugin.map" @@ -190,7 +243,6 @@ endif() target_include_directories(tensorrt_vc_plugins PRIVATE ${trt_plugin_include_dirs}) target_link_libraries(tensorrt_vc_plugins PRIVATE ${trt_plugin_dependencies}) target_link_options(tensorrt_vc_plugins PRIVATE ${trt_vc_plugins_link_options}) -target_compile_options(tensorrt_vc_plugins PRIVATE ${trt_plugin_compile_options}) set_target_properties( tensorrt_vc_plugins @@ -201,14 +253,27 @@ set_target_properties( SOVERSION ${TRT_MAJOR} LINK_DEPENDS ${CMAKE_CURRENT_LIST_DIR}/exports-vfc_plugin.map) -foreach(SM IN LISTS CMAKE_CUDA_ARCHITECTURES) - target_compile_definitions(tensorrt_plugins PRIVATE "ENABLE_SM${SM}") - target_compile_definitions(tensorrt_plugins_internal PRIVATE "ENABLE_SM${SM}") - target_compile_definitions(tensorrt_vc_plugins 
PRIVATE "ENABLE_SM${SM}") -endforeach() +### VC Plugin Static Setup +target_include_directories(tensorrt_vc_plugins_static PRIVATE ${trt_plugin_include_dirs}) +target_link_libraries(tensorrt_vc_plugins_static PRIVATE ${trt_plugin_static_dependencies}) + +set_target_properties( + tensorrt_vc_plugins_static + PROPERTIES CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN ON + OUTPUT_NAME nvinfer_vc_plugin_static + VERSION ${TensorRT_VERSION} + SOVERSION ${TRT_MAJOR} + LINK_DEPENDS ${CMAKE_CURRENT_LIST_DIR}/exports-vfc_plugin.map) + +if(NOT ${TRT_BUILD_ENABLE_STATIC_LIBS}) + set_target_properties(tensorrt_vc_plugins_static + PROPERTIES EXCLUDE_FROM_ALL ON + ) +endif() install( - TARGETS tensorrt_plugins tensorrt_plugins_internal tensorrt_vc_plugins + TARGETS tensorrt_plugins tensorrt_plugins_static tensorrt_plugins_internal tensorrt_vc_plugins tensorrt_vc_plugins_static OPTIONAL ) diff --git a/plugin/README.md b/plugin/README.md index 4f13c98af..999c75b0c 100644 --- a/plugin/README.md +++ b/plugin/README.md @@ -15,7 +15,8 @@ | [cropAndResizePlugin](cropAndResizePlugin) | CropAndResizeDynamic | 1 | | [decodeBbox3DPlugin](decodeBbox3DPlugin) | DecodeBbox3DPlugin | 1 | | [detectionLayerPlugin](detectionLayerPlugin) | DetectionLayer_TRT | 1 | -| [disentangledAttentionPlugin](disentangledAttentionPlugin) | DisentangledAttention_TRT | 1 | +| [disentangledAttentionPlugin](disentangledAttentionPlugin) [DEPRECATED] | DisentangledAttention_TRT | 1 | +| [disentangledAttentionPlugin](disentangledAttentionPlugin) | DisentangledAttention_TRT | 2 | | [efficientNMSPlugin](efficientNMSPlugin) | EfficientNMS_TRT | 1 | | [efficientNMSONNXPlugin](efficientNMSPlugin) [DEPRECATED] | EfficientNMS_ONNX_TRT | 1 | | [embLayerNormPlugin](embLayerNormPlugin) [DEPRECATED]| CustomEmbLayerNormPluginDynamic | 1, 2, 3 | @@ -33,7 +34,8 @@ | [modulatedDeformConvPlugin](modulatedDeformConvPlugin) | ModulatedDeformConv2d | 1 | | [multilevelCropAndResizePlugin](multilevelCropAndResizePlugin) | MultilevelCropAndResize_TRT | 1 | | [multilevelProposeROI](multilevelProposeROI) | MultilevelProposeROI_TRT | 1 | -| [multiscaleDeformableAttnPlugin](multiscaleDeformableAttnPlugin) | MultiscaleDeformableAttnPlugin_TRT | 1 | +| [multiscaleDeformableAttnPlugin](multiscaleDeformableAttnPlugin) [DEPRECATED] | MultiscaleDeformableAttnPlugin_TRT | 1 | +| [multiscaleDeformableAttnPlugin](multiscaleDeformableAttnPlugin) | MultiscaleDeformableAttnPlugin_TRT | 2 | | [nmsPlugin](nmsPlugin) [DEPRECATED] | NMS_TRT | 1 | | [nmsPlugin](nmsPlugin) [DEPRECATED] | NMSDynamic_TRT | 1 | | [normalizePlugin](normalizePlugin) [DEPRECATED] | Normalize_TRT | 1 | diff --git a/plugin/api/inferPlugin.cpp b/plugin/api/inferPlugin.cpp index 8aa0027a8..31d3552b4 100644 --- a/plugin/api/inferPlugin.cpp +++ b/plugin/api/inferPlugin.cpp @@ -19,7 +19,6 @@ #include "common/checkMacrosPlugin.h" #include "common/plugin.h" #include "roiAlignPlugin/roiAlignPlugin.h" -#if !TRT_WINML #include "batchTilePlugin/batchTilePlugin.h" #include "batchedNMSPlugin/batchedNMSPlugin.h" #include "clipPlugin/clipPlugin.h" @@ -37,9 +36,11 @@ #include "instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h" #include "leakyReluPlugin/lReluPlugin.h" #include "modulatedDeformConvPlugin/modulatedDeformConvPlugin.h" +#include "modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h" #include "multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.h" #include "multilevelProposeROI/multilevelProposeROIPlugin.h" #include 
"multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.h" +#include "multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h" #include "nmsPlugin/nmsPlugin.h" #include "normalizePlugin/normalizePlugin.h" #include "nvFasterRCNN/nvFasterRCNNPlugin.h" @@ -59,7 +60,6 @@ #include "specialSlicePlugin/specialSlicePlugin.h" #include "splitPlugin/split.h" #include "voxelGeneratorPlugin/voxelGenerator.h" -#endif #include #include #include @@ -180,7 +180,6 @@ extern "C" bool initLibNvInferPlugins(void* logger, char const* libNamespace) { initializePlugin(logger, libNamespace); -#if !TRT_WINML initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); @@ -203,9 +202,11 @@ extern "C" initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); + initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); + initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); @@ -227,7 +228,6 @@ extern "C" initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); initializePlugin(logger, libNamespace); -#endif return true; } } // extern "C" diff --git a/plugin/bertQKVToContextPlugin/CMakeLists.txt b/plugin/bertQKVToContextPlugin/CMakeLists.txt index 7b13a5eff..52bd30f03 100644 --- a/plugin/bertQKVToContextPlugin/CMakeLists.txt +++ b/plugin/bertQKVToContextPlugin/CMakeLists.txt @@ -15,6 +15,8 @@ # limitations under the License. # +include(ShouldCompileKernel) + add_plugin_source( mhaRunner.cu mhaRunner.h @@ -41,33 +43,6 @@ set(BERT_QKV_SUPPORTED_SMS 120 ) -# Certain cubins are binary compatible between different SM versions, so they are reused. -# This function checks if a SM-named file should be compiled based on current SM enablement. -# Specifically, the SM80 files are compiled if either 80, 86, or 89 are enabled. -function(should_compile_kernel SM OUT_VAR) - # If the target SM is any of 80/86/89, we need to check if any of those are enabled in CMAKE_CUDA_ARCHITECTURES. 
- if((${SM} EQUAL 80) OR (${SM} EQUAL 86) OR (${SM} EQUAL 89)) - list(FIND CMAKE_CUDA_ARCHITECTURES 80 SM80_INDEX) - list(FIND CMAKE_CUDA_ARCHITECTURES 86 SM86_INDEX) - list(FIND CMAKE_CUDA_ARCHITECTURES 89 SM89_INDEX) - if((NOT ${SM80_INDEX} EQUAL -1) OR - (NOT ${SM86_INDEX} EQUAL -1) OR - (NOT ${SM89_INDEX} EQUAL -1) - ) - set(${OUT_VAR} TRUE PARENT_SCOPE) - else() - set(${OUT_VAR} FALSE PARENT_SCOPE) - endif() - else() - list(FIND CMAKE_CUDA_ARCHITECTURES ${SM} SM_INDEX) - if (NOT ${SM_INDEX} EQUAL -1) - set(${OUT_VAR} TRUE PARENT_SCOPE) - else() - set(${OUT_VAR} FALSE PARENT_SCOPE) - endif() - endif() -endfunction() - add_subdirectory(fused_multihead_attention) add_subdirectory(fused_multihead_attention_v2) diff --git a/plugin/common/bertCommon.h b/plugin/common/bertCommon.h index aba475859..11f66d559 100644 --- a/plugin/common/bertCommon.h +++ b/plugin/common/bertCommon.h @@ -185,8 +185,10 @@ inline bool doesHwSupportBertMHAPlugin() noexcept static constexpr int32_t kSM_TURING_HEX{0x75}; static constexpr int32_t kSM_BLACKWELL_100_HEX{0xA0}; static constexpr int32_t kSM_BLACKWELL_120_HEX{0xC0}; + static constexpr int32_t kSM_ORIN_HEX{0x87}; + bool isAuto = smVersion == kSM_ORIN_HEX; bool isSm100OrLower = smVersion >= kSM_TURING_HEX && smVersion <= kSM_BLACKWELL_100_HEX; - bool isHardwareSupported = isSm100OrLower || smVersion == kSM_BLACKWELL_120_HEX; + bool isHardwareSupported = (isSm100OrLower || smVersion == kSM_BLACKWELL_120_HEX) && !isAuto; return isHardwareSupported; } diff --git a/plugin/common/cublasLtWrapper.cpp b/plugin/common/cublasLtWrapper.cpp index 511767b3d..0cd5c4f91 100644 --- a/plugin/common/cublasLtWrapper.cpp +++ b/plugin/common/cublasLtWrapper.cpp @@ -29,13 +29,13 @@ #define dllGetSym(handle, name) GetProcAddress(static_cast(handle), name) auto const kCUBLASLT_PLUGIN_LIBNAME = std::string{"cublasLt64_"} + std::to_string(nvinfer1::getCudaLibVersionMaj()) + ".dll"; -#else +#else // defined(_WIN32) #include #define dllOpen(name) dlopen(name, RTLD_LAZY) #define dllClose(handle) dlclose(handle) #define dllGetSym(handle, name) dlsym(handle, name) auto const kCUBLASLT_PLUGIN_LIBNAME = std::string{"libcublasLt.so."} + std::to_string(nvinfer1::getCudaLibVersionMaj()); -#endif +#endif // defined(_WIN32) namespace nvinfer1::pluginInternal { diff --git a/plugin/common/cublasWrapper.cpp b/plugin/common/cublasWrapper.cpp index 173fb8946..f2cdb9155 100644 --- a/plugin/common/cublasWrapper.cpp +++ b/plugin/common/cublasWrapper.cpp @@ -19,9 +19,6 @@ #include "common/checkMacrosPlugin.h" #include "cudaDriverWrapper.h" -namespace nvinfer1::pluginInternal -{ - #if defined(_WIN32) #if !defined(WIN32_LEAN_AND_MEAN) #define WIN32_LEAN_AND_MEAN @@ -33,14 +30,16 @@ namespace nvinfer1::pluginInternal #define dllGetSym(handle, name) GetProcAddress(static_cast(handle), name) auto const kCUBLAS_PLUGIN_LIBNAME = std::string{"cublas64_"} + std::to_string(nvinfer1::getCudaLibVersionMaj()) + ".dll"; -#else +#else // defined(_WIN32) #include #define dllOpen(name) dlopen(name, RTLD_LAZY) #define dllClose(handle) dlclose(handle) #define dllGetSym(handle, name) dlsym(handle, name) auto const kCUBLAS_PLUGIN_LIBNAME = std::string{"libcublas.so."} + std::to_string(nvinfer1::getCudaLibVersionMaj()); -#endif +#endif // defined(_WIN32) +namespace nvinfer1::pluginInternal +{ using namespace nvinfer1; // If tryLoadingCublas failed, the CublasWrapper object won't be created. 
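The cuBLAS/cuBLASLt (and, below, cuDNN) wrapper hunks converge on the same dynamic-loading pattern: dllOpen/dllGetSym at construction, and dllClose only when the handle is non-null, so a failed load no longer reaches dlclose/FreeLibrary with a null handle. A minimal sketch of that guard, assuming the POSIX branch; the struct and member names here are illustrative, not from the patch:

    #include <dlfcn.h>

    struct LibraryHolder
    {
        void* handle{nullptr}; // stays nullptr if dlopen failed

        ~LibraryHolder()
        {
            // Mirrors the destructor fix in the following hunks: only close a library
            // that was actually loaded.
            if (handle != nullptr)
            {
                dlclose(handle);
            }
        }
    };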
@@ -87,7 +86,10 @@ CublasWrapper::~CublasWrapper() mHandle = nullptr; } - dllClose(mLibrary); + if (mLibrary != nullptr) + { + dllClose(mLibrary); + } } void* CublasWrapper::tryLoadingCublas() diff --git a/plugin/common/cudaDriverWrapper.cpp b/plugin/common/cudaDriverWrapper.cpp index fa83866c6..e1267173d 100644 --- a/plugin/common/cudaDriverWrapper.cpp +++ b/plugin/common/cudaDriverWrapper.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -24,12 +24,12 @@ #define dllOpen(name) (void*) LoadLibraryA("nv" name ".dll") #define dllClose(handle) FreeLibrary(static_cast(handle)) #define dllGetSym(handle, name) GetProcAddress(static_cast(handle), name) -#else +#else // defined(_WIN32) #include #define dllOpen(name) dlopen("lib" name ".so.1", RTLD_LAZY) #define dllClose(handle) dlclose(handle) #define dllGetSym(handle, name) dlsym(handle, name) -#endif +#endif // defined(_WIN32) #include "common/cudaDriverWrapper.h" #include "common/plugin.h" diff --git a/plugin/common/cudnnWrapper.cpp b/plugin/common/cudnnWrapper.cpp index f3b3601d8..caff4a8c5 100644 --- a/plugin/common/cudnnWrapper.cpp +++ b/plugin/common/cudnnWrapper.cpp @@ -19,9 +19,6 @@ #include "common/checkMacrosPlugin.h" #include "common/plugin.h" -namespace nvinfer1::pluginInternal -{ - #define CUDNN_MAJOR 8 #if defined(_WIN32) #if !defined(WIN32_LEAN_AND_MEAN) @@ -33,14 +30,16 @@ namespace nvinfer1::pluginInternal #define dllClose(handle) FreeLibrary(static_cast(handle)) #define dllGetSym(handle, name) GetProcAddress(static_cast(handle), name) auto const kCUDNN_PLUGIN_LIBNAME = std::string("cudnn64_") + std::to_string(CUDNN_MAJOR) + ".dll"; -#else +#else // defined(_WIN32) #include #define dllOpen(name) dlopen(name, RTLD_LAZY) #define dllClose(handle) dlclose(handle) #define dllGetSym(handle, name) dlsym(handle, name) auto const kCUDNN_PLUGIN_LIBNAME = std::string("libcudnn.so.") + std::to_string(CUDNN_MAJOR); -#endif +#endif // defined(_WIN32) +namespace nvinfer1::pluginInternal +{ // If tryLoadingCudnn failed, the CudnnWrapper object won't be created. CudnnWrapper::CudnnWrapper(bool initHandle, char const* callerPluginName) : mLibrary(tryLoadingCudnn(callerPluginName)) @@ -80,7 +79,10 @@ CudnnWrapper::~CudnnWrapper() mHandle = nullptr; } - dllClose(mLibrary); + if (mLibrary != nullptr) + { + dllClose(mLibrary); + } } void* CudnnWrapper::tryLoadingCudnn(char const* callerPluginName) diff --git a/plugin/common/plugin.h b/plugin/common/plugin.h index a83c854f5..7e8ee7439 100644 --- a/plugin/common/plugin.h +++ b/plugin/common/plugin.h @@ -128,12 +128,6 @@ struct ComputeCapability int32_t minor{0}; PLUGIN_CUASSERT(cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, deviceIndex)); PLUGIN_CUASSERT(cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, deviceIndex)); - // Redirect 12.1 to 12.0 to since dependencies do not support 12.1 yet and 12.1 can reuse 12.0 cubins to save - // lib size/compile time.. 
- if (major == 12 && minor == 1) - { - minor = 0; - } return {major, minor}; } }; diff --git a/plugin/disentangledAttentionPlugin/CMakeLists.txt b/plugin/disentangledAttentionPlugin/CMakeLists.txt index be8c57052..8df5d3fd5 100644 --- a/plugin/disentangledAttentionPlugin/CMakeLists.txt +++ b/plugin/disentangledAttentionPlugin/CMakeLists.txt @@ -16,7 +16,10 @@ # add_plugin_source( + disentangledAttentionCommon.h disentangledAttentionPlugin.cpp disentangledAttentionPlugin.h + disentangledAttentionPluginLegacy.cpp + disentangledAttentionPluginLegacy.h disentangledKernel.cu ) diff --git a/plugin/disentangledAttentionPlugin/DisentangledAttentionPlugin_PluginConfig.yaml b/plugin/disentangledAttentionPlugin/DisentangledAttentionPlugin_PluginConfig.yaml new file mode 100644 index 000000000..3cc47c34a --- /dev/null +++ b/plugin/disentangledAttentionPlugin/DisentangledAttentionPlugin_PluginConfig.yaml @@ -0,0 +1,170 @@ +# +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +--- +name: "DisentangledAttention_TRT" +versions: + "2": # Current version (v3 implementation) + interface: "IPluginV3" # Interface type for the new version + inputs: + - data0 + - data1 + - data2 + outputs: + - output + attributes: + - span + - factor + attribute_types: + span: int32 + factor: float32 + attribute_length: + span: 1 + factor: 1 + attribute_options: + span: + min: "1" + max: "=pinf" + factor: + min: "0.0" + max: "=pinf" + attributes_required: + - span + - factor + golden_io_path: "plugin/disentangledAttentionPlugin/DisentangledAttention_PluginGoldenIO.json" + abs_tol: 1e-5 + rel_tol: 1e-5 + fp16_atol: 1e-2 + fp16_rtol: 1e-2 + configs: + config1: + input_types: + data0: float32 + data1: float32 + data2: float32 + attribute_options: + "span": + value: 4 + shape: "1" + "factor": + value: 0.1 + shape: "1" + output_types: + output: float32 + config2: + input_types: + data0: float32 + data1: float32 + data2: float32 + attribute_options: + "span": + value: 8 + shape: "1" + "factor": + value: 0.05 + shape: "1" + output_types: + output: float32 + config_fp16: + input_types: + data0: float16 + data1: float16 + data2: float16 + attribute_options: + "span": + value: 4 + shape: "1" + "factor": + value: 0.1 + shape: "1" + output_types: + output: float16 + "1": # Legacy version (v2 implementation) + interface: "IPluginV2DynamicExt" # Interface type for the new version + inputs: + - data0 + - data1 + - data2 + outputs: + - output + attributes: + - span + - factor + attribute_types: + span: int32 + factor: float32 + attribute_length: + span: 1 + factor: 1 + attribute_options: + span: + min: "1" + max: "=pinf" + factor: + min: "0.0" + max: "=pinf" + attributes_required: + - span + - factor + golden_io_path: "plugin/disentangledAttentionPlugin/DisentangledAttention_PluginGoldenIO.json" + abs_tol: 1e-5 + rel_tol: 1e-5 + fp16_atol: 1e-2 + fp16_rtol: 1e-2 + configs: + config1: + input_types: + data0: 
float32 + data1: float32 + data2: float32 + attribute_options: + "span": + value: 4 + shape: "1" + "factor": + value: 0.1 + shape: "1" + output_types: + output: float32 + config2: + input_types: + data0: float32 + data1: float32 + data2: float32 + attribute_options: + "span": + value: 8 + shape: "1" + "factor": + value: 0.05 + shape: "1" + output_types: + output: float32 + config_fp16: + input_types: + data0: float16 + data1: float16 + data2: float16 + attribute_options: + "span": + value: 4 + shape: "1" + "factor": + value: 0.1 + shape: "1" + output_types: + output: float16 diff --git a/plugin/disentangledAttentionPlugin/DisentangledAttention_PluginGoldenIO.json b/plugin/disentangledAttentionPlugin/DisentangledAttention_PluginGoldenIO.json new file mode 100644 index 000000000..4d522ac59 --- /dev/null +++ b/plugin/disentangledAttentionPlugin/DisentangledAttention_PluginGoldenIO.json @@ -0,0 +1,86 @@ +{ + "config1": [ + { + "inputs": { + "data0": { + "array": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA2NCksIH0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIApRUf4+KJUNvuvOJT+k8sI/58VvvpnBb76lI8o/mnZEP/Je8L435Qo/FUXtviF07r71xHc+Xub0vxzK3L8T8g+/c6SBvwfloD5EdGi/Xsa0v2Gauz/nMWe+OkyKPSZetr+rXAu/YivjPcJTk79+W8A+dcMZv+JYlb5yCRq/dBftP3cjXbwSY4e/TpJSP5tEnL9X4FU+eNb6vwACqr/+lUk+JQw9Pyt7Lz4C2ey9QyqavjVAvb+2Rzi/2NjrvshPhz+97q8+Tavhv1bupT6BKcW+w0otv9GWHD/L94M/YGhuP/bWVr8WUZ6+XJupPlO9eT9TVvW+Yx0+vmKcjb9MHZm/sQFQP0aZrT8LepO9xHOAP1souT6RJiW/2Ai5PmLexD9TvhK9PkbIP+epJ8A0aFI/vEWyPX4Xmb4T7bs9qGj+v6vxYL5117Y+oiu9P1utBL9w+U6/JnMAv8tXaj8RUqg+XZ4Hv39lAz+Y0MY9Hvl3P8C5M79Vw6e+ZsLIvnVUu78UnZc+CamFPsmOpzucN3C+3iq1v9Ne175HeK++C2JNvxUoJb7B384+im7xP4fEMj6l3YM+GneYvUya9b+gM9m89LN2PcKlHUBG+kS+amSaPu4tDr0+l5W/BUiSP69+QD8SgUo/ns1ov8SOsz/bb7O/RDwWP20wDEDKk32/4/gQvwQWzD3I4wC/JHzGv79qjD2R+Ye/tXryPmNfa79AZMY/SoNIvz/lpL6qQlA/9oydv0LraD50UKc/A8LNv6kQPT5cD4U+iyVIP2dUnr+5BKm/9p0FP2AOmD6ZQIA+qWGxPhoWLr/q020+mA2WPrzfNr+z0e4/O5ryPqJ8mL/mEyg/vYR5v2B+ST/cTJQ/PRhSv9Gfdj8GWNM+iXJSPx3K8j8GR3u+2vRAvzi3Y7/x2FC/gOedvXmrrj5rqo0+SMJTP+MFVTxoDbo/GoGHvkAXLkC8KyA/rW5bvwETib+gBvc+btNkvr3INj80TPI+UyeVvXnHWL+E5sG/nJ3kvvQ8Wz9kO1s+XnSfv1dWMT5SSMU+e0Rivx1qHT5EbG492kySv+gvtz6Tjw8/bKGKP/zihj94V7C/TRVwv1rZAz96hwM/KtoDPyeTdkDhJRI/N1qRP3Y5dD+UwSY/+Gqhvs9LQj/g10W/lIByvpKB+L6hrac9XiMUQIwC77+/ri8/eW3Ovw6h8b68Yos/PqWDPYrzib8lHje/HvotP0/5Or9Sp10+iqk6PUjPJr9hNglAhEgiP/CbAcDh7T4+1mopvxI5Wj+k4kq/8PrqvdlGAT8iol0/UKOZv79Dq74ILPO+lkAnv2j64T/DWc8+pWShvwD5aj9o0QdA0ieEP7d6wr+H7fe+JSqiP9MpNb9NPOM+a05GP1FLbb/c0HO97XBPwCIfg7+dUIG+XLefv9vy0D/fDre/g03hvtzgBT6le7g/Vcq3v43ilD+SqCc8J0R7v9SY7D5O1ks+0KcZv2X0jj3TR8W+yHvoPWWBKT+ZAss/vXCev56DCEAD3vm/jW0bvvWbFj8r3o8+PGkfvwAeVb6eavy+nOAWv4Z/WT+8yrY+hmIxvy1MZj9dVp0+uxdQP1svIT8EOVS/BmgPv6JOPz86QRw/0jmrvFdJ8D2GiqM/OXEXv5MODD+XC0++0udevrikjD98TlM/K0JQP+4Zpz9AEKw8eJQuP0nbnr4i+aU+OUQFvtKlxj02XBg/6XZRv6zpBUAtxYC/iGqbv/o8lD9oqko/UcYfP0DbID+wpki8drJlv2w/mz14Wi2/cqF5PzaWFr7JU1O/r4ykvsBr0z5BUBC/CX1SvySJeT6E2Ho+B8cBv+8r8b6Anm0+1Fq5v8YntL/26ze/5JFavkcvnz552Lw/lY9bP+3GI77hx5u84lKAv9yol7wRy5O+XjulPmjFU7/l8wQ/yjDEP6S93r0prc0+R60wP8Vszb6AeGU+W1BOPGgKyD3440W/kMnIPKL5/j4Tv7k/xpJ1P77NCUDkcES/aFBfPwK+Oz67JQxAo+xOvwP4Vr/McRm/6O0HwOKXBr+FVkK/1AAaPqT6rj5eJvA/+k5zP/WvE7+B/mW/1Nz7Pmf9qL8+beo/5feWP8s38L7+R9u/sUutP96T6r3EcJ4/NRbMv6RwGb9X06s7uW5APftu5r4Ycx8/yaeIv+3LEb6OXfY9Q7IDP2UsNj9G9I+/2l3Ev+qKoz8QJao+0Jw/vyaMxj/T5uw9NvOWPyFHij1L4wNAAq/gP3Xwfr7guHg/XDclP1Ivrz85BXe/EqEvP3R6hz9gHuG/BHWXv8iCAsCx74m+2bA3PzxNwD8Bv5c9eXbQPyqnsL9wCNq//IVjvTqkxD7u6gW9+VAEwJGEtr3b+Ka/qW8rP8Syuz72m3C/yIwDv0+Uh7/mXYC9NYR0P
4tYfL8xCQE/974Hv7f5Sr+8Mtu90oKEv/a7Db8QVJm/HXz7P4RwED02ITO/jR1bPkAM5r3bRWK+BzodPwbsQT/szge/02gTv5PTjL6tUhPAyPHBv7z1rj9NjtI/TgN/vj2ZEz8uXJ8+YgxFQDtOjz/U/AK+TJ50vwigzb/FWFA+NKBBv2kMtr/NhSW/KnCKv0L01z8ls2E/rp8CvM9uvT9Gc549H31cv7v1wj8C9gk/e8SEvyToQr6FKGC/lf+wv/kZbT/EZ/Q/QwSzv8AeED+DkCa/gGj5viGnF7+ALl2/nb5GPSa5VL9ReYo+esZNvcyudL4YVmi/SacTv1JhQT8cPAA/D0F6v7xuyz3oWkA/Eq/Vv6cZCz+2oSm/wRISP/RkQ79gBue/UFPQv7n0RD1Z+oQ+S4Fnv8x6Iz+wrNS/2FSHvZQCm7+73ia/HCVCPQ1EXL935MS+NM6APzCvE7/r71U/PJqQvz+hBz9Shbg/bC8ewFQBTL8AuxM/IetPvtUGvj7Gnhq//VWxPdppH77ieZU/c0OCPkLarD6K4dK+hqf5vkR43b6i9ck+R4vXvl9dlD5e0wRABwJfP47spr5hwZk/Qe/QvqJwAsD5CIG/HHbvv5P5s74k4pY8f5XWPwZjpz7kW2C+7VNUPz6DDcDxRHE+bFdFP1BCvb+IZpI/Z0+tPp6g1L7+/SE/CFMRQCY7Oj6KLX4+WjHrvmePWb/jkFQ/Tyhbv1KRkj2Ej/S+2Dz1PsLVqj4czoQ/b5ACvwwtir5CkHq/aHrjvogtwT4CykE/BxNsv36eXj+Lha0/va3TPtg68D8NF0a/2FCfvxut479hfr8/goQnP8CsY70MWI8+BhCQvzOHHECPUgQ+YQrgPdfLOT/YRvY+20FlPolcSr9NZPE+LubwP7k2rD+K7cs/CN8Cv75Wfb9JzgC+zj9kPXgOjD+uoti/TsjDP9PMIb4okNq+o4yBv1jS079Pu1I/uyeWPXAdpb8kxaW/+eurvn+i1T8o6YS+/WbAvxKke75soou+ypkswEtkXr0aemy+lToyP5iq7D9IM5A/x6uJvqSijb/tsSRAB49yPa83ZDz6ocW8u9ZKPjfTE76D2xK/8/4Lv0soBr3jHQu/EH02vxf42T1gjIK+2ILAP32pKcB/tos/uH+fP22yBMDAdK++fy2+vlgptL//Hke/WSeOv2ZK4D+fiG8/UcKiP4C/OD/FhJC/9kYGv02P+j6vbpy/EYc2P9wXdr6D6L++eQE2P3t24z6P0Li+62SUP0hgir/1rR0/fNUXP998nr7o+qY+fSSgvwmNbD/8Vj2+LdEFv+9Fhj/eTzS/dki0v6A/x794Ixs/HOWjvxmd4D9VPgXAeyXZP/cUWD6GEca90X8Lv5JbzD7ZJhq9/ziNPy/w6T206Bk+YSu6vtA/ab0ymJ0+zObav1eRrL+ORj4/WfcuPiFmPL7CApc8O/axPrEtCr/6Pke/qYtIPqN2er+BBtE+Qu7Zv1+7gz9M+PE+VBaDPqORez9ELtU/4daBP8Sh678uyaO/HPQfv+a81TxNhQQ/Wco5v8k/Pz7HYEG/bowcv3kNtL8EWWy/AAStv9TSeb+83YY/zgtzv/N4KEAqlPw+rkU9Pla9W7+CRzM/AF0Tv0jg+T1t2CNADbvEvWMbkz9fAzS/F1APvZip4j/qgCC/UP7nPzsvNT/T/Q+/eeUhP1T5eD/wLh8/IP3Iv6klOr+GdX2+jnCYvV7kHj9A9zU+kOyqv0upwj5ZTxw/bU4PPwZXij/se1U/pxnrPgqzj71emtS/6/bbPhWsVD5iDIs+f2yjvxBgir+2zYY/lgQivdR2Lj/w++c8JsPzPF4zcD+CGwS/+NrEPVmv7L5Kdt6+z0uevgl3Yz6KHvW+nrygP/wEZb9IWz++biThvpI2uT+oRUk+exOEP9gmvr/Quog+2L5jP4KEqD2pYYg/BG0Ev39ltD8mIRNA+sW5vugY5L6BCLo/bC/KPyjaBb+6Ite+EkaQvvQWrL/GLGu/r4eAv2KORL++EQ69/dVvPs12xj8hlH+/jfx7P+QfW751mkq9+MAsP1u1j782y8M+c3IqPpIi/D7qDZQ+oyMdQO1CI79r7we/I4Yfv8AzDr/OKyO/sjGYPxXTtT9uHBK/QRVVv2Fd8T59Xg2/0gciPw3LTz7nA8K/pxTGP1Lf5T+43xy/0oDGvvNckj7sPas+XJYoPzGnAECnMTW+NV1Mv4iNsL87Hju/JrAHvRK05T8tggS/qyhlPk+JhrxFHZg/Q7khQATnB7/Ol/q+EKeFP3GQLj/oYOw/UXwVPyL1t74oNRc/AOqNPx8LUj+23AE/zIiIP3qrlT+W6rA/2hEmPwEhK74dPBY+426aP7IiUb+/wrw+tGPJvkN66zxQpKM/ea9DPkA0Pj3ED66/eQo/P3Q+JT/EcgpAHJWdvvJoYD5wXn8+/enJP04qw73p244+G58bP3YWPz7ykuS+h79GPsRsiT/aZIO/NCkIPh47M79K95g/yvfCv4EVD7/qIcE+F2PIPxOohr2OIQ6/wcnwP4VYub89uQzAk0nhPqCGAL/Ct4K/2VY1P+WmeT51ZxC/BOGjv11ZXz+WcyY/txzLvZpe7D+K+Ii/aUTDv+MgMb9nuDq9+i15PpYGd76bQLQ+cjKgv0fNuD/hPqi9jQOPP7J5rz6P2+k+RdwRPw465T57iSQ/rCGqP9g8ST5FgTU/Wce3vcNVuD8MKC2/N4XmP6p8JL2jI7e/zi0DPmdZLr9qNFc/XRInvyhy5L543PG/spTnvtcgG8BUvcq/iapCPzMqST+T1dk+wIt3v/xsQ72UGGy7S0WUv1tvwD/RmmA/b0RivrE/3DxOYlU+yasCwBEcfb6Fli6/FjWAv2Hsj76YGuY/Rw4kP8k4Er/JlBI/FB6zP8u0bD/6PnQ9pp0lv8O+Mj/rdsk+YitlP5+aIj++V4Y/LQUJv16gqD+NV0o+E9EEQJ1uML8QNN4/IKlKPlXDJr/iv/e+kASkvkcs2T6M2AU/Ad4Sv0iDx7z1GglAIiDdP9Fl3z6JqRs9/NL1PYQPHT/e6oK/3MaDviqU1b/5Zsw+oq4lPzdk975meMk/5OWcv6Nwu7+z1mU+UQeGP/GK1z/h8uq+NxKKPw+7Hb02xTC+ijdiP6L+Jj84x8m/Rv+8P9WmsD/hJCC/w6bKPobx/D4Id4U+zOAMv4LvK7/FVtG8/BuWP2EpCz8pwb2+DI5FP4ZONsDBCpM/8a7ev9yRub5YUY+/H7ilv/mVlD+Idu++9WixPssvQL2yPvQ+zFSdPRc5pL9YC38/qs38vhM+x7/pMdu+5hjAPyKoWT+IgrK+6NGyvlmtpL5w6QRAC43DPmsu3D5U4IM/JYV0PiyhhL7vD0m+rqOSvVh2GL3uRTo/NsVUPU2OOz+8TqW9fQuhPQrF/79zlGo/8GaxPpd9fz9AXDnA7qcFQJXwDr7v2I0/oxuFv8DeHD9S1oa/U68fv/v+9D8/QkO+uaZePsK8Xj8Eyv0+agcaPivcuj6Q0RlAsAFsvens
TT7Ye4Y/4IGNP5zwlz/TgyM//E2Sv0kU0T9yu5K/CfOaPjkYQb/3WoO9jFOoPu+IpD7/Bdg+F47OP6Y16D4yBHq+a852P5JAmD9BIp2/Nu8YPw+AMz8+Wpi+KRewPyyoGb4dlwA+vjkxvkM/fzy+Uoy/llO4v74YzD910li/5Mt9vyXRCcD/kiO/Alupv44t0j+wQYE/nyowv+knEED8VHs/TVCmvkP2H8DOnhJAg92xv22c0r+X44I/5yQcQNovsT9ZXBA/0kEYP3F5Wj8lSUI/U/iPPmNn1T3TMIC90wNBv6W0j77Psti/SWbJvU8Ufb9qQo2/LDY4PiItsj/MFms/KgbJv0VYfb9h1nA/S4R7vzwGZr430Aw/bOV3vx3P1z1Zwaq/O/MZv3W6oz4458u/6IXhPnPfoLz7bw0/v0llPiecrj/aOgA+C9vbvh13+j2UFQs/fyFIPXlDJj26tTO/4LMpv5KIs78h8t8/6TafvzxiMb+K6Te/xBllP6UDl74Dtp8/4mksv1PYjj5O2VW/IEoJQDoDmL/SoJ4+ND8iP3rd0z4MvD2+uO8EvqhzMz2xhxa+yMB2PzV5DUDItw6/tFWvvzjNtL31GSVAnr1Nv5TO0T/mvtY/9rcNv+CoET9Nb9A/CB3Cvl93UL4K6RS/jOOBvw43Jr8Tqpy/G5sLPfccRb+TZW8+lyfHvyJpqT4nYlU/uzL/v1iEvz5CJJ0/hNWav9oW1j+pidY+qHs0vx5uZL2D7g4/t6ibPerrCT9EsWu/7WwtPpn0tL99yuO9fmZnv7FLPL9NOJ4/DbCLP3rwGz/o0Iu/SgCivslGmz89HhE+5W8UQPVgyT6GqEQ+g0SevvK+CD4OIRy+nEY1P3HqdD+bNkm/12Wqv8cI67+4CwI/HjuNv/bICcDG88Y+To0fQH/uxrtVp1Y/J5anPamGyr2ZSGs/3Z6UvqXniD6TtaQ++gcrv332fT+nKDO+hHhBv7ZYCT//AWa/LNzmPNhnFbyg/oo/pgvzPlYFzbwiWVE/UvKxP6jMDj8Jnik8QOqnv6VViL9qRpy+/ggcv2h1P77ACWg98pkHP6phkL2+Fvk+KQuEPRfc/L9JeHC/sIsTvkfXmr/ulBk/pe/DP2MAnD/FkFq+HdC+P0g8GD6Ilqy+9Qcdv0/dmr4dv8a+l4EuPoFtJD69n0c7XLbfPhlnmD/7FXM/IxG+v3JzI8CXL28/4vWuv+coZr5DxpW/TKfmv02dCj/+V0I/MJYTv6PTJcCt1gu/iZrIPvlMvb+0wjs+KdZ6vHNMFD+N5vQ9DBt5v0EpmT+UVSK+xa3fvKbqbr/k9eK+boJiv8YYMb5DGds/dZqvvy6Jzr9PT7w/8lhWvlpIK7+ZG4U/nwkbv7C66T+NjC0/hs/5vlYRCkAiEBu/9vk9P+E8mT51n6Y/md/HP8gWAz3+30C/eIHrPsF+Lb9W2wBA688LPm0Lu77WHD0+om6sv7O7eL8qp5k/OSoovy4Bhr8TYgk/J8WXP1MNOD/7/H4/U71Bv+T9tb+zK8A/Szalvi9tgL5EAqo/F2UOPx9q6T5mjwpAnL0kv+6GbT+Yhmk97YSJPtqkwz+GAQI/xs0JP+xHiT8W27q+ctZWv0+8hb+Tsfu/5pgDQO81jb9PkGK+ebqNvmlknT4n1FA//kdcP5BEFb/0ISu+Uq6QPuOofr6Avc0/EmH7PvMgPD+Wsik/ZDSWP7ddOT6X/qW/5qPMPlO/Jr9sUwe/9BsWPw+Anj+7Qa48XB+ePi7i2T8AiHY++oEmQD3FED+tYOG//9pAPzEnwz6eFqU/nVUsP2jHDb7OtZy/MQpWvrW7Wb8vnRS/E60WP26/1T9jEso+shKZv/ai4z44K5k/uxocv8w7Cb7BpnA8Ge9Iv7X1JT+qs/e99czWPrAyY7+J+t++/O04P/3jvr4nDd0/HZ3MvsETZj5Gvm4/Ao21vy9i4b+1SMO/W5yhP5NGDb+JuSNAiHIQvwT7PD7cY8U/02MAQK3vA0C/q5o/exSDP9mvFz+sQkc/ghoNv3t1Ub8AJl274UQuvoIN6L5zRjI/4o50P6sOtT21H70/3iKSv6pORr6rgTe/rOruv35Uqb3CVvm9ubjBP+B8IT+OGIO/6FLtP9VKnD9bBBU/c+tnvs+ddb/jkb6+HVyLPyA68T8BicU/eUr6vqBPj798RBA+OVzivz12pT4iJRe+VpzuvjsfzL9MewM/FV8Iv9a/lb8l0zfA8GbhvCTZ4j8kpNQ/hQjqvpIqGr8uA/A+MJZ/v3iEmj7WHUQ/JgydP5Udzb3aj1C+eMNgv26uU78V6me+VRe8Pq/gaT8jnU2/axC/P7bQir53Cq+8REk/v8EmG8DNUGI/zaE8PyoKkL5uMok9mBQEP4EByL//bwe/7lRLP46MoL86TZY+eaStv+nP7j7O/BG9pLzOvy4WlT8yDjy/tGxPvwNiTT6MBpM/cwaCvwOkfD3Wjds+Xm8xPxitND4T67u+9NxTvzRssD3aO4m/aPc6wMSE3z5GaGc/SToXwNs+gb/lgB4/Aa4DQJFXqjxnXjq/NEk7vvP7rz/pXSW/2ZdMvykq975YDXS/kzr7PXf1zz+maqU+ejSBvklolb6jFsi/exNiPxxpn726zzi+4FtMQCT2mD5hdUC/i0vavkUAkz9R+uc9fhm4v5ZSaz9+Cyu/OcjvPwQ/ij9gB+W+WPijP9T3ij1gT1o/5i74PtSqWL+vvyS/wtWDP6xnq74Dq86+6oJ0v/rh2D5pAARA66SIv+Jnxjylw7Q/BxujvUed5z6D/Ie/EUvbPr+iP77NWHw/RPyXP2m7JUDXYhQ/xs6mPq4MRz420rS+wU2tPts+l74JgSw+CaeoP2PWgL+K55E/OpeoP+nN8b14zAfAOZobv+sDpj/NVbu8RdJ/v+44Ab/gMlc/u/YLP56qdL5p0Lu+hpTIvhMjbL+hxM4/MAelvtrLmz98usI/TpF/P1b93L62tc4+3TXGvAFZZ79rEqY+x+qWv+EFmD9S4u2+zPxNPhwLkT4xj4S+kTEWP5Im875XDV8/EEmsv6ZpAT7TLvg/2wqAv7KALb93jwM/SeQ3PsqFsz68dvo+G30iP6UKjj/A09E+Pwx3vsotLD9VL/M/JNEHvsF6eb/StI0/Zor2vQUNC8Ch8Fg/RQsJv4Npub1T+ak+WRJDPqKeNT8S+N6+51oDP03jhL6vIj0/uYgdP+l4b792AYs/5jQJv+DcTj8VDbw+mknrP0TUZL6n2bK+3hWfvGE6m77+yEw/ROPOvxHfhr/ErYi/XEdzP2H12j956dW9mt8svoJ3jz1uuJQ/BGdtvwAXdD6Npnk/tUcAP68hQj5HIoA/wgEtwD2JLT+BcSe/LlLqvyzeAj8L1K8/Tr8Mvpbvcz8iX84/Hk+oP1zq0T8R/D0/9HyaPTcNzb/O93u+At9Xv7rwCkBpGzS+zVL8PSUuDT97mDI9b/fYP/NlH78tR0c+jRI+v3/2qL/mnBy/5rMXvYDN276AQjG/NgK0v0Ezqr2tmsC/B5NCPyzWqD0
Mkbq/p1CevlKNQL/XaqM+4ZOrP6cF8L/Lkus92vkjvvHcKz84UFo+EIFAvwpbo75ayEu/mrqJP8SVrjw3WvM/eXd4vSVaNb9lwcG/SM3mv/fEyr/TxIg+zjsCP3Vkyr87IWU/x1P3vuhQFj4/Xc4/Q5dlP9l8ib4sJWS/WLcJwG8aOL+GMli+zLd8vzpoBr6FZJ09pkBmvpJmJr/Psyw+C0biPjKSi79vmbQ/k+jJvYZqmjyKTTU/NtBuPsMAdD/wAZM+ssgcv/0WuT6fZZK/jVTePSwcCL2WHFW+Vp8Dvm7g8L88eQy/eCW+PWWxIz7cioO/twKiP6O9Xb9ZLng/OLnaPidvJb9jPeM/F8mYv7BNaz8VE4A/xK0rv048sj8ZBoC+p8+TPudIhT47iAm+IpFPPxQiSz/lz9+/n/SmP4rM1L95KoQ/4DeQP8qki79AVtK+u4eNv0IUXL6rtp2+1ZdHPzK4pz/EpbI/PuoPv0I+Vr7nete/gE1Ov4QAdz9ry84/Jf+dv72rF7/jUNi8SXGPPi1CT7+HHtk+EpvyvjjJbLxJ2Qs/53HSO/lt374be+C9pPe0vRZyvb7rgIS+eKDMP2uYDz8ySZe+m2syP2Pqqr7zKJY/vkG9PjrB270nO+U+eBLJv+s5kL+J0pi/rV8SPg3D3T+fzQ5AUVcjP1g3AD8Piea/rOwKvzKsSb/e7x6/5S0svtu18b6yWf2/DHc/P6VPib8h/XQ+xb0EQMpca7878CHAJW+Svj/sjD8hq/o/4mGdv1xP/j4sPe6+c/vYvew8KUBVYcC/KuqBPnt17z4M+4o/ZKvIPc24nT4phsi+AsuJPua2r75WHx8/kT29vk4TwT6RuO+8aiKQPzqCUr268uK/pYahPxLeZ787XSe/Qn0Yv5btrz/jrgjA39BIQN8shz++mGQ+iNhgvSU0kj5IaAU/2SwlPxg8Dj8Udrc9FRNKvmLnGr4Jlke+YR+RP1XzFz9ULzzAHeknP+1oRz4bQ5m85OvGvu3ijz8YkXI/UNtFvyhp0D5/vni/VJewv4pwIL/SxVw/AQB0P5FZAz/hnzk/QyQEPyM4JL/0JN0+o+dMPz0ZQT9QLpg/Z1M1PwXxsz6v+og/HUPZvIrCYb8H+ya+8bE+v3zYLL+0/BO+CNxKvyKtnb73YfK/qmlaPgsBnjq4LFG/U8QoP5kEcD+GxM2/7kFDv4PmRL99nXC/d1hUP1x6Rr59boe+SD8AwMKqIj8FoJ6/63t1PVoEjj4QKq4/bYenv7A/QcAOQzw+J3fmP8uVnj/1sFY+vbf7vpafTj9TOnm/KuXzPn5mAT/1tIc/RZ4wQL/qyD5xSwK/OIHRvBRx4r+42DG/eI3RvqgqBr/qAhw+HYpSv+99jz+RH1k5YGAYvNHhp75O6h4+pDlTPz/8Xb9Seii/+oGbvoBFrL/iulG/OdPzvvjXXz9wboY+bzxGPnPUWT8xqwy+AuvHPu1l072T3Yc+rC8Vv5UVHMB6gAm+nBy2P3QcbT9KJHc/iDmePxyStT0hDUo+bx4ev03Uob4uox0/4xiaPwHLDr41f+a+uGYKOq/oGT880Li/ofQSwPzvDL9OQJy/ehUCv7lTF74tEOi+deq5PxdLpz7G15k++EgfP0nFkb+dAYU/TiqbvZ6kKz+eMom/keHGvzdhUT/AuMA+2uxmvz2iXr8/DpA/pj6YvxtD0j8Vj2a/p20jP65AqL5+aho/DksLvy+zJr7emic9rkeAv6qmPT/3YQO/GRZqvq6Nfr9G/SPAyJxDvktoGkDU20g/8sedvKCZhr5kCrg8ABAMP+Akl78Yoo4/OyM3PwXbNz/kf+A+HbOgPKJALD8igRc/00S1vpHXEr8JmtA9S0bGPxCbnr/e17u/EbcoPp9vUD16gDE+3s55PiKPZL7Ys74/bOrMv2IYVL87d9O9A1TSvy0TNL4Oq9Q/JBqrPNw8bD4UTaG/3Mkdv70ZwL6Oq6K+5wykP9fEDj9BRI6/wGt8PuwW/z5n8JE/KE/KP5vugb9clE+/UPigv+eib76Gxu4+9MF8PzaMm70Ow6O+cmYbPu3LVb8BuwVA1sfNv6IsPT7EggFAO9HeO5iZQr4WA7e+mbg4voC5rz8ljw3AjkfEPzpEtr+nhoi+7cXbvm2rFj9Xj8y/5aHsPkqOAUB8fK6/VUJCPqp3Kb/jDdo+zducPHw4JL9lyvk+4fTmP0l8Q74OQjg/+ImlvwTZdL883/E+hPe9P/QStj4kSaC++sA5ulwNoL+EwRo/lOBhP2J4575dqfC+MSGIPruZ376JcIe912EGQE70fL5ceLe+TcUlv12DPj/Ukjm+UD0mv34gqT+NtbU/XLUZv8jq7r819oA/5kMvv3BmSj9dLPy/QoFkP68Hm79bEzs/ltlpPGA1dL8QZ9C+kbIvPzHf2D0XnBU/Afz8PxI5yL/SAM8/kbjVPbYWZr+4R6q/x6hBvkPxaz89nAK+hm3BPyDAub+5EEa8bk6gv/Utuj4NC2M/F27XvnGrJsAQuUs+MpzfPrn/zj4cLp4/SRiJv6UiLj8WpJg/wqjjv2epoz7uEQG/XPWmvacCsj4Gdvm+NPssv2jjCz3gKou/UfyKv2jrLT+vC5O/dZMqP8TY7D4859y/4XQtv5TYmD+qLXu/Zcbtvj+T7D5IjEg/tMmAvm72GL83ELY/RJTeP3GWej9Hu649gepOvwGYVL98wwU/SzjWPpVnsz/mgyY/72TAvzimhj/ngH+/4pfEvj4agD4Icv8/6AhHQDRSGz/llzu+XdUIP2M9Yz+MLqS+fMnlP6uXaz4v2P4+A3oqPzPa1z7OxFY/TgQev93sDr/b0Yy/TgbhPmJoRz81Yeo+xVXWP89et7unMis/27yLv+8xxr7DDjI/wl5ZP9aClr6hopK9sEnCv4jMtr4n8GM/o0ATP6ErAD9+30s9EnLnO84KKb9r5jI/xInXPuXp+z44qga/X9AJwIBvjD8vKvW+395cv9WHMT/htci+/6uHPxr0HT9m/i4/o9euv/sgmz+hwoU+5RG9vm3UEj6tW+O/8TrRPnLCg79NJK2/qtzCv5Nsjj9oFyG/MVHEP0kqCb+1itq/RuqOvxMvnj/Gox++jlwMv9HbIz7VdAA/7QaPP2xouT+ZM7i+7rupv7Wx0759Q4U+6Lh2v9gHdb/3BLA+sEdHvSRWBj3ALEK/Ku5rvouabL8L5GM/DYOEP+JP7L9s9G2/RY6/v/hnJr+H4aq9+Y25vwH/a7+tgYC/4z1UPmcEjj3Lwzi/iBA1PjrzC79hFou+rjPWPzqUqz+uWKa/WGlUP6+3Tz9L+pK/aZtRP/TaxD+Mr4+/d+Fqv7tCgj9vAYs+jS0NP7Zhrj5MCcg+18mpv4IOhj8gtZU/aOVqvuMUMr1V+8O/M6YDP1tyEj8CvH690OqPPywMq74EihA/XISCv3HwwLzsiTK+H7dmPqIyvb7BoAa+1HdTP5Cf375SpM2/XvLfP33TsD/daKW/sZAwP/TCAL/M54Y+jKSWPpkIcL7epEi/TMcwv46Lar9N8l
S/xZSJvQw8N7/3mi4/FV6+P19+FL90JnU+utb/Pkiq8T5vjps9byA+P1bp9j65a56/A4FeP34lYz+4ZkO/tGQbPabuLj9dVla+WFeJP7BJF0BhNkm/EsawvxyOmz5SuDg/PGhsvnIEuj9vV6u/HWMxP7IZG7+TFNw/vgr/P5tDRL/Hrwy/9A1cP/01xr4dATq9YPvPPNq39b+3t2K8AJIwv5h6/L7vyLg/ONWgvzcuUD871I6+ujyPvtBVSj80G64+shMSP/Padz9Boam+jLscvzbmir8bTlO/9708QKlRnz8B8Ky/TUapvya39j6MGQw/p4MMP5GJgr4VdwC+09+nPqzorz0ECQ7AmFBrvsb1Wb+fajM+fA4/QI8mvD73hqC+L/trP94i9z6gFtc+j1obP2meA0D0wJC/53/yPlAnbb8UNg4/Ci9rvxqx1b4GFpe+IM95P9Ufaz+rc5+/lwJhPQanNb+wFqG/uDlevnXxnb5TTxtA6azdPsVVsL8HlRC/sLyCPwypBUCAAcu/XaHwv8l77z+Ce8c+b0hev3TdCD8XsCjAmjddOy3Ypz74nGw/WMeBv8B8rz2h6Gy/qcGCPms1Zb+Y8tC+wu1+v92wJj90w1s/YEBwvnKfHD1ya7m/OG+ZvlujTb0TuydA71SOvylquz8jMn8/k6+PPnoa4T+chL4+4D/HPguVWb3onZw/G1mMP2MsIL8Y3ai/KNpPvlTPTL8sGYK9X2SeP3Ej6r6SZy+9P6ltPcQyWT8JzQ/AtlAbv8FaWD4wnJk/o9r7vuEy8L9ppR4/HKciv/9GmL8Z1R+/N/Q8vozQGr//Jw3AIaZlPxsvoz8lyiY/r5+Rv8xTTr7CsNW7l0oZP10YKz/Q8ju/Se2nPagg6j5jWLo/ilo0P57/ST9Mras97Ym0P27M0T5CcFy/oaGzP8qpMj+q1uK+GoD1vsDYlz6PJu0+/AZKPtpjnz40mNk/UyiJP/o1Qz67+XA/vimEvxi9yz5al+c/bEdfvor+WL9M7ya/GHmLvyjmSL89s72+n+mzvwrkfzyj+2Y/tmJov+V3wj9FwwI/EeqDP9hnKL8qwlo/fcSLv0PnYz+n9i8+36ENP+J5lr9MI2W/8+MYP+/Gcr9gKe0+X/6uv/IhWT9Tw52/h2MNP1kpID+jXzK/sf8UP7BYhT6K8Qm/UhiBv1I3+79VBbM+dUvIv9JDwz2v4oa+TdwtP7Wlmr5hrai+4Vs7P7qMqz4y36E+10PwPqGNxL/nvEE/wcocP6wigr878Hm+rgAhvYa5Cb6y76o+Bze3P1V3ij/M9qe/+z8fP30aqj/EC8Y+P6WLPwrJAEDqCIM/wUp/PnbFhT96XRQ+UdDDPJHss77uHcg/i4RRv6AkxD8x7v8+dxyzv4c8vD7WZgbASyEgP4ieYj+kpBe/kwb9PdUh+j+eeAG/SYqHvyWnvT8PNvs/FjNyO593gT98tas/chM+vwJ6+L5MjZ0/jK7XP/YYED+DO2G/BVj+P7XtB78ul6u+6EavPrvuxj8rnlo/QGnUPic07T5wPDI9O+IOP1DkIcDv3Je+GR13Pm5Uk78rzMU+52BRvm2z4D+2WMk/EX7uvjN1Gr7ScZe96T/nvqG0Rz7GHkK/AbiQvwFqHz/aKCE/o+dNvwM2ZT8UryG/4J6BPhECUj9UrQm9zdPoPrw0BL+JLkm+0cxSvsIfP79JBDW+IDnGv4F4+74z25G+/imDvhtLd751/Hy9XHn1PlrgXz8EVya/fAKav7Zhhb+pcvm+CC+0vnYeRb8n56W/q4jpvubHOT7aUxg/R7MOv3N0074aPG2/XMYAvZ4yWb+BuBI/Rpfkv2EhuL6+Kpo+Hj88PqpaLED9GLM+3ISAv8KCw70KXOO/exGlvSlDVb8EV2o/rK4Mv/gi773rsyK/rpLeP+95pL47keo/QGxQP3PR9j6Xyrw+zZ/JPvq99r/OyY6+RlxYP2pISL1D3bO/DaZWvSEg2j8mmp8/6/d9vS6Scz/Ppru+dxjav86pbL+9DMg/0kiMvqY5eb5khJm+v7rzP2wf0D9/AA5Av9wivhhclz5YIcK/MAy9P815lb9q+l0+aXSMv/q/Fr/VVla/t50bv/IDCr9YWwy/X1VVPyVsjb+d1WE+FuabP9ZgA7+IyTe/pg5svjwBlz8nxEY+qf0Hv66+9z7pF42/lG0uP/o00T4SmZ2+j61Wv4X9Yr/K6Qg/PE+dP5omI7+pseo+2pEFwIKpFb9Ob/68A+Fov+K+b7+a8yq/SZqVPi7TP74uPw/AjbkHwIRbG7/lVeo+H9cvwKLc/74uuAa/DbWxP5Qhxb4iF8Q+tqUQPjRbCMA6qUQ/55BcPuUdAj97R3tAG2IFwNzC3D9SLJO+xhyTPgxrOr2BNdm+kuARv161qD6/MsK/9SVAP28X1b4dppC/FmjmvkTqoD+4Cwm/PIe3PvMfO78rB0I/b30vPzef7D8JxzO+9iwrP6LXyD0g36U/ZgU4v6u4Pj+6Xke+F3WDvbrouz2nJYE+fgnuvcbdWz7G/8k/bkZ8P+qAXj96POm+SaVjv5SOdD+aRGA/eYC8P3meGr/NDmu+ntrRv/nxyL4/H38/e9juvnz2Iz8/E0O+zuqlPo3nkr9Lgq49xm4/wLVhP75Zs9C/B16aPysERz+A7+4+VbDCPzHscr+Zqd8/G6RuP407cr4AXJE/B5uNv1wTU7+JzBu/FWsHv0g9h7/8jZw/kIiEvoh7tD6MAhK/hwnpv/VEij7FxPS/ApCMvfIkr78PX/4/EU9pP3KV2D0kwaE/KqhYv3chCz8bm0w+pC2HPnvcoj+ahDs/l9OTPlrT07+YxXW/0k77vQQ6vz2EqpC/6VgaQDMZwj9tJBo/EIiTPU9NWb7usHO/G66ePST4gz4D8p6/LBmrPjr8Hr4KM/S/MUJcvxvE077An/E/RHoOPw/xqr/A2fg+Dw7Gv5+Vij9AN/G+VcS/vbSzqT/HwaS/xdSyv8RmFb+Z6YQ/73nCvwpCNcBL/ua+4z4NPy2imT98I+2+gqbSvgWzkz+yU++/MuzGvnv+Qj7//+U+sYICvwUSDT3BQB/A+IIov0sy6D6fd3u/cJlxPXnM5D6cYq++Do4uPqR1dr/okVO+IDccP4W9ID7qJha//JhlPqvwNj89MgPAilyUP5UprL7R2dk+Yz+ZPwGTr7/tnTW/qLyTvhKkSL9nEt4/alVbv1A4Dr9cS1E+CeCZv6qWyr48iaI+rmyqvpxBv71Pgge/KtrBv96npD6moeA/BL+WPKOrZj5HVjE/bHmivwDs2T9IL08+sODQPw2oO79Btug/l3BGPwmUDT8bpG8+SX5+vgWrmT9/ug8+8cj7v/f/jr9ZTj6+MLiePlXJZ708B5w/tb35v7cIEz6Ls+i/gXxCP5Jzwb0e19Y+byddv4LFoz88V4U/KWMVPyugBL6HcRQ/6fY0v7IFWz8xItM/xAmJPzrHOr+xC7k+qYWlvyyIEj9+teY+JFbvv1W1lL+V95C+SgqaviTQmr84KMc+JsGAPn3uRr6Ne0G/C
TyGP2Hk0z9flfe++bYcv5J/+z6gT7e+JL4OvulZPT/IZfS/Ib6oP4fZlD07r9K+ZcC2va3jGb0BmN2/I1q/PxFFKT0t0eI+xJhzP3S1gr/pavI+QQiJvvjFWD9+JAjA9PHKvXRSGr+SUd0+oqnwPvgmNb8HUTa/hqTivViKZb8+jFc/ugi9vhgMOsCt6L++Ae+Ev6XN0L//Y56/jRHgPecQqj+3WaA+zkMbv0Zs6T7jDeu+VNExvyjCk7/tO+C/FKTHvsbYIT6h4sW9nvnUvmoccr8Gths/x5eov8SpRj+mSIC/mZ9Av56/u7/kWwC/FK55PzQABD/feXo/IqsFPw5Fjb81Xam+IbtHv0dQqj9fMJm/X8lkP6CYZD//MOo/ccLRvr9aNj+VBhJAxB0evzN8xL8rpPC/THQ2PxEL8b+aoL6+hOffPqiePT7p09k+9J9jPuCxoz8Cy3O/jk8tvwLNRb9zh1Q/hoBmPwEH5z64fJc/C8qWv5pn1T98+cI/uVE8P/0R5D87DNS/PEIGv9k8PL/esjg/A2uGvzTrQT+6ba8/0fgxP9BHkT5E5Xy/OstYv4Pznz9rg0c/7tIfvT341b7WpgHAsRGJv2Gp9D/45aO/AAdCPu3zgD9OoaC/viQ9PjwbcD/1yUw86pM3QKmU1b9rhIc/8PIwvo+cRT8D8+E+ILA7vwh+aj63z+2/bm4aPzqomD48fyM/bHCHP6s4vD7BpBc+so1hvxpUNr/s5pc/19m3PznTc75gbTw9z5Rnv+Uclj/CWCo/0Hn4P8jZYL9AisG+KQBtPleAJT/611y+YXpfv+yjYT9UnDg/8ZBqvyZ/rT8SyZU/BYUJPjQEoz271g0/k5hcv5QE9jynwAnAZl9gPwPfx78QcMA/gAWpvju/WL4rsyC/1XmTvm+StT9COB/AmnOjP2oRrT6vf5q/1qOJvwyU1j+9DHK//aCTv4aTkT/NVq0+5Btwv/DSXT7dS4O/t+eMPwHbhz+PMQg/oyu5Pp1u4T9tdKS5p0eXv3Ih5j7LgQVApOOBv6YRub5qONU+VctcvT94e78KmY8/jHsUQK/WSD4FcWe/V0TGv24VhD7aS40/MEnzPsKJHbuC2ha/BcyLv0OyVT/w7Gk/edrFv9h/yz9V9hI/6yWzP9jKq79T066/aYsYvnu2AD8n7+U/lsE0P3GBeL4RYIO/qXCdP+/2dr+ocdA/3ZqRvjGYyz9xzi0/jEgMvin3BL96i66+U0zbPjoEnj0GBBi/SJspvv7soD0uPQjAA5XqPgPPer9loJO/5vrXv0We5b9Of62/ZoI1v2UE+j8NoAa/IAQ2PuULzT70NQY+rZqevWP/mL/+t7k/OFLnP1xh179eE4O/TUOPvvPrdr/1hgE/DHA6v0eQCkDlY5g/Aq1ZPkl0gz8ljo0/3V4Qv53zUL94CaA9L5RcP9llDj4F28m/54VNvxZ/l72S9pq9Q3z8Pw5osb9MbgE/Qpu+P29fEUAuDc++pJz7PtHbET9dLEg+Wm/KvSs43z5SECLAG5wuP8g0AT520GO+yv8CQHrILL/6X86+OOv+P/gBVb9b1Ay/1Y0WvmA6Vz8uylQ+v3SlvzM8CL+Vexu/ESaevS4I2j4RH9Y+WlTjvzgviD+0UIE+WTixP/dt4z4n+Iw/B9/uPiBRrD84sgU/yofIvZJvEUDwl2M/6uASP/Fdo79E56S/zt6VPoIDFT4jNx2/xV0QPiFYyz/mBTI/kQySv71e5L1JEk6/moDGvm/F0772jfS+R6igv+m3Ar77gw+/Fnw7wOVjA0DOXIs/farAvmgqmjzF+JW/cg3ZP1va8j9SdCA+DQODP2F+Mz7SGau/g9rSviAYBz57uui+peZfvrEvu72go6W9AA1WPejBh79fjje/Qm2kvx9Sej/rwgbA2aiTP+ZzsT9qt5q+zJkmwLP9uL78coO9SFuBv1rlA79w3cM/qzgqPzywbL8hfsy/0G6nvmqUWr7SDf4+hAoJv6PxAj8hs/c/rcRQP3T4RL3Mizu+3J22vj4flz+dlyC/czs5PeS0UT3udAC/86yvv+pcpT5N33m9wg8APwiaCL/fQ5w/P3RgvyMk2z+Ost+/L5rePqdr8z6hqUu/KkrZPkXHoz9c4A3A4rMAP8CRH8DPNXe/hojQP6vvFL+zQZa/MLBKvpRYBECQdOq/U5GGv9ervz8xye0/wNPTvUeZnr9NIgZAsBbMP3jPLT+a+U+//dJIvW81JL5YZak+arq5P3ohYT9254m/CTGwP9tSoD6L4y8/nLe7P0NIjr/YvRK9aA0Iv56vyL/4g7E+WL0gQKuH67+tOAS9n/ojP20Q/D2ur+e9uUymv6Zfbj5Ju0G/rScMwGbomD/GWHU/dppTPY6Saj5Bg4k/TLllPrJtZz+WT5i+Fe+nPwc+pD6NtUY+XZGiv+Hrkj7/+lS/f2Yjv3W8UL9jeoi/QfwHQFJpqj+azvU/04ecv3ztkT2okaW/Dxkyv18Ka7+wqp4/nt7KvqfAiD+rpxo/0W4TQG5evb+OLqE/k76SP1sveb+/6YA/MRusPonvaL5w7VK/jxc4vx8wA0A0/zI6rDlIP2J4Sr9xpEe/en6KP+q+rL/mltS+VGALPVIX5D07ZDe+5KjKPv9eMT81ETw/o118v8d8kb797Zg/mQdlP9y5r78s9bM+fDb9v9A+QD0I6vI/GalyvzKWVb91l0M/hXrFvzz8Ib9esBg/j2Aivx6icb70gj4/49TXPmyciD5voa2+p3i9Po9n/b1/Zeo+4G3+vlzzOr7gbX2/JGQPvcGdT78TmY6/PG+EPnXYWT5qakg/frzcvk7s0D4SAQ0/ZLnjPgpeU7++8hg+mXC6Plg56zvaiAXAZFckPhyaDUARV7i+Q2M6v4snvz4J8J8+jKuYP9MBob+8Ozs/5iUXP92OEb5taK0+Q8WDPxBQXz91TpK/4/VRP3Z7hL0EP5U9TuiVPUzZGb7fiZg/UnSrvUpNj7+/fMm+v/s2PulWnT9qZBo/rrxZv1oNH0ChkIc+AUCjPwdbG747igg/aOdWP87tmz/YV4c/TUhfPwxTob7zXRK/iEuqPnvhbj/lEGS+jleIP1jvuT8ClGq/b0BWv3u+D75of50+BkoGv/oUrT9LH9k+OpMhPeLLt7+nbKi/cuCPPnJ8CMAanoE/FfMhvtaOT0DmtBNAyc05vjfH2b1u3n4/jwHaP/Cq0b/0p+S/rD0fv0ouFT83Zv4+Re6IP7CPmb+zOxTA0aZbP6wEVL/oLgHAl4sUPpANSz8X+fm9G2vqvtovI74Gvb2+z4ysv1D1Nr+acgU/DbOkPt8XLD4Nwn6/99s5vglVC8CWwjY+z+S2P12wvL9b/xW/r4CoPiZdoD+yfeW+UTcxv6wF/L9f19M9YLECQO4Bjj5ogra8+eekPj27Nby2IVC/ecJSPwcuZz6rSZA8l7F5v1nZpj/2KSo/OIwNv8w7gD+bwf6+YjFIv+d7Qr9gsuK/AW7xPvVi6r+mPZ2/b9YEwE9FsL2b7Rm+ukSnvjBzhb/BC5a/48Ht
PvnwDL+4+aE+qJ9iv4NoOT7R0aY/9g4WP+cn076Vr4M+7nB2vuQbATx+tqa+fUcovgAvWT7BTH6/UQiCv8O7sb+v1cy/68+bvyRKIz65jWS/PXwDPwEwDj3y9/u/e6wnP4CQTz/JbX6/WSPXPnhDW79oB8g9Fduhv4OE5b+dAMS+mmX9vaycy75Qjpe/kr/MvgDhiT7B4Te9jeE6PquMK7/a+pU/2YyWv4Gh+T41SJc/ljn2PoNiT756Pge/hZ6EPwsktr8QWni/sYzLvnBrnj/ncMI+2dF3v6fgnr5IRB2/u7Q1v4KYej9Hla+/iOzNP9wGVD9boYs/aAbtvg+KGb6Fcbe/R1AGv1TA/D5l5aU/UbeNv5gju74v7hq+OKmyv6piFz9uVzm/aRRIvuhl574XEkq/n9nmu8LIXL4oZL6+dnw6v72Yoz67qdM/mT4PPpoBfz/L8rG/N4whPpF2jL/gC72/ckc9vyiQmD89NHM/O9Z4PogvxL/M/Ui+bHiaPsW/Mz5uAey/3+tjvzQ7g71cSgk/XI0MwNQDJr5gexI+LIBZPx1fMT9lIxU/58hMv2o59L82Psm+eTWAP7hcsj+M5jU/j9LbPiFgwj7MXQ6/hy4FvhKk1T97S3G/ErHOP+DgpL7Cp6k/hBK0v9H9FT/4nDy/eie1v2PHXz5fNoE/FU5Yv8Hfv78cGby9SxGzva1eDD+hGHM/VV1zvSKB7T/LnWU+0V6xvtdHzr9Pe4a+6TqsvonWdT+nyuw+kfWWv3RjMD546Ss/iJbDP3YzYz/ZxkI/uxx2PdXQxj7H758/c5iqv7bpsr4Rx0W/XCDCPux3nT9P0xi/v/oYwJkO075q2Wk/HaIJPzo72z5IW4++R7ejv1TqAz9+k1W/PsgLQMMMEj9epRS/voMcv4hZvr3+P3W+afSOP+HjPj44TDu/9vnxP4j3ST2t0kQ/u7Qbv0vquT7PSZ8+B+Dxv0T6AEDWM6U/L13Lvi0Xjb+Z87E+kI6xPT4atj7dLEQ+ApkBP1BBuT82bxE/HVuGv3Zorj+o/9E/TLtJQKfOj78Qtng+G0EFwC+bDT/bVgy/eTP2PytNRr8mN9i/e0nxvsnc/L8OSEA/UioEwOAf6Tze+gTAE/6jvjda0j/Xprg+6w1dv3ae/7wfmJM8m/zxPjf1rr99shc/wRQtwB1AIb8H//m+tSEiP1dkuTx96bO/gih8v8ZySz4P7qG9ekEUv9kv9r4RODI/gIcEvy9ptL0+4ApAPDu2vy8J7b9VmUg/jbstvwfwor7d+Ue/hgSFvuaczD9KWE0/rrpcP+5Gtb8BWZO/d7M1wJUzT7+A6nQ/m1YEP1/UYr4Wr7a8zlowv1CsUT6ZEEQ/YdXOv2xrmT4NtG4/BojUvh5hMz9MIzRArbHVvYtfyT5P8Ty/6Jk5Prmo6z7AhEc+YIWCv4BQUz49Kbs/lrPlPstEdj9Es3K+dvk/PzL8jj7jbnO/LxWjPtXffb9xBCK/SN9BPmcwmD7FOJi/AemGPwFvEj8VaTS/lM/Ev95hFL68UuS+L7bnPW3qub4segDAadXGvhx6nL6cNyI/N70yv9De1j+jsPS/VtSnP9+zZb7BDVw/9m6jPfXckT9PVMa+mJ+MvxbTtT+KaOi9OeBiPloMnj8cF4S/", + "polygraphy_class": "ndarray" + }, + "data1": { + "array": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA4KSwgfSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAoXchg/WAMXv2dXB77/BpQ9RQfAv91hR7/176w/bd1iPy5JgL1b9Xg+9KaWvoakrT86KU2+A2htPh6xkj/RAUe/Z4bHPynmw78dmNA/Vs++vgWGeL7a6/G/2trovmEYhb+2jhu/P5sMvtUxdT/5lVQ+y4edPvs5jL8aeWG/nzzmPyM2PL+i/tM/CeV4vjPxFTu6AYk/X82iP7Lx4r3Lx8a+1wxlvcUIrT56LGQ/oZqKO1VvHj/GtdE/AzvDPveQnz6ra8K/4l0owC/Zxj41M8K9zMgDv/tfeT7naXe9GluQP+VasD1dNj+/iXt9P5xXXr4cywY/LW/0vg5bJr9lYKw9Oy9SPOSr9T7wi4m+1g3rv4qU3b6wN0s/rUGYvu5LfL4sg3w/4UM2v5N6xb98fCS+puX3vutbd79pljO+oab7Pg0vGLygLJ0+vh3oP9GNr762g44+z1sCvwirWL+CvBW/2B/1vjYNuD5CVxZAn043v+lP1b6jRoG9JqbKPiLwhj5AXaQ/AkobwHbDGMC+4/2+VXSMPyhnyL8MfUDA0DgSP0eBeT7zi4u/U0x8vyBBeL9XYOI9M1Ygv/L1/D40sjbApVpbv4kikL8WP4O+f70Jv30/TD+//u2/RVU6PSPvDj+gpu0/CKr2vnAfpj765Oa/zxUZwCJDWD8gM7a7gdmWP6rLr79OGAU/bDXvP6fj1j4XnI0+zi4MPKUSyb9qg60/K2Ecv9ElAT+CEci/ifiWv/Kwtz+b0bG+7gS6P2U3oL1Ow+u/i15kPy8DF79F3TI/BYfOPOaExb/jPCg+7I4MQLlo/D68l2u92qSJP3m5Vz4kkFY+YLqLv9t1bz5piYQ/KwhWv+FVtj9e0Lu93URjv9Fkfz+y1pK+kW9CP2Nhlb5CDHW97LP8PnbwlD6Wbf+/6oSPv9DtbT/QTge+QRMGv8Hlrb/NNIq/JmUBwERsz701Mxm/TnJ3v0uEi71cWro+9PYFQGWFwb+ZZpQ/Lx15P59yR78WHVA+6E7qviYlPj40K8E/AwkbPyk0Aj+BUdy+kvW9v7F4Vb4jcpo/PHrDvysNh79UTAE+VhJ8v9HiSr/q4di+jcyuvw1geb/aEzQ/5VenvxOHzL9RSSG/cGE7P9+lSb6S3e2+krE1PVdz378PSv0+mVHEPuGQ0b4iwoA/y1GQv1JfkD+WMx4+n1CsvgnOoryhrdu9OzKcP5/Qib92NEO/k6WEP/p0GL9WiWY/HofZPxS4zb7DbS4/MKyTvzQ2QT/NNUw+EzLyPjZEJb9intS/IdUAP8wOBMDEe6K+S/FwP9eARD5LY/k/xzaDP+NlJ79PwN88yAdCvcWEyr/8vXC/B44Pv3BRAsCDiEI9sQx/vmMh1r/99M4+cEM3P7p7aD5E7KW+wWgXP+wiLj8AZJ0+yhlTv56ejT6DBZk/r7wYv6NEBkDAjwa/N70EPQQoLL8coWQ+HwHfv2XJC7+hyou/I5YKPhqUfj+pYCU/RMkXwEWxAkDvLpc+pshGvyTmID4bazQ+mbYqv6uqSj5Ogxu/iQjPvv2vL7/GYbe/KFYVPi/WFT9pzwM/n+ZRP+wPpj6zYAi/NvRWPyLDrb9mkma/0dV2v0c/ij+XWnE/EoD5Pv80iT+tGGG/D3uyProtAL5EzVg/r/rrvT
wXxb77eAo/n0xgP6wYK798Knq9/HqHvzvgH8BKRU0+OeEbP58thj49i/m/iaKzvmn5DcBeUq8/1gOLvxGxoD+DJHI/F41mvxn/kL+6Y7M+nyVYv3jj0T0uNfe+AP3jv/2/DsDqH2g+QMIPv0uXDb8iJDI/WzOWP/uKi770Yq6+YAkAPxGwrb88gmQ+ZkgNP5EjLj+D5/e/LuhCv9zVYL+Ikcw9JyT2PjWUvz8ebP2+08H+vnQokz/v2Jc+aEKiPyjUkz5mHJS/86Oqv/SXxr87fCk/8SOYP+a37D/A1Vc/5deUP/N7FL9vBZS/zZ5ZP0Da1T7MC9w9jlXGPpF4pT4cVKQ/FsKlPwZSCz+Jhlg/mpkDP3Ae7L45rkE/0H6yv+vlhj8h5pi/z1SSPgNcWL/nqg0/LfUKvfcgzL/xZdc+nvITv9WSYT+8wq06n3TKPj0jdb963ha/r9LqvttQWz9LtNY/vB9OvynWiT//2wrAM9uYv4Fydb8BH62/BrPKv5h00z6QNFu+bUHCPtHCMD/z/oc+K2MoP/Fmjr1m75e/dGaHPw3QFr89g2+/GzA2vvqtab9vudW+1iewPtlZsj4x2GW/hbT5vr6DiT8oKP4+3s0EQHEOlz8yeJI/raWxv1qMQz+Fhlm9JQ0NvzcMLD5i+XE/gDYAP6njYL/KET4/8Dgjvy7iXT6zUJu+eh5gvoAoRD8RJYW/J/z4P1kZ6b/AABm/aoy9Pp2VFT83xs29hBJrvZ8z0j56nWy+w1OyP5XfVj9rcV8/DFZ3v2aWor2s1pm+jLeAv3FnuD4nowm/jMgnPwr8iL+GCdw/D3Zlv8uQYD5Lt0o/fhUSv2M7Wj/jVZ2//t65P1LSZzxrrhi/Wi6ZvlUVTz/dnhW/6NU/v33aYr6yppa/lQfpPzz/EL/W5FQ/U39WPmwKfL+5H8O7aLRIP5THQL7tt5g/BMGCPwE+az8iWOu+RziKvxQUHb9j80g+rrgLPwf7CsA87QS8JOIhPkgajr52SKU/j/PXPl60pz/BJF2+hv5vP9tJ0T+S5ac/TMuLv9Xhgb5uubC+kn/6Pg==", + "polygraphy_class": "ndarray" + }, + "data2": { + "array": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA4KSwgfSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAretC8/rMFWv+vuij19RgHAp5G4P/UqA7+W0WQ9aIqevgixkj+1mTXAahKIvIG32L0HQ+i+/vTcPwYoY79vlEU/xGa2PnZ5oT/nzCq/2qM7vwVMjD+3w9k+gsDFv+UPCr/y08U+t3dWPw/iLb8sgzrA9PEFP3gznL8UHAHA1EQ1wA1hUD9LtAM+dxJnvo3LsT+2JyM/6+LOPyhFsr/0hA29d/DOvzTjar4olX6+wGkgPzGhOj2clx8/pxQ0P1rqCUCQbqG+3eZwv1j9ez45Yk2/5LC9P/QBkb6dyRC/S1DxvjTqNz9tiCG+R0Qkv0Aq1T/yLVu/u60RPYn9Jj+Xz9C/tOT1PsUNh7/koVI/THB2v1BZMT/wi40/G4dcP5OsgD4o2my+PJATv91KvD8eEjO/lb6Ev55VKj+fz0u/XTdzvwbIuL/1P/e+f2apvQrteD7OiHM/yOIGPwJ0Q77P80q+rpkCP5Tjoj+CWAE+6zHwvncsfr98/Li/ppAqP2f2Ez8ZzhM/Tb8DvnAWtD9h3ve+hox0P9+wHr3gvBu/EbAXP9O70T4ixbY+TXWmPcc1G70W2xhAuZWXPy8LOT/19uQ9h+qAP06bBD9aq9Y8yWybvv23pb1B6ErAv6dXPokZ+r6c3Ms/dXeePPQvmL8iT0y9TOeFv3Vi8j4O95Q++jR1Pzt3Sr8TTAs9CbmuvymVMz++1AQ/KNr9Pq011DodXdQ+/FrNP+D3kb4B+Q0+MAaLv1pGXr9WUsg9sk2NPz03g74alHG+Q1PQvtDlCL+ABpc+I6a/v9T1oz8eWSq/q844P1CPor8bOSs/kAQsPyK2xD7ekn6/kIl/PmNul78vaoG+z8ODP6j0Sz/gwXg87Ix5vzxFJT9nxZ48DYAUwG5aSD/wg8a87OmZP9MxjL8eT0vAFOSOPtZHlT7uZ9i/8n56v31VMED/eJy+ISA7P6SRJr+GxwXALlguvlL9h77XvMy+0MjqvSxOhz5CuBm+9Mqhvzft+D4Ehfm/DjkDwO86PT8VzAE/ZxdrvQ6esr+nOTW/voaGvkFGzL/7MD2/H/8mP/Gxtj8nwNa+OalBP+z/9L/M0Sg/kM9Nv2hcnT7Psn2+VAOpPwTbX7/OqiA/mhuPvtUaIr4yRDy+CH5Yv1n/Zry2TYC9rwd3PgkRrT1fSUw/EGtsP9N6hr/gZ14/39mXv7l9Fj8zpx4/CKb0v+drvj253u++cT5Cv7vorD+EVye/uG5Ov/MFmz3Pgi8/VAwRvqFLlT8F7gc/0GcLwCaLB7/i1A+/zW71PgH9J8D6IiNAU/EHPylzGD6cPZc/bv4VQEHpXj9q5XI/m9MFPiHsEL52m/O+E3xTvyjgL78F36y/fmKbvTCj17+ff0M+Z88UvTV2Fj5qqgW/SOtEQEcYrj8rTZ+/+XRuPkqmG7952oG/e4erv4JL3T7vbi6/csN3P4Nofj9t3K8/BTZavx9j8z6F6CE/e8fyvtmSRb/GBM4/rvRoPsjOIj+Tc1K/gxGcv3bHEb/t/v29XDiIvx9MIbzwINi/t12Pv9fAKz8Ekho/EqTIvhpGgr/2gYO/BB2/vir/JD+CoW0/aDn+vqSlk7/NIok+MNNSvzSsBL+moqk/1r1TPwXEIT4+zqK/V3hYPicFiD8OU8C+TvGsP614pT8z9dQ/1EicPk1fFj8XBLU/0RDlPuFQAL9op+W+wEd0Ph4xpb93shK+qFuMP4E9DMDpacQ/9amPP0Vm676pGws/GEpOO1NwnT8fkXs/hDWzv9RWFj8ckFK+H6SpPXlpOr/1wt8/vyCFP6VhPb+F1Kq/4nbFviF33j6F+HU+qGagPy44Sj+tNXg/l1mnPorUsb8yZUi+BbGHP7UJU72IygA/+zKjvuDQg7/Qm/y9dzuePibbJ79KXnG+qoBwv2tDjz/owpa+5fO5PwKuwz9t+6W92RSBv1p/BcC2a7y/0tmvv7FxsD9xNe09P3rHPmEbDsD3Vpm/qRdjPw7Ukj77vBa+gpkQP9Nh0T/1WGK+wRGOPSA4RT5TGBlA2lsGwLTnLj9xHeu99BcRP5FJKL+Oj0i9AR82P+w5R0B2204/1BpZvwf32L7lJei+o9flv5UBqb6wmjs/Chqjv640hj+fvfk+t/Y7vyXtEL7Plcw/ysk7P1LpDzxBY3O+N+WcPfACA783tAbAC7MCQIQNHz712cs+rTBtvZ0wPL/9Yj6++LjMv8m7EUB9BJ6/eReEv6hp4b6q36Q/
J7dtPLM5nr8pjqA/2yI7P+DWCT9cOjc/Oj0ZQJjYC0CUzC+/d9vPP58uxD59if8+VZzYv5t5Lr41FVC/qvtTPyoZiT9ixATAdoK0vCiZjz/FKD++6qM6P/bLsz4X7q4+zOmKvuu4m79Njo+/IJg1P+tmnzwn1g2+HgjZvrZBnL41AYY/HaWrvgKoq7/2Xz4/MMtRvvOYAD0gxc0/nW6ovOIPlD735pa/B/FgvuYJHD99Fse/VqpNPo1rzj0JHSk9n8GEP4Y7qz52Mb0/4lt+v88vCz7s/Kw9Vd2Iv8DPdD9zxxRAU1CLvBBJAT8GGK09fK02v6wO6b+SrTS/sSuyvwQg2j+ivuu9VxyxvpafYz+KlY+/NFFEv3lpiz20roM/bm4AP/JIfj4x/J89t+vMPhl+Tj4i1oO/kb08PaFuib+bDqY9fRf2vjqFIkBLaCO9H76zv4pmCb6y9AQ/yjDWP1rmFr8Ajcq/OxxRPw==", + "polygraphy_class": "ndarray" + } + }, + "outputs": { + "output": { + "array": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA2NCksIH0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAp6lTM97r2MvPN+eLwQVzU+mWyOvT29170NQZ47E7cBO8BUeD3Qs7k9AE0GvioOgz2S4BA+Yv25vadTSLxLfSY+onH3vYO4gD4gpWC9grBVvn88nj5jgg09gwOvPnNPwb0CnGQ9KH5aPmOnnD2zEbo9X61uPVDbTb0vrWi9c+ipPsJOTz3CxS6+Xfh2PlP2Ir32ErW98mKTvbqyGjzLgHs+MzOOO4qynT0nJWQ+6hDePdof9r1Siyu90vYaPkMRYz5t0ag9N/MivvDUlzwD+0G+ytVgPuh3Rj74qFk+KJyHPmi4Pj0C7tI9q1AdPqeFbD0uhqQ8IgQFPnA4NbxrMoO9ledZPS/Sjz3n72E95/9EOxMylj1sToG999QJvcw8Jj6toa29uB4CPipA074Y/QE+RXx2PTpAnTvVOtQ9YGg7vT3K2704cEI+R4fxPXOIPb6zqYE8RxpxvYW6uD5tK3g8wOILvOVMQD6jvAo+NLisPV3dkLxtFPW9lzLQvT86hr0PHYE8LT/Wvcm+vz16fuS7JyCiviMVhDziwCw995ijPV0VG76iIQ09xd62PlWzuD045Uq8YsIwvfr2dr2Y10I9HgpAvMBtSj5YmMq96w8/vnM0Xz4L7ky927chPld2OT6fORM+xxW7vLelRz4atXu+mFZ5PQh4mj7duIW9C/qIvXAr9byap4i9M86Ovc7HpL738ws9T7YDvlCdrb5VjGK81Xusvke5eL69Mr49p6OrPbAIcj6Htac+EkvEPdfOqj7NfdI9Q/vJPj3GDDwDSdO9Rz2fPr+MOz7Qs+k+als3PoPnDT6Ix6Q+p2uiPkkcoD2Xb80+kCL7PYA3n7z+AZ4+aCxQPXuN4T0DA74+Y8HFPc27qj2Y8YU+cD+kPvINAz9nrhU7/4SlPQCLdz4F+Bw+gCvuPdZmJD7ijaQ+6xSXPhcyFT7TiIQ+qFVlPZWjXj5XbuQ+mLMSPtUiyj2w86E+B2BRPhpxmj5dP4c+0KxJPUUgoz0XGb89i+EaPuU6cz6PZy4+x06avgeUS73PFPk9sfmRPdu4sD3t69C9DUETvrjuqr06lnq8SZM9PVrawz2fLBC+e57ovY9Muz1lehk+0++0vWBK+D6LX9+8U/qXvfPgDD6zv/86tKA7Pk1mcDsB4rK9P7tuPRpKujxbBHG9mP5qPoIAqL4o7kW9XTMKvtei7L2qT5y9iH04PdlJFb6/L5q+qLuSPX1DUb0CTwI+vbU9vuKfAL6/Kag+/SenPWQel76PkJK9WqUyPOXPpT0DIhu+4LrqvUNlq72TGD2+SilAPb34rLz6/269s+xjvH4yPz6dll09C+wAvnr0i72RvSM+Ao0GPoPSML6Yyee95xFpPQ/CBL0QNPi9+jSpPUfmnz1rN2k9U8ylvjsuQj1Hwg49AJ+JvP+mjz7fdRk+eOR0vaKZn7xIby8+7ehsvdOmFL1fGLQ97fJHvlcDHr5KC0o9bzoLvlr/VT649fy9U907vHWsCj59VV4+u3NQvp9VSz4t4LK+KlwUvkM/lD37Hlm9vz6FvgBYpTvS8cy97USavj5tmz214Tc9kwXBPFuj5r2r+i29NPY6PoUnjD1D0EC+pcwivsBkDj6POjQ9gFyyvZdT1b36GJ28pbGvvm/pVT7HE6m89t45vdgbGD5hwKM9ZVaoPbOf8T2b4C6++u5MO20NgLznio26kmS5vUAzIjyn4ys+3THWvZrsOj3QQmY+QCxhvRt7Ez0XQtU9wGsYvD2yHz0Ix4C8EBSSPV2O6TzGewS9SEJFPvJNDj5TPiu+3Uv7PbNmRjzff0G+M9h3PI33hTwPv5Y+v8CKvTOrArsVMSQ+x2WWvAdEHL66yZi8/0/cvSdg+7zCg2o+YKiUPYyLFb74Lbw96y2qvWKGNL7atfU8kwjgPfTcnz23Zam9bYwXPl0lIj73l+g9+zcBPVDtmb1IYB4+vZxVPe1b77uDRgC+x82cvfBBKr6zBbw+iFsnPvzuhD6Xm/Q8sjwcPlRlsj1odow+VD0+vj8epr33j788oDo2vlOYfr3zUsE7vepRPueCvj1nmIw8h+lNvXqY7TwAU+E7zzFTPkC1Mrzq3gy+4w9GvoIeO75D/uS+d2S8vdBDgL0rTgY+ZuXHvjODUr1JKDO+AmOMvitswL3leby9JzCKPPjaNb4/mba9SjEtPVgsUj24joq+7W58vvuM1r1HCjO+XQQPvrD3jrv3kIe+R/mFPWun+r1v1uq9E2u0PeWqwL0no+Q9fZpdvmcPcby4q5a9TbSMu1eWn71P5La+/4sHvstBmb4l4UK+AOj6vd7Cmb3dvLW+TWh0Ph8BXr5/bYu+PWI5vSdILr0LeKW93WmYvvUzhb7POoy+p8uCO3oGmr2H3X++J2uXvW2AprzvGFS905o3vgu3v73damQ90C8EvqPj5b0ExQO+pwQSPuygNr5DrIm9bQ7fPaBHEb04ML69l9FBPoVXp72MeRY+yTM+PS+lLr0XoG89ioJSvbVkgz5go26+f9a8vYt4kz6HPJw+LMq4vLsNAD5uiCe9AEiFPm+gVD7OzUC8gnhYvhutV73DylM9F4lxvgqljb1Hcdo8892IPdO1RT0K18Y9twI7PndAcT6aZH68Xw/bvXH4mD4ar/M9003avYBDWL3zBxy+VSeuvn9qqD4nmYs+ipOmvStYNT5ARm08HlgMPUAnKTwCZjW+S4ezPO1xbDxN/po9WtnSuejPor1vhxy9EIQLvmPobb7RNbQ8sqq+PbBjnL373iE+11IQvg4ITL7/qc+98t9tPu/KKL1kQ8a9gPeAvf
cnYD3qTlS97/2RPdEwKj3bb5W+AOvHPQpa+r0ws1g+Xk2yu8V+C723Ltg9hZFvPrtZgL3AFRM++/BEvprH+7pAGG4+p8aBvsW6T75NKyI+B1B0O32VBb5A2647D8G9PRK9Gz6zJDS8aILdPLfkVj7XGSA9nYOiveFDkr1g3DY+i1uAPMjYmTwTcSo+IE11PPeYd77tvrI+WYYJPWxIHL56bU88SjzrvdzfJT33OoM9enmOPWjILT2v2os9OHX+PRoxZb71/io9eExQPkhNHL7AKPW9keiiPSp4ZD6HuoU9nK2MPsqqAz1xWwK+3GAavZISaL6n9oa6swf4vW2PSLy6rO+9PwKjvGe6lT5ZFzc+g4mivRvIMj6A19m6Y9KoPnjN/z2CcGY+N7sYPqqhoj43aGk+golLPqidUj4FDg+9+916PXvPsb17hP09PeOGPqM37T2w6iy9qgNPPcMS2j4/TY4+c5KfvFOlLz6AoqQ+h9FMPndFSbwYQ+49lXDZPtNOlD46ZHw+bW6oOzsIl73Pzf69Q/GpPsvzjz5rkam8OEG6PnB2HT45cQU+rR9vPSkyKr7L80E+5I9HPpdaHjwByQ69W/n7vZILPj6Ysw2+Xez7vncxsb3g1wQ+wyjAvsiAaL6j1LC8UJCuPfifnT6E1wA9Sx2XPgtHNb2q/q0+LesdPeZfET0zs2q5Z2eauxHIEb7/3jY9SrltvSTqij6UrIi9sDiwvHcaHD6XTNg9f6QSPso2VL4g+OE8DzuKPSc0+L0TPy+9h20mvqhELb1NN2W93cmPvuN6dD4XjzM+qc+VPlrSbL3KfOi9x7H4PXAMAj6jvB6+aKEnPYsw5z11DpQ8smFwPZr/kjr7MuK9NYbDvRML8z2i+Qg+mMfdPbd1pD1HKtI9M6lTva8VHD7lQfO9DUs3vUqERT7+XQC9RUMVvo8eJL4wEyg+a1ofvgq0pr1vvT2+Sq/SPpqHqjns9hW+QPJEu5oJ/LgKjC49kNT6vDtm3r0sIYQ+83lSPfbWAz2Rn489K4Envq/oDr7L7M29bcSTPUOxjL1A/2o+tV/JvHKCUb1ados863rUPRygG74X6FM9oviYvjMDlrovEbE9B530vEytl71zSF8+z0icPXc3zb4YHOS9OnrivLMG+D2LKwC+lQn2vfobFT4K9DW9vUgQviA4X77NJxq795gBvsa1IL7QmXc8x21dvpomoDsfqmo+9wYzPWqgq73DkAk+elQNvbh5Hj2KxYk+4Jogvnyhmj2t2O+8Cvo5vE04AD7jrhW9x46hPrl5lz2rkZC+LWvvPRxfvT57zJM97ayGvmo8HDyLSYC9DZOnvirBdL4zYWQ+v2U/vlfOSr4v9Te+V6g7vvj7Jr7mu0y+CvQEvuW3ZT4sSiS9L8/kPvgEDT5POFM+j+gSPmoAHD5I62g+LaM7Pn1o7T2Yt5k98rNfPm3NWz7AK0O99RhzPmfm1j3Rzqu9tNQlPj3vbj5z1Xw+zdr/PePSID24rpE+ylknPqoFdz7F7vI9Xye9PurWFD3kFBM+VTIzPq52gT3aj8g87dibPt9Urz7FANE+IjdRPniCHj7lO7I+2tOxPiBYvrzqPMI9n69BPh83Cz1WWAU9R2zVvsDzkb6Qi6++EJcLv5wUCb5q4S2+KshvvjKj8L5w0oe+1z+gvrLJ1b4X+h0+oDpMvVYomT5Ajcm9ZwfmPKF1Lb6ANEO+qhxZvtYOyL2jpb69bzkQPrBF4j697jS9QNIFvChhWz79Oc898bGmPT/a0z2v1VS+y8P7PaQBlT7wRTa9+TAOvob8Gz4q/aQ99QihvadllD4yDrc9aiTjPf1xeL4yTEa9S5FIPu3MkD5axXK9graNPJFsJD5Ze0w+7/mHPl+bkb0Ktci9u06nvZX5oz4zdJA+J9oHPkATzj0+Eh4+t6FWPiNiKz4gyca8K9UOPli8az69d08+H5auPWCkBTyjOhc+nf36PWBBnr6PxrY9eo9tPperPTytTqg8O/TMPVduDb3hvcy9aMZAPmdfBzybYxg+TJYnPSENF72afQy+szyIPDO7Jr6j+ZG8Ay/kPHN01zyJrRQ+Szafvfo7lD2KzTq951Z0PWTZJb66p709/SSjvgOCSr6CMiA9+5iBPWXqXb61Uiu9Ny8APup1zL6T5na+xAwoPdO2+TwUPKK+QCNuvDJ36D0jO4G9EHx7vidv5LwQ/e89wzogvU4ysz0YtnC+CpKfvkcPu76Xwww+IH5APOBYdb3zv3w990IPvvM/CD4r9Aa98SWWvav3L70nUUg9F2M5PATtN71QyRo+j+inPkMoqz06f8G90ufXPf1n9D6n4Uk7GwwKPmdNDj6APPa9NQHjPQJA3D3mt4U+M9LTO//qjb4UYwi/2zIfvo/ISL6FMH2+swvCPUdu4T3luuo9T6Uzvu9+LT6yq3E+BalZPrqj7z4Sxog+v7BjPgWa3D0u7Rk+S2yMPV8KHT7n+uS7cFE9PrKhOD4HTUk+8N6gPiBUWD5Kj9E+30JJPhKQjj44nrs+HcNMPp36ZD7nQkg+zXjTPkeAnD5p644+2ta3PRvWeT5nfKW7RfMePwBnQj7TftE++KGiPp8NRD6DuFg+jbtbPs7PAz5SUHw+V2xePmecZD63s8o+YQCOPtZCgz5s9K49pe0KvtAYRj7NvX4+mmKAPd8ZR74KbU0+F5jXPQMTij0ClrM+S++TPbMjnTklnui9cJMcPnjjfT5jdua9lxB3PmCR6z1TSeo9im2FPo17sT09P6K9r+t5PUrXEj6j94q97QrVvYDP7rw3hES+M4V+vhN+8L2+BRe+e/0TvgBVhjyyShu+d4Rqvg8qt75txnY9u2oUvrPzmr7j936+uGe8veB6bD07K12+hk4HvoAlibtbyge9kIYQvjS0nb7Dm+O9m1zLvqq2hb3wPdu9K0yAPTA4dT3HZs+9QoDIvRVQ3r2txO28Pr65vWegsbvHKLs7C4rwvV99hL3XqEs9oCKNvSHtlr6D13s9IPlZPoj9lTwuUze+wjXOPN/0mr5CDYM9KE+tPUvcGj7PkZK+ndEbPn9QJD5CrwA++sVBvaCW3rwKHke+H7yKPbpkuT7Ns1Y6d/jJPXPgBj6wXAY+eh+wPY3C/LwfwXM+ZgYXvtz7xr0cMwY+CIorPdThiL0jPNE9ykwcvvphiL244OO954lUPdOc9DzpRUm+ioPkvEldFT6dKXY90hqMPXoMOb1DaFY9aAW8PRL6PT3gpme+iiCZvTBHTr6+Jgw+ziwZPhh71rwjX4I9s1/0uzMYczw1yuC92OCbvqjC2j3d7Gi9Kiz2vXSSj77Kgv892HAoPsjnqD7NEhk9gOMWPkYNKT/olIo+jafEPCD0qbuDGbU+Izl5Pcot5z3KI8M+ChbzPrpPnj4zXL4+68ibPRpwcr3uC5K9mq4LPl+4QL23EVA+5TiJvNhVsz3Uxq+9fk8ZPbWhGr47BJq+R/zbvW2kAL57Gxi+8CGCvtfblL5tt1u+KHQ7vjNRPL4OmJi+J8Vdvhs/ur0r2Rq+BCy/vteKG
75BuSy+w3h6vdVCX74IMCK+ujXEvBquDjwIizu+u/yJvsdFe75hQK6+M0RGPdMy270KY1G+35wNvrrMgb59ZIU9A5F9vlintL5DhX2+gHa9uzvUMb582oG9AGOrO1+hVT6b8Aq9Z6YOPQTzmLy7NbM+s1T1PZMrlbzglOE9p5+Ru+BwtD1SKRY+77OEPujqqz4a8om6oGB/PUt2Cz5Xk8g+V6R7vdvxrj3/GLs9c+lqvduiqDxI6zs+8VtKvYZfi70E/xq+uGBvvdnEMD00bB2+AOJjuz00cz30TYU9d0hsvpMgOj56IR8+Mkqjvq7IGj4lcoI+L/bBPRAzsz1jbLs9P0wdPlNR+z2CB5Y9Y+bgPEPOdj4Qt2A8Q0ZgPfGsCb53XxC9Zs2AvlNWUD6H6Xs+4lZVPne8Yz5NRDQ8i3a8PVUebT7w/xK9Y1GUPpLONT57Q949wuNrPHWjP733KwY9EHNJvKDwXb59zOO9p+FZPUcVhb4Q6BC+g0DqvS7Prb4PGCa8ykuOPjtr8TwD3oY9jgiPvVNSaD2Sa9U9y6+rvXAQFD3bBek83LTHPfhOnb6sLD49EBW/PQHuQj5jpC48ANlsPTmjKT4Lj6s9QCfvvUq1Eb76lK49qOZDPSPvG75nr4k8M6ZEvqvKt77weUu9HVJ9vRs3gz0f91O+MGb4vNVarD0DXzQ9A03fvcMGn77qlCK9IhwqvcM9Xb5nJYe9Z5SBu4BWML6gaPs9XuQNPiJBmrxnDqi7M5OXvS2+Ib1TyXu93ecpvqoPcr3A87c7Zy7pOJ7lAD2NVj48rU39vS8wub5j/He+7yJFvsUEED5ftl2+z5nKvnhn3DyzW5K9wvUEvkfigb7Ij0O9Y+kDuzAXf7496nw945lLvChupLzGJgI9alFqvmWvWz7y8My9DeKIvjBNor0tlDq61wamvWfYYr3DhXo+0NjTvIrtfb5g+q09AGp2PSXRpL0apti8AH0GPR0nTD4TONu94JAePEi9lT4Absw9ECh5veK/xjw3F5k+C6lqPoj0Db1f2+K9VyE2PqN5jbz2STw+4iYOvfVw7b3EvUq+FzSyPcMe9rxT5yc+H/IlPc3EIr039Pk9k7QxPuv+CL0tDNE9wKztO4143b1zhg8+8tbbvAU8tT0NG+w9OM1Dvr/UoD3X0O8+3caZvbcRC73nr4k9WwwCvZN9LT4/dRQ+NREZPiiLHj5pe0i9V6xFPQX22bzWUz6+5N+gPm0py73DnYW+s+/CvT3Zc753GGG9JeiCvuU4Vb5yOFm+CLRjvbMjkb0HHXc98l8SvjpiyjwwBJ69OmUSvmqLlrw1nni+44uHvnfRC756Gtm9epW2Pad4EL4Rngi+EgiPPTon6D3NGhO+wkjCPS/Ibz1FyoK+jnaTvfl6D74xd6S989qOvm3Bnz21oT2+QNTavVOq3r0N2vu9s6Usu2eXvD17cki+9+t8vuekIru4NcM8GqVNvvhV07zdGcc99yrpvbuhTL4a2kU71lGNPj1B8L131EC+QHHtPXXm6L11jy8+ahsVPQplzT2L6/s9/z4wvpDWfL3ber29tR70PWm1ET1VF38+5oElPaAWSb77SSE+HW4vPuVAbj779Z8+vX4LvtHgir1HbDq804FLvoyDPb7IlJc+tyU9PpD3IT1btcE+LLunPjr5ST0T0tc+kSE2PoNwtj4AmQw8bUK8PG9xwD7VYY0+J1eoPkvJeD49n3o+bQXnPo/Poz6qjmY+l+7nPePbxr2as8w+x1fIPl035T4HoPM+SOxsPjbYLz7Fl5E+A/qIvZcLgD6gI5E+AJtPPrzXOz1afwe+C+YDvgK0pb4KIjm/zbwivuPCdT6JIAq9Yh65vkxQQL5/qk2+uXSLvkSWhL0DFYq8xV2avGCoWL6QT569jtBLvhumhb4QTdO9j6UQvqp7ijy3aam+yDV5PtO5eL6KDzW++UmevtPkA75LPjE+m+1IPkfUIz3H6+s9NwFUPqXSU7wqpti8xtCoPr+QRT0aFDs8TT2jPT1/jz4zoKA+CwIPvjcQiT46KYE+9HMYPvduDj7SAHI+j6s/Pt2wWj6psDo+tR02PpPVwj1lJsW9UnuXPuo1bj71tb89gNztvC/whz67n6E+RxzIvIBrSr1Sl2I+zSLkPgCmRT79zZc9HcGsPNfskT66XEW9EOuVvhR7mDwrE0M+pFERPeKyjj1j2QI+zWJMvZ/Vrjx31ua89SJfPncElz1a6mE7Q4v4PpcYET54kPm8AJfMPbi/Iz5uxSM+b3IvvtBs+j30D72+u93evY2/Bj7rlBu+xQAcPc+tpj2QAtg9WE7fPfBeuzzKemG+qgMCvkI8Ir2KKPy901+9vmfDqjqo2zs+CvY7ve22bb79kRM9U0B7PSdbmTxXeHm+aKu8vYA1Xzwo80o9k03lOm954z2Svve9XRqRvpcJET6rs4q9GC+yvA1Lu7xzOZG8wg/OPRayFL5QH7q9y/ENPfqn6LzfKo6+CF4uvgpQm73xI8y9KyiovrIeAr8XcMC9kMfDPWV9e76dr7++QyRBvjlAub6akAa9wJQYvo2n0TuWeDi8UBv9vUvA+b045L29MntPvmhgRb7TbSu+U1pbviZXHb50pIW+cfCkvYeAKL3HaBy+DJ69PNIS2D03Ayu+gtmSvkUwy70rFQQ+c0jbvnAtUb4zGqQ8PWG4vcFvh74HRI68LRR9PPYfFj4QEGC+00EeviID5D2fjw0+oRg0vs3og76XSvE9Q7mwvVAOuL69+7S9W/CDvvhVs741SwU+Ww0evQrpIbwHXH69eh4LvMiaqL1aPYY7yJosvkgCXb4twog978UTvuXbXL23fOS8iMxoPjck4b2xIbS+SvUfvfcnGz4zyU+6z4K3viBA6b2vF3G+jTYqPYrjRz6qQ3g8WTUhvYOemr2cmDY+BeetvUowEb33UQs+2ooQvNUugj3tnQC9Wxywvn1J3T0bkQq9GyoMvrUEFT5zjO898efEPSq7+z2Xcn0+44iHPqC+Cj1yGgs+T607PvsstL0fcRO+/3byPFvUsj5vA0s+ScKJvZLn1j3KIWo+N4+YPg/3Nr3LuyE9fZHnPXNh1bzC8b28JzpeO/P5iL27xWu+GtMLPi8B6j26RVG9wF1zPsW8Az5TkLO8FdKiPUUbAL73/bc9E3yLPlXAyL3HZ2k9em8wPeP9bz7LSGs9m2iJvp0FYD3CXWc+jyIaPrtNkb3PUJw+k4k0vLMaH7qAtGa7twP+vPU1ar3KGIs9DWRyPhLQ2byUnx0+GFjpPfq5oj1Auag+hbSnPfsnvjwUTJk9q1R1vXB92bsPMym+cBvNvSd/C741xum9fdkmvg3qnDxAOSa+TXdoPW1ikDxhnkY+jgg5vqXJgz3HLhs+aDZAPnVZEr2fgt08bYXkPcq95z3fyhe+CJGovK23Bz56+Qk92p/XvCAhbr71aAW+2A1QvgWCdz57qAG9S64yPpAtwTwD4Ck+G7jrPXNiubvDBpw9sjfnvbwiNz6jaA0+YZWVvVAYMz3g3kq804m3vWu9pL6q
y6a9u5VtPtl2BL09Gs6+qGK6Pe7Qpr60pZo88+fYPfpK6r2Tc8G9x1llvhp+HzzKtFy9MDNIvPD9DT7Etks9CiAuvbC6O74XuIu+Bt+RvVf8xj0j65M9mfJBvld3mT0I7G+912hhPpPobz3wJ/09ZBaDvmk5lrwKfBs+bRjFPMiFSL4AzGQ+p6t5Pus/mT5z43U85QWpPh08zT4gp2A+JcBUPqtf2z0ap9k+U9uGPlxHFz64eTA+eBV4vZbiHL4QeI4+zsGDPgVFyT60WAE/if+bPrW6lj6gCVA9x6uFPAkEzD0vf0k+2clFPqIw8z3RagG+aEuCPgJpcL1kjyC+3eicPqt9Zz6bZEq+j7q2vbtxcz4vOFA+d+skvagEVz46/+s9/yPTPbPdoj5jmUQ9R20tPlg9/T0RCpE+Tz6yPSeoWD7zUw29HSq3PTeFET41a2s+UAMnPXwkDj6qDJy8k5LiPt+EiT16b/a8s3akPas+jr617QY84zkDPgSzgD4r7Yy+B/LpPmfvtTw6DLk8T++ZviqIDL7jG3k9xxi5vETXEb78Ej++p5ZEva99/70vz0m+n8fCvRCAOL7lrSu/Mj4CPleZlr1/zvG9UwC+vRM3jTyaOkU7edI6vgcoZr4OeIK+ilFcvmJJQb75vau9q4KNPWpvCD5AG788aE9ovoi5gL3i4IY+CUAaPVTkqb3HVxg+kySfvHxzpDy4oS0+Z0p7PRz8k70wa3e9N1jVPN9qrrzSthC9GuIWuzM4Ur2Tzdm9zfySvdLXD71NU0e7MSsLPlcAlz3OMSG+dbhCvmfVDL5z8wO/kAsrPbrdJL5M0kC+LyeIvnONUD6a62u9ioTTvVuB/D0PTh4+4O30vMcn/r4jPj+9EjaUPvPLDD6DQpy9fHQTvm/YHD73ntm9wHzvvEBear0aUQa9AEDUONplTj714zC9yMOTvOU7Bb6X64K9EOECvXDXaL0EGhm+M4UOvjriCj54oc+8VP6PvS3Xizv6kyQ+79bFPbuVmr5AJKO635uBPsjZ6L1CSTW+W386PT+fdz3AfZg9x6kUPi12WT5mEoY9DXluPd0qHT7oTAs+yBqCvJdQ1L3iK/A8Hb6TPvKZgD2iIP89lvaHPiCIET7N+oA9L9OeO2BiqL2ri4M9BbcKPuNGJz664CO+72QZPcfj9zxnvkG+s6eqvqMvq76/QEm+TSlPvh4uG74zEky+oBn3PYT2Oj6ETaw9IDeyOxRGLL4tDl4+85zEPA0s+zz4pzK+zuWBvodHHb6KNnY+EhUsveDNfb1Bnkw+elOVvc3aWj6TQF29P4J3vdAASr0gn/49y/MgvWEKOb2jmMG8UzCivOehvzzkoam+bQa9vNdS9D3iqZm+n0o3vnUNij4n/lm8mpeXvHdVDj2TloQ9W5WNPeCdJ74Aglc+sBfuPVx4Sj1aSPQ9I3KkvFtcGD7IHBG9AsSTvZ0wojwqGKE9fJEIPggF+r3vN4a+rfw6vY7ANr2Ih4I+WmPGvb6BCj5VBje94C/lO9yxDL7atFG+hqYdvt6KrL395uk9RvHBvWiyXT2b5KI9gwBFPcdTHr1Xqvm81++XPvBsEj6NLry9UDxVPPPv6TsLmIa94z/VPrPDGDsAfuS5Z6WMPOtNoD0XYhs+JP9CPu4VnL19LOw7RxEKPtfmULyaXHE+fS4CvrMZJD67JFo+zy5fvrqcLD2dNno+M3QVul26Hj3bmwa+usb7PTPqx73KD8Q9N76lPfvdCT5RhBO+cI9TPuD0nT6/XMe9938fPrUm3Lw35z0+q1gAvWTwIb0fzT8+g9uePnHcqTwXaQo+z8I7vhlzxb1qO3E9rw7lPqPsDr0ibMA+Ki5oPZUcL77tu+29cM+avBM+jz0ofBG8CLGiPYcbEr2/Cg4+QJSiPqdSVLxHMgq+tVIlvn9/FD7jo+y8NB0TvkJeuDzN36g6pu+svpqU1ryaUto98Livvcrz/T2n9kW+92DFPZh4Ar48jb+9L/xqvT/+lr0nHou9+MsquwAOGbp6JB8+6m0nPo/41r7MVDY+D557PicQ8L3LgI2+ohXVPGrbijwCb1G8pTFqPjdRh73i0PM8lv8dvtqVGz6Djhk+/z9EvV97LL6XGGw8RyERPqsgGr34udM9Z3lbvC24HD7oBOo98E0jvrP2iL2fmMS92DEpvfLtkz4n7Ce+N7O0PbqPBr4W2Q2+B2kZvqOHTz2kPgq+6MGSPvrdjz6UuY08ktaEPovx2j6COIO+wIahvvWxYb4T3qu8kMZSvj3mnb6pmQy+5/oLvhkTs74zm3K6FcV6vt1kir6d8pS9osD+vWd5ubw3rOO9KkL5vrP3/L2NAn6+QjFxvpVSXb7mTYA+V7nwPlm6tT1D1GC+3ycHPq/CxT7AHjc7iHvJPSJBBT4okhY9E9sDPtpQOj7/imY+s84APW/WubxPxoo9InArPvR6CT5bwkw+oItlPlOHIz7t9lA9Ao6Jvd8G8D1n+10+34kRPlioUT24aDO9G2gKvsAj2rsakM8+mg+SO5BAiT6dKTw+J6jMvFsOo7xHdjk9LbddvgMjFr4PCIC9E0qKvo7kRD5tgIA8sMTzvHupvrzhfwS+1Hw7PrBDaz2dqH2713h1vvqZVLy9+nq+jycFPoCo2jz3+0C++wm9PULzvb0oI/m95K0bvqoJSL1f+eu93uU5PpcsmL2yH3G+qP5FvW8Plz5q0so8HaJ5vmPH2j1eS7g+UbMiPmijmz1D36o9QYgIvkChBz0YYTY+ZxxqPfGjHzxyc6M9b6Q9PmpfDj6T4+u7F98SPvLcv7xq64U+mLTMPaN0Bb57Gpo9gIchPPd+kzuHG609CmItvliWgb2o7HG9wuCyPj2eCr1X6SE+3cBVPl1gTT4gkdG8t7v8vF3skD2fcHq+WBhnPvXbXTyduji9SMsHPo03Sj7peYm+xMi1vcNJiz7ztO09EdubPfiQxTwAn/u6hBKfvtPQuj461WM+KlJzPZejDz66vUu9pwUiPuM0Gj6KzJW9PAAdPsUatD0jAwQ+n5KWvXgTk71HFK48liSPvdqkt777rGG9uuqEPtJnBL59fbm+XXklPiczmDuFCkC+L8m0PTf78bzAI326+jxtvSOlCj3yZ5q9al8Ivs9Abb3Peha+WWAHPQvpVL53YCi8ZZE8PmMRAL2IFyG91+QkPMch4b13J8W93a2MvE/0eT7SOZi++9s8PruLFj7w8Tw9bCoSvfogPL0KWJi8B99NPuVI0z7K8mi9Z5i8PQNhdz7/d40+ap+avLuryz6v7Vc93bEyPrAqv7xFkjY+hb8yPschdL6QzGI+vcZUPg92dT3qICA+akb6PZIMjb0NzN09evH0vYvxADyAY5A+3yAXvUsWAT6N/QQ7CnnhPU8fsD1l20C+Sn3qPVEUvz7DsZ68zWMnvutCuLzN6e69oAWsPvv9eD4AK3W8anp2vUCzdj2AeVg+xgchPkAFIDxN2/Y9bxaMPeVJFz4ojYS+cyIIu51wSz2aFxM+F1arPpwTxT0AeLa9g52zPTc/Zj0Nrqw
+QNtcvOIbzj4FnjY9WNWJPgwSBr0IB6I9Izv9vZqAqL6qk7m9BbcCvtN5/D05Uok9+6BCPrnJsL3TONs9GbeePc9Hoz2FEJI+E1NhPU/dIL5d+Fm+gJPEPs8MrD4Kol+9muSxvInOnz57bys+bSHnPI1DBjxa202+t8gSPizxAT768hE+NbIavdKpOD5rhlS9lwUnvlpb0Tq63Yk+OoJVvQD7YLtNf1I+B6PDvGv0oD3nmfY7x7D9PSNUzj1pmp0+GSaiPXuhkD43sSw+Y4RHPo0Oez4aGlc+0AgSPT7hST3b65Q+kveJPgNf7TvetR691aI1vbsDUL3ziRA9y8fnPgCJsL1wv2c+audJPpFGrj6HOwa+gHhfu2czfj7T5cI+wOhUvnB10j2niJm+m70wvlIsXr6rgri+ZYmLvVezeb5d/YC+AuHVPc/64z2Q7ZI8SJiYvmdgbz5uC8o9dUILPv8mUj7/WcI8i8PePZ4TCD4qVpQ9e0PXPTWvRj6qzpg9di4nPvolWr4di22+cOsLvn97374fp4q+WldjvRdLdr5fAq6+XnY2vnvnkr4zPZW9pfRtvSg0FL57H/G9PhPAvosjs70zeie6SnyKvlVGcb5Fh6C+JeuBvp16zL7NpJm+p1GUvoIAIL5oYCG+IWahvl1OQr4VCHO+/RUxvocWX73EmIu+J/BIvmOIGL1n9De9T8EsvnKAwb7IWDi+cHcLvVS3or4D2OE9yrqKvRdFCL4wc2O9+OQkvsGowT12fC++UDfePeBS+rx1yj6+S1eHvh9c/r7jwTA+D5oxvmLM+ryzNIq7ithHPfr/O71qKX69egruvfUFRb1NoPk8M7tLu1iMfb7dAMI9oPZAPrA82L2a54O+PZsuPa8NgT6GjhQ9M+f1PO+4Oz5vcTS+ID29PYB7dj67fxc+GGEuPk9ATz4TIH4+5W7uPRP9Az5d+Qs+JTzePAW+vj07ON49eBSTvR3pjT4KMyk+aOPqvR/Wrj2Lp46+2lFBPTVKrz0vjGY+rdyUvVJMwz4qEMU94jHXPcvtST4A9lU+J007PjKVjz6DAlU+0/wdvgRSxb2NtU4+UBPLPZMLOrxnNJU6/dQdPuqFXj0FIwe9ZcGJvZNY1j3Fu9G9J2+VPveBET4zjZ09nD5DPkItI77H+ok9uq3cPRtlRL502o4936W6PRP1cz5bNDQ+PVuKvQ+hkz1KRlK9ngajvrpJ1Ty7Phk+L88Zvv0bur2q2sg9h2spvjM8oz0ArZc7wqfBPRpc0z0P7yy9buAaPgCUObkaiBC9DUl8PVM1uL0+iZA9Z4x4vskGor09/2I9Ty1LPal7H707cR07ZaF4vXJeAz3HDN29jQX+PRvGar67Osc9GZiSPTlkwD0KAfe962aVvcmDJD5biUs+rWOzvDWlvDyUWDC+n8ESvf2emL6G/R8+LXnxPFdYB7uiMF0+CG3HPRswfD4ItjA9V6XkvlXlqD2/+3W+QzHAvcrA2r06Y9s8eiXuva/7sL0w45i+Kw0/PZ2Xob0nYLM9P6puvZDiPD0gWxI+7aZqPNft5L7T6SG7J2OoPuehvbztr4y+yLxHPS2P1T3F8wg+l82kPVWrOz5Q9F47P+5Cvs3uAz0LFGg+2N/FvN8hnT1FKti8Z1iYPi+7vz1C0lc+dvGxPc1F/T26Gxe+L1wzPuh+U75axXK89QMJvodBUT6Fd0K+vTDVPRDukj3qPAo+N8jjPfqW872D2zs+uFSPPs2iuDrtGjs8pTWJva+aiT6fzB4+gP1zu5s5G7zNU/e9IkXDPlgkfT6/oJY96phTPQPLhz5nlag+Ku55PidNDD2wC1M+97C3vdMGgz0z7dg9u6oRvrAPGL1Ay489EETwPT+vzzwO1A2+vWyYvM152jpDLfi9zs4lvpNIkD6vZIc8K2K8PTvE0j1lBnQ9A33iPTKkH70ENYE+W5QYPvAz8j0SzRo/h3ZPvWC4hj4fkCM+dzr1PTeOQj4ahsS8wjWePCqCZD4jHvQ8a5AqPncGlj0lQOy9wM/BO96WQT6PH5g++t2LPCiANz5I5ZM+9AqxPpuumT49f3s9aaChPji/gj6YGYo+CzjKPcqY8T1fdWM+aPiUPSGiLj46GzE9l3orvuKFH76vP6Y9VRCkvaIDeL3nrM88CP6wvgXBWD1bm7I8/DATPrdl171vsIO92vRfvu81gr6dIPm7w2qWvR22PrzaFA6+K/AEPj7xuT1L7ak9Cm7ovsiS2T3GKjk+qLFOPiMVMD0a/Ew+aO09PhX6Xzz3ybg+FMyPPihg7T37ukU+t4XoPbsCsz20mSs97DcXPsAkJ7uHbKg+5z+YO0A5/T0/wBQ+oE49PDO/BT5zl4O9Mww9O8Atl70KX4w+Z2PoPpqvfzs3Esg+zecPPgz3rz5T6hY+Oo3wPea6xj5vbKk+yHQ5PioqjTyPpCW94Dd3PmeKQD731jK+nQ2SPepvLj3QD989x+DYPKiTOr7VYma+qEXFPepvWr4CcD6+n3fPPH1YHj2QHTa+v7GOvV1THb3dpQA94xDIO/sx0r2C4vo8O+xdvvMTOz4PtQo+12ssPYBmOb19Ztu8r4Q5Ps2k1riQzDw9+2uvvL2wi77HsiU9DZpfPg3AkT7G4I89nUt8PJIUoj7/RyS9MZAnPUBXST4ZCQA+CNsHPk3zMzxx1Tu+cGnoPQsoXT5cTpC8iwfhPSDKAD1n87w7KlCOPSpTeT5KJE69G9eaPhAZXz4wrFQ+wt4BPjPfDj7j2QY9DX6zPv3Twz3DIU8+qPMbPlCPvz2Ok609V/tAPgblur69+6w+e3jmvSwxFLwfq5i9q7qjvVhUYD7wthi92Y6ZPRM//r0SSEE+WqZIPQewcT4TXbs+N/0CPQhUJD6bS5M+5nEuPe9tlz5nVoy9evQ6Pm13Pb4rCiO+U0WFPO8dWj6X9Ka9Wq17vvj6sj2/Fw6+fnOMvrfJ5L3XRoy8l8p/vMf85b3vuSM+yn9Yvgtwjb3Qaiu+bY/ou1qiNDya0ei9l5Y8vruHEz6TleQ9HFrGvFLFk72qkPK9a4BGvgdNzrzm264+57CvvedVrDvw3pk9QEHWvOaGybo1Ck29SuHlvRcTN71a+n69KmAJvRPMgL4/8tk9w7SQPFsnDj1doIq+OtUzPqcpez7K3J48x0cfvR3Z/j0buUk+F9jevXokID4qxam+/SQRPrf0KL27nLC9IBWKvfPoY75BeJG+r5/zPd00mr30FKK9dX5tPnI6Ej6qBSU+V6wRvk3vfj6cmS4+9bTiPXoOZL0Qw3E+Cjj1PbsDnj2WBR4+PRtdPn7Ktj1t9k8+EwpoPv3gpr3oRtY951YLPiVMqbxrYvo9eImlPTJvhT5Hw4s+w3QwPgIUQT4fnvY9jcGmvMAUVrx3XXe9p+m5PsqaRj2QkpY+F0ZkPuO7bz4rCmk+ALKquu53wT51g0k+vUjuPWlqmj2Pazc9ZfrHPkMKij5CSME+qhmpPfixLj3dHiG+sOc/PujxFj6ufRG+OukTPvVbC73nn3I+zRpXvrLYVz34xGs+B0
TOPSdikbv1Bky+p90rPo3kOLwdLbo91ewCPgdc7Dx64yQ+gEIdPAOR6b1vDmu9qaFBPhcvqr3j+x69CzogPqCjDLyDLw8+Cj6ZvFDYiD4PuFE+mjKHPUBqoT6wv6m9S58Wvf2sIz1Ck7Q9Gl5uPC1InTy7rw0+BSw9PlUlCj5Xeai9E3s3vboVQz2Ho1w9OsB+vMJ8bz5ruZe+x9KPPRqQqD6H+Zc9Lb5yPR121j0t+0U9I5VBPv3CIj6xbcY9s7mCPmdegbmrOEo+9jQLPoDCmrxz0JA9M7YUPYfeX70aneW+FgcOveTuS72KOYO+z+CIPgcAy7ynotQ9sVKwPCfegjyb+co9GnMUvdRaxL2a91Q8ExkGvtw4Bj0vwBA+/xDBPjthE70Da1y+dHsAPruXxT5u5Bc9qh4UvqVPYD5/wak9Paa5u7kmmT5saZA+N4UFPu/mhD26kYM+0zd6PfJNqj3bpBi9ry8sPjOE5DxvEiI++0UGPWpWIj59rd09ENJVPVIifD7H9IW+CiUsPsVusT0atmA+uRYMvh2h0T4XNt09MQgbPujFXz7Xh9E9IPwTPppwgz5gsN08AFiKPpNwDj4oj5E+595EPmWjyT4QkgG9dzjevO9OYD7nFms+kzDsvQHyiL14I9A9zDm7vH2hLT0zeZy8WKeAPsK1cz3iaJW9MISLvqZ+wr3bisc9b5LrPSyKljy8say9x3trvjQlkT2jXla9CxAHv8Clhj0Fgzs+wIxHO9A1xr46EE6+kHpMvt39cr3PMRc+vSgkPSeu0r3XDne+2BgEPjtAFL4BTEI9Ios8vSk4ND1952Y9HSpOvk8SOr5lnky9f8OVPEvQ2r0WMBq+GmFmvgjUFr6dEba+A/hCPnNHpb6/ho290Z0lvo2i3b1lvme+pMNKvsLIcz6Akla8jztFve3eljvfFBm+UJ/jPMfRIr5zPys+CD8+vW+oi74trP29q25PviLCDL7Upr49ZT3lvYttmz6/z4g+IGFevBsfHLwOdJs8XIguvvdTqb7ixJO9nz4SPTLbMz1L0zS+l54SvtK+Gz3KgBU+mPtgPjfesL5F6Pw9Qq/hPstGgz5fPQK+yUYaPuUaHT3+4Zs9q5D+Pc2JNz5toim+JxbEPQM5PT470sM9k8mCPtYOpj1X+zA82CW1PkBd1r31epi8ItsKPvAbZz7CQZQ9aFtevd+Cw71lpEu9bcGAPG/otD7nBoq8uoYOPdkijz7acoY+v/xaPhD3qj3+WaM+ci+BPjeAcD5LVzs+mufLu2Oe2z7txVw+15R7PqsnQb7xjYm9yCcyPoDryDsFkII9XwEhPY1uwjrc3j0+c8JjPoqr6z1VPCw9hZuQPVq2Oj3HVsS9UA6Tu0fzvjwaYoK8WneavrkyHD4A42c8Y1LGvGgs7b31pe2+UKHRPT3Urj40A8O9ag7/vZXWrLxjupW+hYL/vY357Dtio0y9L3aYvSJW+b1HGMQ8OLiLvFkjSL63k4G9n7Zevhf25T2Djb6+KkI+PIVdGD4TeOu8K5yyvjp2z72fV0G+ArZrvsdDLL5VsqU+U38HvvqBpbwZxwG+KhSEPWJPsL2ozVK9Z6V5PHDu7j2BTB0+H7mTPS3HEL7iGzE98C5JvSc4Kj5wGoe+x3aXvdqmHjx7g0u+neEbvu89qb1VUGu+Mw8sOoLqVr6Vi4I+l94rPYRVMj7PLYC+nSsrvurUxTvn9FK+6qj/vZmRJj6Lhmy+g7Ylve16bb5iT1e+m2c4vpeg+L1qCB09lLsBvmD4g7zXB1++JxKFvmKujz1qNjw++uAxvMehML5ayc49CydIPtcuSz1joiq+wG/2O5ACz733RlM9WhJNvNeKQbwxqr+9+owSPnAGWr17vfY7cAgCvbf2cb6dLO69c7z4PQIGtL7VJ+M9N4uTPteeqb2WB4O9OFQHvXs4rj3TXW49G9Apvjs8kjwzQlW+2yAdvndI4T1YjBc+f5dEvl9FU73Fiu09MyT6vQKjoj1lERC+n0O1vd8uyD2aGaW5988yPguVKL19iki9Px2pvfXpiL5Xp/y9i+LiPaeeGbxaOzo+GD7WvYAmdj26GgI9/w4ivWsMlb5CWFE+IwWFPrMWEz4t5pm808mtPg/VIr5/mUI+1EODPkXNOz0uh0g+iLOMPRK9ID5TNDA90ONevPPiuD6Xi3A9Ry52PmZ5vr2mawu95TeIPmfSaDwufpA85PqvPf0DwbwzZyw6G4eQPalxxj7NCLc8zY74PQ/1Gz7Tjbo+mY0wPlG1ub0Y04A+Whl8PPVd5j1D9qc+/lurveLVAD7Ts1c+Ak8qPQoN77188wM+yOu1PXpyyLwjGlQ+EmyZPaAGSr3gqjM+fRQjPirfpL1zE5K7LGMgvU0LDD4NMLa92VWMPU2RwL2lHCm+m/DVPQe0eL2bJOc9/f7LPUuxVj3hgya+iz3aPSs7Sz7xlAg98JZ6vaAquz0X1HO+TwJ9PUdusD5D17U9LzWAOxdMXT0EL0U+4rhnPvrer7377Vc+FWHLPeiyBD7NXMw6D5UDPmshaD4374Y8l3T7PZoubD3qnju954D1O+dKH7szkdY+OoOfvdJ+tz2y1/s9Azp6PhP6Pz6QS7U9InTnPar84j4KT+09Slt9PmPlt7zXDHo+ZZdMPs07mT4n1Ao9PloNPvue0T38jJy92h4XPq2F3D1K8e69y1CYPgUZfz5Sy+e8r6qEvs+1AT6uKJO9IlOUvU/8Cj7BlAC+K8NRvoNSYb7ru7y+Gs0Avm2bhL5fzJq9+gvYvrOvSj4/kKU+JrZAvivnl75NMOg6CABDvezynL63m3m+wOMAvsj9Wr1YBPq9mISjPaM+Tr5wpMK+I6kPPQOPbL6Xa3y+UzZTvoIwq70os3q9KujTvZOOJ76DQiW+07HAvoX+hL7baf29yg8TPr30dr7S/bC9UGU1vSX8N75zM9m9UzWOvHscDb4K9lW9y3qHvX9DaD3XVHa+Ot6IvR/8ib4AaCq6EMNlvdgU4b09wGi9prozvrUcmr1Jw4K+sldNvvqMzTwgqKm9kwBkvr931r0ID/I9a+Gqva/mOz6C/Am+WyezvpcMhbxCJXU8CwuhPRjncb5k5Iq9pzmRvrjDoL7vdia9koYkPkO1Mb6q4pq+YGdKPS9ZML4tXim8aJlYvVAfYj3nb/Q9I+lKvD+qBD2Qfzg9LyWTvZItUD1f0qO9GYMiPadUyr1SWiW+uBNMvZ8lxL1H7FO+YscUvqqs/70lCzW+GnjCvArcXD5QR7W+96g3PiJyHz6gbiE9XKyaPAudFL57DPY9zVeevBXfNb5zw8e7CtfbvXzTgT3H07i9F+dLPU/KB77DbCm9+u2DPaYlMr4oPxM+tIdDvoeJCL7YyGI+l5eDPYa4Nr2voD++9ahXvZeVq75971O9SwwMPmqgvjwjbMm9Ow+aveoeLr6zU2W8SkOtPTeX+j1/G6++qntxPu/gwD4wuQ8+/isGvlhTtT1nwiO+3VvIO2MEPj5A1og+WCStvNNO5bsj3yI+uGShvJAv5T3rIqA9V5AJP
YJJ5D1Ofcm9qXMaPUPlBT4qAdo9n3WdvG0d3z2nIQA+U1vdPCURAD7R6TE+7XAnvd1f1j2HM+E8m2IpPgNTST5BVws+s99wPoPXXj23OKk9t9EmPjOJvTzAYp486IQiPb1UPD6aca+76awfvnekBT4aaQE89TpZPt2mxT0Nur683Z/9PeO4l72PGxc9MwKIPRBLqz1HjAI+1jObPfoaMD6KCDK+tp6dPWc77juyjVe9g70ZPnpNhD1fMMk9Gv/OvnwbmT2DUCU+Thctvsom+b0nMxU+o7YjvgyzGb4XwIs9M6KPPethrz2SoIY9/y/gPUWSfD4qgoY8+CxqPfnPKr4ZrpI9PxnlvSv3mj0/JQ8+qn4HvULTRTwQ1a49qlZcPaonMD1HJy89eR6FPp/gnL3+qI8+JzWKvJ4gFT6nGHC93ZSlPJnjjD4dr7Y9ez5IvogbBj2oRxw9HHhLPmXq5j2iEdQ98I57vkomZz2NDZQ7mOccPpJ2AD4ribe970lOvpcFCz7bOQs91jMEPjtSl70oJkC+my4/PrN/MD6QgGI8ZybSOpfrvj1OrAI+IEbNvetomj7Fraw+ZRAZPZdehr5KNyY+Zdq1PpKk9D3aR1S80LxTPs0UOT7MCiw+VLqrPb8upz4vzpw+CXXLPrpm6j3sgUk+pZLSvVJdhD53lFA9B6LNPs9rNb0wg5+9K5khPmfxqbrVtDk+O32dvcCMeTyAihG+R5c8PeObBz8wMAM9jQQ5PuMSZj5oaZY+i401Pu0ZML2BRqU+JzkmvNkuvD3Azgs+Lef4PaNmhD53ACc9OukTPqc+XTyX/N09ydIQPgA09bo/Zn8+jfrlPCqKGr2bZ/I+cuvcPL3iZr34Hcc9EU+Rvfgpnj0fh00+N0YfPucXQz5V3oQ+WkxIvU9Nl70k+ca9D5chvvQCsL3Q9g2+d/7GPdcUeD5nquS5r307voWawT2q6fk8kqkHvvb3Cz6QQU4+QlWYPLepjz2XM9o+gsajPSdDgD07yEE9eRsrPW9fMT63LPG8goe+vVfsEj5HUIU+GleNPdYpEz5rTb+9k89vPY3Vwzxzt0A+qlJevc+mrz3Vy6g9Q01qPgoPnz0Dzdu9ON6SPtsMcT6aQxc7v9JOvaY2Hb1/wQQ+R2fiPe0N/z2DEpS+ULWXu1mIsT0zKOE80mrkPGOXKT4Wt5q+W2ecPk27hD1j3Qc+HUtivZq46zw190C8N+mxPQJGdz6np0M75VZMPLKH2T3Hnji9", + "polygraphy_class": "ndarray" + } + }, + "attributes": { + "span": 4, + "factor": 0.1 + } + } + ], + "config2": [ + { + "inputs": { + "data0": { + "array": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA2NCksIH0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIApRUf4+KJUNvuvOJT+k8sI/58VvvpnBb76lI8o/mnZEP/Je8L435Qo/FUXtviF07r71xHc+Xub0vxzK3L8T8g+/c6SBvwfloD5EdGi/Xsa0v2Gauz/nMWe+OkyKPSZetr+rXAu/YivjPcJTk79+W8A+dcMZv+JYlb5yCRq/dBftP3cjXbwSY4e/TpJSP5tEnL9X4FU+eNb6vwACqr/+lUk+JQw9Pyt7Lz4C2ey9QyqavjVAvb+2Rzi/2NjrvshPhz+97q8+Tavhv1bupT6BKcW+w0otv9GWHD/L94M/YGhuP/bWVr8WUZ6+XJupPlO9eT9TVvW+Yx0+vmKcjb9MHZm/sQFQP0aZrT8LepO9xHOAP1souT6RJiW/2Ai5PmLexD9TvhK9PkbIP+epJ8A0aFI/vEWyPX4Xmb4T7bs9qGj+v6vxYL5117Y+oiu9P1utBL9w+U6/JnMAv8tXaj8RUqg+XZ4Hv39lAz+Y0MY9Hvl3P8C5M79Vw6e+ZsLIvnVUu78UnZc+CamFPsmOpzucN3C+3iq1v9Ne175HeK++C2JNvxUoJb7B384+im7xP4fEMj6l3YM+GneYvUya9b+gM9m89LN2PcKlHUBG+kS+amSaPu4tDr0+l5W/BUiSP69+QD8SgUo/ns1ov8SOsz/bb7O/RDwWP20wDEDKk32/4/gQvwQWzD3I4wC/JHzGv79qjD2R+Ye/tXryPmNfa79AZMY/SoNIvz/lpL6qQlA/9oydv0LraD50UKc/A8LNv6kQPT5cD4U+iyVIP2dUnr+5BKm/9p0FP2AOmD6ZQIA+qWGxPhoWLr/q020+mA2WPrzfNr+z0e4/O5ryPqJ8mL/mEyg/vYR5v2B+ST/cTJQ/PRhSv9Gfdj8GWNM+iXJSPx3K8j8GR3u+2vRAvzi3Y7/x2FC/gOedvXmrrj5rqo0+SMJTP+MFVTxoDbo/GoGHvkAXLkC8KyA/rW5bvwETib+gBvc+btNkvr3INj80TPI+UyeVvXnHWL+E5sG/nJ3kvvQ8Wz9kO1s+XnSfv1dWMT5SSMU+e0Rivx1qHT5EbG492kySv+gvtz6Tjw8/bKGKP/zihj94V7C/TRVwv1rZAz96hwM/KtoDPyeTdkDhJRI/N1qRP3Y5dD+UwSY/+Gqhvs9LQj/g10W/lIByvpKB+L6hrac9XiMUQIwC77+/ri8/eW3Ovw6h8b68Yos/PqWDPYrzib8lHje/HvotP0/5Or9Sp10+iqk6PUjPJr9hNglAhEgiP/CbAcDh7T4+1mopvxI5Wj+k4kq/8PrqvdlGAT8iol0/UKOZv79Dq74ILPO+lkAnv2j64T/DWc8+pWShvwD5aj9o0QdA0ieEP7d6wr+H7fe+JSqiP9MpNb9NPOM+a05GP1FLbb/c0HO97XBPwCIfg7+dUIG+XLefv9vy0D/fDre/g03hvtzgBT6le7g/Vcq3v43ilD+SqCc8J0R7v9SY7D5O1ks+0KcZv2X0jj3TR8W+yHvoPWWBKT+ZAss/vXCev56DCEAD3vm/jW0bvvWbFj8r3o8+PGkfvwAeVb6eavy+nOAWv4Z/WT+8yrY+hmIxvy1MZj9dVp0+uxdQP1svIT8EOVS/BmgPv6JOPz86QRw/0jmrvFdJ8D2GiqM/OXEXv5MODD+XC0++0udevrikjD98TlM/K0JQP+4Zpz9AEKw8eJQuP0nbnr4i+aU+OUQFvtKlxj02XBg/6XZRv6zpBUAtxYC/iGqbv/o8lD9oqko/UcYfP0DbID+wpki8drJlv2w/mz14Wi2/cqF5PzaWFr7JU1O/r4ykvsBr0z5BUBC/CX1SvySJeT6E2Ho+B8cBv+8r8b6Anm0+1Fq5v8YntL/26ze/5JFavkcvnz552Lw/lY9bP+3GI77hx5u84lKAv9yol7wRy5O+XjulPmjFU7/l8wQ/yjDEP6S93r0prc0+R60wP8Vszb6AeGU+W1BOPG
gKyD3440W/kMnIPKL5/j4Tv7k/xpJ1P77NCUDkcES/aFBfPwK+Oz67JQxAo+xOvwP4Vr/McRm/6O0HwOKXBr+FVkK/1AAaPqT6rj5eJvA/+k5zP/WvE7+B/mW/1Nz7Pmf9qL8+beo/5feWP8s38L7+R9u/sUutP96T6r3EcJ4/NRbMv6RwGb9X06s7uW5APftu5r4Ycx8/yaeIv+3LEb6OXfY9Q7IDP2UsNj9G9I+/2l3Ev+qKoz8QJao+0Jw/vyaMxj/T5uw9NvOWPyFHij1L4wNAAq/gP3Xwfr7guHg/XDclP1Ivrz85BXe/EqEvP3R6hz9gHuG/BHWXv8iCAsCx74m+2bA3PzxNwD8Bv5c9eXbQPyqnsL9wCNq//IVjvTqkxD7u6gW9+VAEwJGEtr3b+Ka/qW8rP8Syuz72m3C/yIwDv0+Uh7/mXYC9NYR0P4tYfL8xCQE/974Hv7f5Sr+8Mtu90oKEv/a7Db8QVJm/HXz7P4RwED02ITO/jR1bPkAM5r3bRWK+BzodPwbsQT/szge/02gTv5PTjL6tUhPAyPHBv7z1rj9NjtI/TgN/vj2ZEz8uXJ8+YgxFQDtOjz/U/AK+TJ50vwigzb/FWFA+NKBBv2kMtr/NhSW/KnCKv0L01z8ls2E/rp8CvM9uvT9Gc549H31cv7v1wj8C9gk/e8SEvyToQr6FKGC/lf+wv/kZbT/EZ/Q/QwSzv8AeED+DkCa/gGj5viGnF7+ALl2/nb5GPSa5VL9ReYo+esZNvcyudL4YVmi/SacTv1JhQT8cPAA/D0F6v7xuyz3oWkA/Eq/Vv6cZCz+2oSm/wRISP/RkQ79gBue/UFPQv7n0RD1Z+oQ+S4Fnv8x6Iz+wrNS/2FSHvZQCm7+73ia/HCVCPQ1EXL935MS+NM6APzCvE7/r71U/PJqQvz+hBz9Shbg/bC8ewFQBTL8AuxM/IetPvtUGvj7Gnhq//VWxPdppH77ieZU/c0OCPkLarD6K4dK+hqf5vkR43b6i9ck+R4vXvl9dlD5e0wRABwJfP47spr5hwZk/Qe/QvqJwAsD5CIG/HHbvv5P5s74k4pY8f5XWPwZjpz7kW2C+7VNUPz6DDcDxRHE+bFdFP1BCvb+IZpI/Z0+tPp6g1L7+/SE/CFMRQCY7Oj6KLX4+WjHrvmePWb/jkFQ/Tyhbv1KRkj2Ej/S+2Dz1PsLVqj4czoQ/b5ACvwwtir5CkHq/aHrjvogtwT4CykE/BxNsv36eXj+Lha0/va3TPtg68D8NF0a/2FCfvxut479hfr8/goQnP8CsY70MWI8+BhCQvzOHHECPUgQ+YQrgPdfLOT/YRvY+20FlPolcSr9NZPE+LubwP7k2rD+K7cs/CN8Cv75Wfb9JzgC+zj9kPXgOjD+uoti/TsjDP9PMIb4okNq+o4yBv1jS079Pu1I/uyeWPXAdpb8kxaW/+eurvn+i1T8o6YS+/WbAvxKke75soou+ypkswEtkXr0aemy+lToyP5iq7D9IM5A/x6uJvqSijb/tsSRAB49yPa83ZDz6ocW8u9ZKPjfTE76D2xK/8/4Lv0soBr3jHQu/EH02vxf42T1gjIK+2ILAP32pKcB/tos/uH+fP22yBMDAdK++fy2+vlgptL//Hke/WSeOv2ZK4D+fiG8/UcKiP4C/OD/FhJC/9kYGv02P+j6vbpy/EYc2P9wXdr6D6L++eQE2P3t24z6P0Li+62SUP0hgir/1rR0/fNUXP998nr7o+qY+fSSgvwmNbD/8Vj2+LdEFv+9Fhj/eTzS/dki0v6A/x794Ixs/HOWjvxmd4D9VPgXAeyXZP/cUWD6GEca90X8Lv5JbzD7ZJhq9/ziNPy/w6T206Bk+YSu6vtA/ab0ymJ0+zObav1eRrL+ORj4/WfcuPiFmPL7CApc8O/axPrEtCr/6Pke/qYtIPqN2er+BBtE+Qu7Zv1+7gz9M+PE+VBaDPqORez9ELtU/4daBP8Sh678uyaO/HPQfv+a81TxNhQQ/Wco5v8k/Pz7HYEG/bowcv3kNtL8EWWy/AAStv9TSeb+83YY/zgtzv/N4KEAqlPw+rkU9Pla9W7+CRzM/AF0Tv0jg+T1t2CNADbvEvWMbkz9fAzS/F1APvZip4j/qgCC/UP7nPzsvNT/T/Q+/eeUhP1T5eD/wLh8/IP3Iv6klOr+GdX2+jnCYvV7kHj9A9zU+kOyqv0upwj5ZTxw/bU4PPwZXij/se1U/pxnrPgqzj71emtS/6/bbPhWsVD5iDIs+f2yjvxBgir+2zYY/lgQivdR2Lj/w++c8JsPzPF4zcD+CGwS/+NrEPVmv7L5Kdt6+z0uevgl3Yz6KHvW+nrygP/wEZb9IWz++biThvpI2uT+oRUk+exOEP9gmvr/Quog+2L5jP4KEqD2pYYg/BG0Ev39ltD8mIRNA+sW5vugY5L6BCLo/bC/KPyjaBb+6Ite+EkaQvvQWrL/GLGu/r4eAv2KORL++EQ69/dVvPs12xj8hlH+/jfx7P+QfW751mkq9+MAsP1u1j782y8M+c3IqPpIi/D7qDZQ+oyMdQO1CI79r7we/I4Yfv8AzDr/OKyO/sjGYPxXTtT9uHBK/QRVVv2Fd8T59Xg2/0gciPw3LTz7nA8K/pxTGP1Lf5T+43xy/0oDGvvNckj7sPas+XJYoPzGnAECnMTW+NV1Mv4iNsL87Hju/JrAHvRK05T8tggS/qyhlPk+JhrxFHZg/Q7khQATnB7/Ol/q+EKeFP3GQLj/oYOw/UXwVPyL1t74oNRc/AOqNPx8LUj+23AE/zIiIP3qrlT+W6rA/2hEmPwEhK74dPBY+426aP7IiUb+/wrw+tGPJvkN66zxQpKM/ea9DPkA0Pj3ED66/eQo/P3Q+JT/EcgpAHJWdvvJoYD5wXn8+/enJP04qw73p244+G58bP3YWPz7ykuS+h79GPsRsiT/aZIO/NCkIPh47M79K95g/yvfCv4EVD7/qIcE+F2PIPxOohr2OIQ6/wcnwP4VYub89uQzAk0nhPqCGAL/Ct4K/2VY1P+WmeT51ZxC/BOGjv11ZXz+WcyY/txzLvZpe7D+K+Ii/aUTDv+MgMb9nuDq9+i15PpYGd76bQLQ+cjKgv0fNuD/hPqi9jQOPP7J5rz6P2+k+RdwRPw465T57iSQ/rCGqP9g8ST5FgTU/Wce3vcNVuD8MKC2/N4XmP6p8JL2jI7e/zi0DPmdZLr9qNFc/XRInvyhy5L543PG/spTnvtcgG8BUvcq/iapCPzMqST+T1dk+wIt3v/xsQ72UGGy7S0WUv1tvwD/RmmA/b0RivrE/3DxOYlU+yasCwBEcfb6Fli6/FjWAv2Hsj76YGuY/Rw4kP8k4Er/JlBI/FB6zP8u0bD/6PnQ9pp0lv8O+Mj/rdsk+YitlP5+aIj++V4Y/LQUJv16gqD+NV0o+E9EEQJ1uML8QNN4/IKlKPlXDJr/iv/e+kASkvkcs2T6M2AU/Ad4Sv0iDx7z1GglAIiDdP9Fl3z6JqRs9/NL1PYQPHT/e6oK/3MaDviqU1b/5Zsw+oq4lPzdk9
75meMk/5OWcv6Nwu7+z1mU+UQeGP/GK1z/h8uq+NxKKPw+7Hb02xTC+ijdiP6L+Jj84x8m/Rv+8P9WmsD/hJCC/w6bKPobx/D4Id4U+zOAMv4LvK7/FVtG8/BuWP2EpCz8pwb2+DI5FP4ZONsDBCpM/8a7ev9yRub5YUY+/H7ilv/mVlD+Idu++9WixPssvQL2yPvQ+zFSdPRc5pL9YC38/qs38vhM+x7/pMdu+5hjAPyKoWT+IgrK+6NGyvlmtpL5w6QRAC43DPmsu3D5U4IM/JYV0PiyhhL7vD0m+rqOSvVh2GL3uRTo/NsVUPU2OOz+8TqW9fQuhPQrF/79zlGo/8GaxPpd9fz9AXDnA7qcFQJXwDr7v2I0/oxuFv8DeHD9S1oa/U68fv/v+9D8/QkO+uaZePsK8Xj8Eyv0+agcaPivcuj6Q0RlAsAFsvensTT7Ye4Y/4IGNP5zwlz/TgyM//E2Sv0kU0T9yu5K/CfOaPjkYQb/3WoO9jFOoPu+IpD7/Bdg+F47OP6Y16D4yBHq+a852P5JAmD9BIp2/Nu8YPw+AMz8+Wpi+KRewPyyoGb4dlwA+vjkxvkM/fzy+Uoy/llO4v74YzD910li/5Mt9vyXRCcD/kiO/Alupv44t0j+wQYE/nyowv+knEED8VHs/TVCmvkP2H8DOnhJAg92xv22c0r+X44I/5yQcQNovsT9ZXBA/0kEYP3F5Wj8lSUI/U/iPPmNn1T3TMIC90wNBv6W0j77Psti/SWbJvU8Ufb9qQo2/LDY4PiItsj/MFms/KgbJv0VYfb9h1nA/S4R7vzwGZr430Aw/bOV3vx3P1z1Zwaq/O/MZv3W6oz4458u/6IXhPnPfoLz7bw0/v0llPiecrj/aOgA+C9vbvh13+j2UFQs/fyFIPXlDJj26tTO/4LMpv5KIs78h8t8/6TafvzxiMb+K6Te/xBllP6UDl74Dtp8/4mksv1PYjj5O2VW/IEoJQDoDmL/SoJ4+ND8iP3rd0z4MvD2+uO8EvqhzMz2xhxa+yMB2PzV5DUDItw6/tFWvvzjNtL31GSVAnr1Nv5TO0T/mvtY/9rcNv+CoET9Nb9A/CB3Cvl93UL4K6RS/jOOBvw43Jr8Tqpy/G5sLPfccRb+TZW8+lyfHvyJpqT4nYlU/uzL/v1iEvz5CJJ0/hNWav9oW1j+pidY+qHs0vx5uZL2D7g4/t6ibPerrCT9EsWu/7WwtPpn0tL99yuO9fmZnv7FLPL9NOJ4/DbCLP3rwGz/o0Iu/SgCivslGmz89HhE+5W8UQPVgyT6GqEQ+g0SevvK+CD4OIRy+nEY1P3HqdD+bNkm/12Wqv8cI67+4CwI/HjuNv/bICcDG88Y+To0fQH/uxrtVp1Y/J5anPamGyr2ZSGs/3Z6UvqXniD6TtaQ++gcrv332fT+nKDO+hHhBv7ZYCT//AWa/LNzmPNhnFbyg/oo/pgvzPlYFzbwiWVE/UvKxP6jMDj8Jnik8QOqnv6VViL9qRpy+/ggcv2h1P77ACWg98pkHP6phkL2+Fvk+KQuEPRfc/L9JeHC/sIsTvkfXmr/ulBk/pe/DP2MAnD/FkFq+HdC+P0g8GD6Ilqy+9Qcdv0/dmr4dv8a+l4EuPoFtJD69n0c7XLbfPhlnmD/7FXM/IxG+v3JzI8CXL28/4vWuv+coZr5DxpW/TKfmv02dCj/+V0I/MJYTv6PTJcCt1gu/iZrIPvlMvb+0wjs+KdZ6vHNMFD+N5vQ9DBt5v0EpmT+UVSK+xa3fvKbqbr/k9eK+boJiv8YYMb5DGds/dZqvvy6Jzr9PT7w/8lhWvlpIK7+ZG4U/nwkbv7C66T+NjC0/hs/5vlYRCkAiEBu/9vk9P+E8mT51n6Y/md/HP8gWAz3+30C/eIHrPsF+Lb9W2wBA688LPm0Lu77WHD0+om6sv7O7eL8qp5k/OSoovy4Bhr8TYgk/J8WXP1MNOD/7/H4/U71Bv+T9tb+zK8A/Szalvi9tgL5EAqo/F2UOPx9q6T5mjwpAnL0kv+6GbT+Yhmk97YSJPtqkwz+GAQI/xs0JP+xHiT8W27q+ctZWv0+8hb+Tsfu/5pgDQO81jb9PkGK+ebqNvmlknT4n1FA//kdcP5BEFb/0ISu+Uq6QPuOofr6Avc0/EmH7PvMgPD+Wsik/ZDSWP7ddOT6X/qW/5qPMPlO/Jr9sUwe/9BsWPw+Anj+7Qa48XB+ePi7i2T8AiHY++oEmQD3FED+tYOG//9pAPzEnwz6eFqU/nVUsP2jHDb7OtZy/MQpWvrW7Wb8vnRS/E60WP26/1T9jEso+shKZv/ai4z44K5k/uxocv8w7Cb7BpnA8Ge9Iv7X1JT+qs/e99czWPrAyY7+J+t++/O04P/3jvr4nDd0/HZ3MvsETZj5Gvm4/Ao21vy9i4b+1SMO/W5yhP5NGDb+JuSNAiHIQvwT7PD7cY8U/02MAQK3vA0C/q5o/exSDP9mvFz+sQkc/ghoNv3t1Ub8AJl274UQuvoIN6L5zRjI/4o50P6sOtT21H70/3iKSv6pORr6rgTe/rOruv35Uqb3CVvm9ubjBP+B8IT+OGIO/6FLtP9VKnD9bBBU/c+tnvs+ddb/jkb6+HVyLPyA68T8BicU/eUr6vqBPj798RBA+OVzivz12pT4iJRe+VpzuvjsfzL9MewM/FV8Iv9a/lb8l0zfA8GbhvCTZ4j8kpNQ/hQjqvpIqGr8uA/A+MJZ/v3iEmj7WHUQ/JgydP5Udzb3aj1C+eMNgv26uU78V6me+VRe8Pq/gaT8jnU2/axC/P7bQir53Cq+8REk/v8EmG8DNUGI/zaE8PyoKkL5uMok9mBQEP4EByL//bwe/7lRLP46MoL86TZY+eaStv+nP7j7O/BG9pLzOvy4WlT8yDjy/tGxPvwNiTT6MBpM/cwaCvwOkfD3Wjds+Xm8xPxitND4T67u+9NxTvzRssD3aO4m/aPc6wMSE3z5GaGc/SToXwNs+gb/lgB4/Aa4DQJFXqjxnXjq/NEk7vvP7rz/pXSW/2ZdMvykq975YDXS/kzr7PXf1zz+maqU+ejSBvklolb6jFsi/exNiPxxpn726zzi+4FtMQCT2mD5hdUC/i0vavkUAkz9R+uc9fhm4v5ZSaz9+Cyu/OcjvPwQ/ij9gB+W+WPijP9T3ij1gT1o/5i74PtSqWL+vvyS/wtWDP6xnq74Dq86+6oJ0v/rh2D5pAARA66SIv+Jnxjylw7Q/BxujvUed5z6D/Ie/EUvbPr+iP77NWHw/RPyXP2m7JUDXYhQ/xs6mPq4MRz420rS+wU2tPts+l74JgSw+CaeoP2PWgL+K55E/OpeoP+nN8b14zAfAOZobv+sDpj/NVbu8RdJ/v+44Ab/gMlc/u/YLP56qdL5p0Lu+hpTIvhMjbL+hxM4/MAelvtrLmz98usI/TpF/P1b93L62tc4+3TXGvAFZZ79rEqY+x+qWv+EFmD9S4u2+zPxNPhwLkT4xj4S+kTEWP5Im875XDV8/EEmsv6ZpAT7TLvg/2wqAv7KALb93jwM/SeQ3PsqFsz68
dvo+G30iP6UKjj/A09E+Pwx3vsotLD9VL/M/JNEHvsF6eb/StI0/Zor2vQUNC8Ch8Fg/RQsJv4Npub1T+ak+WRJDPqKeNT8S+N6+51oDP03jhL6vIj0/uYgdP+l4b792AYs/5jQJv+DcTj8VDbw+mknrP0TUZL6n2bK+3hWfvGE6m77+yEw/ROPOvxHfhr/ErYi/XEdzP2H12j956dW9mt8svoJ3jz1uuJQ/BGdtvwAXdD6Npnk/tUcAP68hQj5HIoA/wgEtwD2JLT+BcSe/LlLqvyzeAj8L1K8/Tr8Mvpbvcz8iX84/Hk+oP1zq0T8R/D0/9HyaPTcNzb/O93u+At9Xv7rwCkBpGzS+zVL8PSUuDT97mDI9b/fYP/NlH78tR0c+jRI+v3/2qL/mnBy/5rMXvYDN276AQjG/NgK0v0Ezqr2tmsC/B5NCPyzWqD0Mkbq/p1CevlKNQL/XaqM+4ZOrP6cF8L/Lkus92vkjvvHcKz84UFo+EIFAvwpbo75ayEu/mrqJP8SVrjw3WvM/eXd4vSVaNb9lwcG/SM3mv/fEyr/TxIg+zjsCP3Vkyr87IWU/x1P3vuhQFj4/Xc4/Q5dlP9l8ib4sJWS/WLcJwG8aOL+GMli+zLd8vzpoBr6FZJ09pkBmvpJmJr/Psyw+C0biPjKSi79vmbQ/k+jJvYZqmjyKTTU/NtBuPsMAdD/wAZM+ssgcv/0WuT6fZZK/jVTePSwcCL2WHFW+Vp8Dvm7g8L88eQy/eCW+PWWxIz7cioO/twKiP6O9Xb9ZLng/OLnaPidvJb9jPeM/F8mYv7BNaz8VE4A/xK0rv048sj8ZBoC+p8+TPudIhT47iAm+IpFPPxQiSz/lz9+/n/SmP4rM1L95KoQ/4DeQP8qki79AVtK+u4eNv0IUXL6rtp2+1ZdHPzK4pz/EpbI/PuoPv0I+Vr7nete/gE1Ov4QAdz9ry84/Jf+dv72rF7/jUNi8SXGPPi1CT7+HHtk+EpvyvjjJbLxJ2Qs/53HSO/lt374be+C9pPe0vRZyvb7rgIS+eKDMP2uYDz8ySZe+m2syP2Pqqr7zKJY/vkG9PjrB270nO+U+eBLJv+s5kL+J0pi/rV8SPg3D3T+fzQ5AUVcjP1g3AD8Piea/rOwKvzKsSb/e7x6/5S0svtu18b6yWf2/DHc/P6VPib8h/XQ+xb0EQMpca7878CHAJW+Svj/sjD8hq/o/4mGdv1xP/j4sPe6+c/vYvew8KUBVYcC/KuqBPnt17z4M+4o/ZKvIPc24nT4phsi+AsuJPua2r75WHx8/kT29vk4TwT6RuO+8aiKQPzqCUr268uK/pYahPxLeZ787XSe/Qn0Yv5btrz/jrgjA39BIQN8shz++mGQ+iNhgvSU0kj5IaAU/2SwlPxg8Dj8Udrc9FRNKvmLnGr4Jlke+YR+RP1XzFz9ULzzAHeknP+1oRz4bQ5m85OvGvu3ijz8YkXI/UNtFvyhp0D5/vni/VJewv4pwIL/SxVw/AQB0P5FZAz/hnzk/QyQEPyM4JL/0JN0+o+dMPz0ZQT9QLpg/Z1M1PwXxsz6v+og/HUPZvIrCYb8H+ya+8bE+v3zYLL+0/BO+CNxKvyKtnb73YfK/qmlaPgsBnjq4LFG/U8QoP5kEcD+GxM2/7kFDv4PmRL99nXC/d1hUP1x6Rr59boe+SD8AwMKqIj8FoJ6/63t1PVoEjj4QKq4/bYenv7A/QcAOQzw+J3fmP8uVnj/1sFY+vbf7vpafTj9TOnm/KuXzPn5mAT/1tIc/RZ4wQL/qyD5xSwK/OIHRvBRx4r+42DG/eI3RvqgqBr/qAhw+HYpSv+99jz+RH1k5YGAYvNHhp75O6h4+pDlTPz/8Xb9Seii/+oGbvoBFrL/iulG/OdPzvvjXXz9wboY+bzxGPnPUWT8xqwy+AuvHPu1l072T3Yc+rC8Vv5UVHMB6gAm+nBy2P3QcbT9KJHc/iDmePxyStT0hDUo+bx4ev03Uob4uox0/4xiaPwHLDr41f+a+uGYKOq/oGT880Li/ofQSwPzvDL9OQJy/ehUCv7lTF74tEOi+deq5PxdLpz7G15k++EgfP0nFkb+dAYU/TiqbvZ6kKz+eMom/keHGvzdhUT/AuMA+2uxmvz2iXr8/DpA/pj6YvxtD0j8Vj2a/p20jP65AqL5+aho/DksLvy+zJr7emic9rkeAv6qmPT/3YQO/GRZqvq6Nfr9G/SPAyJxDvktoGkDU20g/8sedvKCZhr5kCrg8ABAMP+Akl78Yoo4/OyM3PwXbNz/kf+A+HbOgPKJALD8igRc/00S1vpHXEr8JmtA9S0bGPxCbnr/e17u/EbcoPp9vUD16gDE+3s55PiKPZL7Ys74/bOrMv2IYVL87d9O9A1TSvy0TNL4Oq9Q/JBqrPNw8bD4UTaG/3Mkdv70ZwL6Oq6K+5wykP9fEDj9BRI6/wGt8PuwW/z5n8JE/KE/KP5vugb9clE+/UPigv+eib76Gxu4+9MF8PzaMm70Ow6O+cmYbPu3LVb8BuwVA1sfNv6IsPT7EggFAO9HeO5iZQr4WA7e+mbg4voC5rz8ljw3AjkfEPzpEtr+nhoi+7cXbvm2rFj9Xj8y/5aHsPkqOAUB8fK6/VUJCPqp3Kb/jDdo+zducPHw4JL9lyvk+4fTmP0l8Q74OQjg/+ImlvwTZdL883/E+hPe9P/QStj4kSaC++sA5ulwNoL+EwRo/lOBhP2J4575dqfC+MSGIPruZ376JcIe912EGQE70fL5ceLe+TcUlv12DPj/Ukjm+UD0mv34gqT+NtbU/XLUZv8jq7r819oA/5kMvv3BmSj9dLPy/QoFkP68Hm79bEzs/ltlpPGA1dL8QZ9C+kbIvPzHf2D0XnBU/Afz8PxI5yL/SAM8/kbjVPbYWZr+4R6q/x6hBvkPxaz89nAK+hm3BPyDAub+5EEa8bk6gv/Utuj4NC2M/F27XvnGrJsAQuUs+MpzfPrn/zj4cLp4/SRiJv6UiLj8WpJg/wqjjv2epoz7uEQG/XPWmvacCsj4Gdvm+NPssv2jjCz3gKou/UfyKv2jrLT+vC5O/dZMqP8TY7D4859y/4XQtv5TYmD+qLXu/Zcbtvj+T7D5IjEg/tMmAvm72GL83ELY/RJTeP3GWej9Hu649gepOvwGYVL98wwU/SzjWPpVnsz/mgyY/72TAvzimhj/ngH+/4pfEvj4agD4Icv8/6AhHQDRSGz/llzu+XdUIP2M9Yz+MLqS+fMnlP6uXaz4v2P4+A3oqPzPa1z7OxFY/TgQev93sDr/b0Yy/TgbhPmJoRz81Yeo+xVXWP89et7unMis/27yLv+8xxr7DDjI/wl5ZP9aClr6hopK9sEnCv4jMtr4n8GM/o0ATP6ErAD9+30s9EnLnO84KKb9r5jI/xInXPuXp+z44qga/X9AJwIBvjD8vKvW+395cv9WHMT/htci+/6uHPxr0HT9m/i4/o9euv/sgmz+hwoU+5RG9vm3UEj6tW+O/8TrRPnLCg79NJK2/qtzCv5Nsjj9oFyG/MVHEP0kqCb+1itq
/RuqOvxMvnj/Gox++jlwMv9HbIz7VdAA/7QaPP2xouT+ZM7i+7rupv7Wx0759Q4U+6Lh2v9gHdb/3BLA+sEdHvSRWBj3ALEK/Ku5rvouabL8L5GM/DYOEP+JP7L9s9G2/RY6/v/hnJr+H4aq9+Y25vwH/a7+tgYC/4z1UPmcEjj3Lwzi/iBA1PjrzC79hFou+rjPWPzqUqz+uWKa/WGlUP6+3Tz9L+pK/aZtRP/TaxD+Mr4+/d+Fqv7tCgj9vAYs+jS0NP7Zhrj5MCcg+18mpv4IOhj8gtZU/aOVqvuMUMr1V+8O/M6YDP1tyEj8CvH690OqPPywMq74EihA/XISCv3HwwLzsiTK+H7dmPqIyvb7BoAa+1HdTP5Cf375SpM2/XvLfP33TsD/daKW/sZAwP/TCAL/M54Y+jKSWPpkIcL7epEi/TMcwv46Lar9N8lS/xZSJvQw8N7/3mi4/FV6+P19+FL90JnU+utb/Pkiq8T5vjps9byA+P1bp9j65a56/A4FeP34lYz+4ZkO/tGQbPabuLj9dVla+WFeJP7BJF0BhNkm/EsawvxyOmz5SuDg/PGhsvnIEuj9vV6u/HWMxP7IZG7+TFNw/vgr/P5tDRL/Hrwy/9A1cP/01xr4dATq9YPvPPNq39b+3t2K8AJIwv5h6/L7vyLg/ONWgvzcuUD871I6+ujyPvtBVSj80G64+shMSP/Padz9Boam+jLscvzbmir8bTlO/9708QKlRnz8B8Ky/TUapvya39j6MGQw/p4MMP5GJgr4VdwC+09+nPqzorz0ECQ7AmFBrvsb1Wb+fajM+fA4/QI8mvD73hqC+L/trP94i9z6gFtc+j1obP2meA0D0wJC/53/yPlAnbb8UNg4/Ci9rvxqx1b4GFpe+IM95P9Ufaz+rc5+/lwJhPQanNb+wFqG/uDlevnXxnb5TTxtA6azdPsVVsL8HlRC/sLyCPwypBUCAAcu/XaHwv8l77z+Ce8c+b0hev3TdCD8XsCjAmjddOy3Ypz74nGw/WMeBv8B8rz2h6Gy/qcGCPms1Zb+Y8tC+wu1+v92wJj90w1s/YEBwvnKfHD1ya7m/OG+ZvlujTb0TuydA71SOvylquz8jMn8/k6+PPnoa4T+chL4+4D/HPguVWb3onZw/G1mMP2MsIL8Y3ai/KNpPvlTPTL8sGYK9X2SeP3Ej6r6SZy+9P6ltPcQyWT8JzQ/AtlAbv8FaWD4wnJk/o9r7vuEy8L9ppR4/HKciv/9GmL8Z1R+/N/Q8vozQGr//Jw3AIaZlPxsvoz8lyiY/r5+Rv8xTTr7CsNW7l0oZP10YKz/Q8ju/Se2nPagg6j5jWLo/ilo0P57/ST9Mras97Ym0P27M0T5CcFy/oaGzP8qpMj+q1uK+GoD1vsDYlz6PJu0+/AZKPtpjnz40mNk/UyiJP/o1Qz67+XA/vimEvxi9yz5al+c/bEdfvor+WL9M7ya/GHmLvyjmSL89s72+n+mzvwrkfzyj+2Y/tmJov+V3wj9FwwI/EeqDP9hnKL8qwlo/fcSLv0PnYz+n9i8+36ENP+J5lr9MI2W/8+MYP+/Gcr9gKe0+X/6uv/IhWT9Tw52/h2MNP1kpID+jXzK/sf8UP7BYhT6K8Qm/UhiBv1I3+79VBbM+dUvIv9JDwz2v4oa+TdwtP7Wlmr5hrai+4Vs7P7qMqz4y36E+10PwPqGNxL/nvEE/wcocP6wigr878Hm+rgAhvYa5Cb6y76o+Bze3P1V3ij/M9qe/+z8fP30aqj/EC8Y+P6WLPwrJAEDqCIM/wUp/PnbFhT96XRQ+UdDDPJHss77uHcg/i4RRv6AkxD8x7v8+dxyzv4c8vD7WZgbASyEgP4ieYj+kpBe/kwb9PdUh+j+eeAG/SYqHvyWnvT8PNvs/FjNyO593gT98tas/chM+vwJ6+L5MjZ0/jK7XP/YYED+DO2G/BVj+P7XtB78ul6u+6EavPrvuxj8rnlo/QGnUPic07T5wPDI9O+IOP1DkIcDv3Je+GR13Pm5Uk78rzMU+52BRvm2z4D+2WMk/EX7uvjN1Gr7ScZe96T/nvqG0Rz7GHkK/AbiQvwFqHz/aKCE/o+dNvwM2ZT8UryG/4J6BPhECUj9UrQm9zdPoPrw0BL+JLkm+0cxSvsIfP79JBDW+IDnGv4F4+74z25G+/imDvhtLd751/Hy9XHn1PlrgXz8EVya/fAKav7Zhhb+pcvm+CC+0vnYeRb8n56W/q4jpvubHOT7aUxg/R7MOv3N0074aPG2/XMYAvZ4yWb+BuBI/Rpfkv2EhuL6+Kpo+Hj88PqpaLED9GLM+3ISAv8KCw70KXOO/exGlvSlDVb8EV2o/rK4Mv/gi773rsyK/rpLeP+95pL47keo/QGxQP3PR9j6Xyrw+zZ/JPvq99r/OyY6+RlxYP2pISL1D3bO/DaZWvSEg2j8mmp8/6/d9vS6Scz/Ppru+dxjav86pbL+9DMg/0kiMvqY5eb5khJm+v7rzP2wf0D9/AA5Av9wivhhclz5YIcK/MAy9P815lb9q+l0+aXSMv/q/Fr/VVla/t50bv/IDCr9YWwy/X1VVPyVsjb+d1WE+FuabP9ZgA7+IyTe/pg5svjwBlz8nxEY+qf0Hv66+9z7pF42/lG0uP/o00T4SmZ2+j61Wv4X9Yr/K6Qg/PE+dP5omI7+pseo+2pEFwIKpFb9Ob/68A+Fov+K+b7+a8yq/SZqVPi7TP74uPw/AjbkHwIRbG7/lVeo+H9cvwKLc/74uuAa/DbWxP5Qhxb4iF8Q+tqUQPjRbCMA6qUQ/55BcPuUdAj97R3tAG2IFwNzC3D9SLJO+xhyTPgxrOr2BNdm+kuARv161qD6/MsK/9SVAP28X1b4dppC/FmjmvkTqoD+4Cwm/PIe3PvMfO78rB0I/b30vPzef7D8JxzO+9iwrP6LXyD0g36U/ZgU4v6u4Pj+6Xke+F3WDvbrouz2nJYE+fgnuvcbdWz7G/8k/bkZ8P+qAXj96POm+SaVjv5SOdD+aRGA/eYC8P3meGr/NDmu+ntrRv/nxyL4/H38/e9juvnz2Iz8/E0O+zuqlPo3nkr9Lgq49xm4/wLVhP75Zs9C/B16aPysERz+A7+4+VbDCPzHscr+Zqd8/G6RuP407cr4AXJE/B5uNv1wTU7+JzBu/FWsHv0g9h7/8jZw/kIiEvoh7tD6MAhK/hwnpv/VEij7FxPS/ApCMvfIkr78PX/4/EU9pP3KV2D0kwaE/KqhYv3chCz8bm0w+pC2HPnvcoj+ahDs/l9OTPlrT07+YxXW/0k77vQQ6vz2EqpC/6VgaQDMZwj9tJBo/EIiTPU9NWb7usHO/G66ePST4gz4D8p6/LBmrPjr8Hr4KM/S/MUJcvxvE077An/E/RHoOPw/xqr/A2fg+Dw7Gv5+Vij9AN/G+VcS/vbSzqT/HwaS/xdSyv8RmFb+Z6YQ/73nCvwpCNcBL/ua+4z4NPy2imT98I+2+gqbSvgWzkz+yU++/MuzGvnv+Qj7//+U+sYICvwUSDT3BQB/A+IIov0sy6D6fd3u/cJ
lxPXnM5D6cYq++Do4uPqR1dr/okVO+IDccP4W9ID7qJha//JhlPqvwNj89MgPAilyUP5UprL7R2dk+Yz+ZPwGTr7/tnTW/qLyTvhKkSL9nEt4/alVbv1A4Dr9cS1E+CeCZv6qWyr48iaI+rmyqvpxBv71Pgge/KtrBv96npD6moeA/BL+WPKOrZj5HVjE/bHmivwDs2T9IL08+sODQPw2oO79Btug/l3BGPwmUDT8bpG8+SX5+vgWrmT9/ug8+8cj7v/f/jr9ZTj6+MLiePlXJZ708B5w/tb35v7cIEz6Ls+i/gXxCP5Jzwb0e19Y+byddv4LFoz88V4U/KWMVPyugBL6HcRQ/6fY0v7IFWz8xItM/xAmJPzrHOr+xC7k+qYWlvyyIEj9+teY+JFbvv1W1lL+V95C+SgqaviTQmr84KMc+JsGAPn3uRr6Ne0G/CTyGP2Hk0z9flfe++bYcv5J/+z6gT7e+JL4OvulZPT/IZfS/Ib6oP4fZlD07r9K+ZcC2va3jGb0BmN2/I1q/PxFFKT0t0eI+xJhzP3S1gr/pavI+QQiJvvjFWD9+JAjA9PHKvXRSGr+SUd0+oqnwPvgmNb8HUTa/hqTivViKZb8+jFc/ugi9vhgMOsCt6L++Ae+Ev6XN0L//Y56/jRHgPecQqj+3WaA+zkMbv0Zs6T7jDeu+VNExvyjCk7/tO+C/FKTHvsbYIT6h4sW9nvnUvmoccr8Gths/x5eov8SpRj+mSIC/mZ9Av56/u7/kWwC/FK55PzQABD/feXo/IqsFPw5Fjb81Xam+IbtHv0dQqj9fMJm/X8lkP6CYZD//MOo/ccLRvr9aNj+VBhJAxB0evzN8xL8rpPC/THQ2PxEL8b+aoL6+hOffPqiePT7p09k+9J9jPuCxoz8Cy3O/jk8tvwLNRb9zh1Q/hoBmPwEH5z64fJc/C8qWv5pn1T98+cI/uVE8P/0R5D87DNS/PEIGv9k8PL/esjg/A2uGvzTrQT+6ba8/0fgxP9BHkT5E5Xy/OstYv4Pznz9rg0c/7tIfvT341b7WpgHAsRGJv2Gp9D/45aO/AAdCPu3zgD9OoaC/viQ9PjwbcD/1yUw86pM3QKmU1b9rhIc/8PIwvo+cRT8D8+E+ILA7vwh+aj63z+2/bm4aPzqomD48fyM/bHCHP6s4vD7BpBc+so1hvxpUNr/s5pc/19m3PznTc75gbTw9z5Rnv+Uclj/CWCo/0Hn4P8jZYL9AisG+KQBtPleAJT/611y+YXpfv+yjYT9UnDg/8ZBqvyZ/rT8SyZU/BYUJPjQEoz271g0/k5hcv5QE9jynwAnAZl9gPwPfx78QcMA/gAWpvju/WL4rsyC/1XmTvm+StT9COB/AmnOjP2oRrT6vf5q/1qOJvwyU1j+9DHK//aCTv4aTkT/NVq0+5Btwv/DSXT7dS4O/t+eMPwHbhz+PMQg/oyu5Pp1u4T9tdKS5p0eXv3Ih5j7LgQVApOOBv6YRub5qONU+VctcvT94e78KmY8/jHsUQK/WSD4FcWe/V0TGv24VhD7aS40/MEnzPsKJHbuC2ha/BcyLv0OyVT/w7Gk/edrFv9h/yz9V9hI/6yWzP9jKq79T066/aYsYvnu2AD8n7+U/lsE0P3GBeL4RYIO/qXCdP+/2dr+ocdA/3ZqRvjGYyz9xzi0/jEgMvin3BL96i66+U0zbPjoEnj0GBBi/SJspvv7soD0uPQjAA5XqPgPPer9loJO/5vrXv0We5b9Of62/ZoI1v2UE+j8NoAa/IAQ2PuULzT70NQY+rZqevWP/mL/+t7k/OFLnP1xh179eE4O/TUOPvvPrdr/1hgE/DHA6v0eQCkDlY5g/Aq1ZPkl0gz8ljo0/3V4Qv53zUL94CaA9L5RcP9llDj4F28m/54VNvxZ/l72S9pq9Q3z8Pw5osb9MbgE/Qpu+P29fEUAuDc++pJz7PtHbET9dLEg+Wm/KvSs43z5SECLAG5wuP8g0AT520GO+yv8CQHrILL/6X86+OOv+P/gBVb9b1Ay/1Y0WvmA6Vz8uylQ+v3SlvzM8CL+Vexu/ESaevS4I2j4RH9Y+WlTjvzgviD+0UIE+WTixP/dt4z4n+Iw/B9/uPiBRrD84sgU/yofIvZJvEUDwl2M/6uASP/Fdo79E56S/zt6VPoIDFT4jNx2/xV0QPiFYyz/mBTI/kQySv71e5L1JEk6/moDGvm/F0772jfS+R6igv+m3Ar77gw+/Fnw7wOVjA0DOXIs/farAvmgqmjzF+JW/cg3ZP1va8j9SdCA+DQODP2F+Mz7SGau/g9rSviAYBz57uui+peZfvrEvu72go6W9AA1WPejBh79fjje/Qm2kvx9Sej/rwgbA2aiTP+ZzsT9qt5q+zJkmwLP9uL78coO9SFuBv1rlA79w3cM/qzgqPzywbL8hfsy/0G6nvmqUWr7SDf4+hAoJv6PxAj8hs/c/rcRQP3T4RL3Mizu+3J22vj4flz+dlyC/czs5PeS0UT3udAC/86yvv+pcpT5N33m9wg8APwiaCL/fQ5w/P3RgvyMk2z+Ost+/L5rePqdr8z6hqUu/KkrZPkXHoz9c4A3A4rMAP8CRH8DPNXe/hojQP6vvFL+zQZa/MLBKvpRYBECQdOq/U5GGv9ervz8xye0/wNPTvUeZnr9NIgZAsBbMP3jPLT+a+U+//dJIvW81JL5YZak+arq5P3ohYT9254m/CTGwP9tSoD6L4y8/nLe7P0NIjr/YvRK9aA0Iv56vyL/4g7E+WL0gQKuH67+tOAS9n/ojP20Q/D2ur+e9uUymv6Zfbj5Ju0G/rScMwGbomD/GWHU/dppTPY6Saj5Bg4k/TLllPrJtZz+WT5i+Fe+nPwc+pD6NtUY+XZGiv+Hrkj7/+lS/f2Yjv3W8UL9jeoi/QfwHQFJpqj+azvU/04ecv3ztkT2okaW/Dxkyv18Ka7+wqp4/nt7KvqfAiD+rpxo/0W4TQG5evb+OLqE/k76SP1sveb+/6YA/MRusPonvaL5w7VK/jxc4vx8wA0A0/zI6rDlIP2J4Sr9xpEe/en6KP+q+rL/mltS+VGALPVIX5D07ZDe+5KjKPv9eMT81ETw/o118v8d8kb797Zg/mQdlP9y5r78s9bM+fDb9v9A+QD0I6vI/GalyvzKWVb91l0M/hXrFvzz8Ib9esBg/j2Aivx6icb70gj4/49TXPmyciD5voa2+p3i9Po9n/b1/Zeo+4G3+vlzzOr7gbX2/JGQPvcGdT78TmY6/PG+EPnXYWT5qakg/frzcvk7s0D4SAQ0/ZLnjPgpeU7++8hg+mXC6Plg56zvaiAXAZFckPhyaDUARV7i+Q2M6v4snvz4J8J8+jKuYP9MBob+8Ozs/5iUXP92OEb5taK0+Q8WDPxBQXz91TpK/4/VRP3Z7hL0EP5U9TuiVPUzZGb7fiZg/UnSrvUpNj7+/fMm+v/s2PulWnT9qZBo/rrxZv1oNH0ChkIc+AUCjPwdbG747igg/aOdWP87tmz/YV4c/TUhfPwxTob7zXRK/iEuqPnvhbj/lEGS+jleIP
1jvuT8ClGq/b0BWv3u+D75of50+BkoGv/oUrT9LH9k+OpMhPeLLt7+nbKi/cuCPPnJ8CMAanoE/FfMhvtaOT0DmtBNAyc05vjfH2b1u3n4/jwHaP/Cq0b/0p+S/rD0fv0ouFT83Zv4+Re6IP7CPmb+zOxTA0aZbP6wEVL/oLgHAl4sUPpANSz8X+fm9G2vqvtovI74Gvb2+z4ysv1D1Nr+acgU/DbOkPt8XLD4Nwn6/99s5vglVC8CWwjY+z+S2P12wvL9b/xW/r4CoPiZdoD+yfeW+UTcxv6wF/L9f19M9YLECQO4Bjj5ogra8+eekPj27Nby2IVC/ecJSPwcuZz6rSZA8l7F5v1nZpj/2KSo/OIwNv8w7gD+bwf6+YjFIv+d7Qr9gsuK/AW7xPvVi6r+mPZ2/b9YEwE9FsL2b7Rm+ukSnvjBzhb/BC5a/48HtPvnwDL+4+aE+qJ9iv4NoOT7R0aY/9g4WP+cn076Vr4M+7nB2vuQbATx+tqa+fUcovgAvWT7BTH6/UQiCv8O7sb+v1cy/68+bvyRKIz65jWS/PXwDPwEwDj3y9/u/e6wnP4CQTz/JbX6/WSPXPnhDW79oB8g9Fduhv4OE5b+dAMS+mmX9vaycy75Qjpe/kr/MvgDhiT7B4Te9jeE6PquMK7/a+pU/2YyWv4Gh+T41SJc/ljn2PoNiT756Pge/hZ6EPwsktr8QWni/sYzLvnBrnj/ncMI+2dF3v6fgnr5IRB2/u7Q1v4KYej9Hla+/iOzNP9wGVD9boYs/aAbtvg+KGb6Fcbe/R1AGv1TA/D5l5aU/UbeNv5gju74v7hq+OKmyv6piFz9uVzm/aRRIvuhl574XEkq/n9nmu8LIXL4oZL6+dnw6v72Yoz67qdM/mT4PPpoBfz/L8rG/N4whPpF2jL/gC72/ckc9vyiQmD89NHM/O9Z4PogvxL/M/Ui+bHiaPsW/Mz5uAey/3+tjvzQ7g71cSgk/XI0MwNQDJr5gexI+LIBZPx1fMT9lIxU/58hMv2o59L82Psm+eTWAP7hcsj+M5jU/j9LbPiFgwj7MXQ6/hy4FvhKk1T97S3G/ErHOP+DgpL7Cp6k/hBK0v9H9FT/4nDy/eie1v2PHXz5fNoE/FU5Yv8Hfv78cGby9SxGzva1eDD+hGHM/VV1zvSKB7T/LnWU+0V6xvtdHzr9Pe4a+6TqsvonWdT+nyuw+kfWWv3RjMD546Ss/iJbDP3YzYz/ZxkI/uxx2PdXQxj7H758/c5iqv7bpsr4Rx0W/XCDCPux3nT9P0xi/v/oYwJkO075q2Wk/HaIJPzo72z5IW4++R7ejv1TqAz9+k1W/PsgLQMMMEj9epRS/voMcv4hZvr3+P3W+afSOP+HjPj44TDu/9vnxP4j3ST2t0kQ/u7Qbv0vquT7PSZ8+B+Dxv0T6AEDWM6U/L13Lvi0Xjb+Z87E+kI6xPT4atj7dLEQ+ApkBP1BBuT82bxE/HVuGv3Zorj+o/9E/TLtJQKfOj78Qtng+G0EFwC+bDT/bVgy/eTP2PytNRr8mN9i/e0nxvsnc/L8OSEA/UioEwOAf6Tze+gTAE/6jvjda0j/Xprg+6w1dv3ae/7wfmJM8m/zxPjf1rr99shc/wRQtwB1AIb8H//m+tSEiP1dkuTx96bO/gih8v8ZySz4P7qG9ekEUv9kv9r4RODI/gIcEvy9ptL0+4ApAPDu2vy8J7b9VmUg/jbstvwfwor7d+Ue/hgSFvuaczD9KWE0/rrpcP+5Gtb8BWZO/d7M1wJUzT7+A6nQ/m1YEP1/UYr4Wr7a8zlowv1CsUT6ZEEQ/YdXOv2xrmT4NtG4/BojUvh5hMz9MIzRArbHVvYtfyT5P8Ty/6Jk5Prmo6z7AhEc+YIWCv4BQUz49Kbs/lrPlPstEdj9Es3K+dvk/PzL8jj7jbnO/LxWjPtXffb9xBCK/SN9BPmcwmD7FOJi/AemGPwFvEj8VaTS/lM/Ev95hFL68UuS+L7bnPW3qub4segDAadXGvhx6nL6cNyI/N70yv9De1j+jsPS/VtSnP9+zZb7BDVw/9m6jPfXckT9PVMa+mJ+MvxbTtT+KaOi9OeBiPloMnj8cF4S/", + "polygraphy_class": "ndarray" + }, + "data1": { + "array": 
"k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCAxNiksIH0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAoXchg/WAMXv2dXB77/BpQ9RQfAv91hR7/176w/bd1iPy5JgL1b9Xg+9KaWvoakrT86KU2+A2htPh6xkj/RAUe/Z4bHPynmw78dmNA/Vs++vgWGeL7a6/G/2trovmEYhb+2jhu/P5sMvtUxdT/5lVQ+y4edPvs5jL8aeWG/nzzmPyM2PL+i/tM/CeV4vjPxFTu6AYk/X82iP7Lx4r3Lx8a+1wxlvcUIrT56LGQ/oZqKO1VvHj/GtdE/AzvDPveQnz6ra8K/4l0owC/Zxj41M8K9zMgDv/tfeT7naXe9GluQP+VasD1dNj+/iXt9P5xXXr4cywY/LW/0vg5bJr9lYKw9Oy9SPOSr9T7wi4m+1g3rv4qU3b6wN0s/rUGYvu5LfL4sg3w/4UM2v5N6xb98fCS+puX3vutbd79pljO+oab7Pg0vGLygLJ0+vh3oP9GNr762g44+z1sCvwirWL+CvBW/2B/1vjYNuD5CVxZAn043v+lP1b6jRoG9JqbKPiLwhj5AXaQ/AkobwHbDGMC+4/2+VXSMPyhnyL8MfUDA0DgSP0eBeT7zi4u/U0x8vyBBeL9XYOI9M1Ygv/L1/D40sjbApVpbv4kikL8WP4O+f70Jv30/TD+//u2/RVU6PSPvDj+gpu0/CKr2vnAfpj765Oa/zxUZwCJDWD8gM7a7gdmWP6rLr79OGAU/bDXvP6fj1j4XnI0+zi4MPKUSyb9qg60/K2Ecv9ElAT+CEci/ifiWv/Kwtz+b0bG+7gS6P2U3oL1Ow+u/i15kPy8DF79F3TI/BYfOPOaExb/jPCg+7I4MQLlo/D68l2u92qSJP3m5Vz4kkFY+YLqLv9t1bz5piYQ/KwhWv+FVtj9e0Lu93URjv9Fkfz+y1pK+kW9CP2Nhlb5CDHW97LP8PnbwlD6Wbf+/6oSPv9DtbT/QTge+QRMGv8Hlrb/NNIq/JmUBwERsz701Mxm/TnJ3v0uEi71cWro+9PYFQGWFwb+ZZpQ/Lx15P59yR78WHVA+6E7qviYlPj40K8E/AwkbPyk0Aj+BUdy+kvW9v7F4Vb4jcpo/PHrDvysNh79UTAE+VhJ8v9HiSr/q4di+jcyuvw1geb/aEzQ/5VenvxOHzL9RSSG/cGE7P9+lSb6S3e2+krE1PVdz378PSv0+mVHEPuGQ0b4iwoA/y1GQv1JfkD+WMx4+n1CsvgnOoryhrdu9OzKcP5/Qib92NEO/k6WEP/p0GL9WiWY/HofZPxS4zb7DbS4/MKyTvzQ2QT/NNUw+EzLyPjZEJb9intS/IdUAP8wOBMDEe6K+S/FwP9eARD5LY/k/xzaDP+NlJ79PwN88yAdCvcWEyr/8vXC/B44Pv3BRAsCDiEI9sQx/vmMh1r/99M4+cEM3P7p7aD5E7KW+wWgXP+wiLj8AZJ0+yhlTv56ejT6DBZk/r7wYv6NEBkDAjwa/N70EPQQoLL8coWQ+HwHfv2XJC7+hyou/I5YKPhqUfj+pYCU/RMkXwEWxAkDvLpc+pshGvyTmID4bazQ+mbYqv6uqSj5Ogxu/iQjPvv2vL7/GYbe/KFYVPi/WFT9pzwM/n+ZRP+wPpj6zYAi/NvRWPyLDrb9mkma/0dV2v0c/ij+XWnE/EoD5Pv80iT+tGGG/D3uyProtAL5EzVg/r/rrvTwXxb77eAo/n0xgP6wYK798Knq9/HqHvzvgH8BKRU0+OeEbP58thj49i/m/iaKzvmn5DcBeUq8/1gOLvxGxoD+DJHI/F41mvxn/kL+6Y7M+nyVYv3jj0T0uNfe+AP3jv/2/DsDqH2g+QMIPv0uXDb8iJDI/WzOWP/uKi770Yq6+YAkAPxGwrb88gmQ+ZkgNP5EjLj+D5/e/LuhCv9zVYL+Ikcw9JyT2PjWUvz8ebP2+08H+vnQokz/v2Jc+aEKiPyjUkz5mHJS/86Oqv/SXxr87fCk/8SOYP+a37D/A1Vc/5deUP/N7FL9vBZS/zZ5ZP0Da1T7MC9w9jlXGPpF4pT4cVKQ/FsKlPwZSCz+Jhlg/mpkDP3Ae7L45rkE/0H6yv+vlhj8h5pi/z1SSPgNcWL/nqg0/LfUKvfcgzL/xZdc+nvITv9WSYT+8wq06n3TKPj0jdb963ha/r9LqvttQWz9LtNY/vB9OvynWiT//2wrAM9uYv4Fydb8BH62/BrPKv5h00z6QNFu+bUHCPtHCMD/z/oc+K2MoP/Fmjr1m75e/dGaHPw3QFr89g2+/GzA2vvqtab9vudW+1iewPtlZsj4x2GW/hbT5vr6DiT8oKP4+3s0EQHEOlz8yeJI/raWxv1qMQz+Fhlm9JQ0NvzcMLD5i+XE/gDYAP6njYL/KET4/8Dgjvy7iXT6zUJu+eh5gvoAoRD8RJYW/J/z4P1kZ6b/AABm/aoy9Pp2VFT83xs29hBJrvZ8z0j56nWy+w1OyP5XfVj9rcV8/DFZ3v2aWor2s1pm+jLeAv3FnuD4nowm/jMgnPwr8iL+GCdw/D3Zlv8uQYD5Lt0o/fhUSv2M7Wj/jVZ2//t65P1LSZzxrrhi/Wi6ZvlUVTz/dnhW/6NU/v33aYr6yppa/lQfpPzz/EL/W5FQ/U39WPmwKfL+5H8O7aLRIP5THQL7tt5g/BMGCPwE+az8iWOu+RziKvxQUHb9j80g+rrgLPwf7CsA87QS8JOIhPkgajr52SKU/j/PXPl60pz/BJF2+hv5vP9tJ0T+S5ac/TMuLv9Xhgb5uubC+kn/6Pt60Lz+swVa/6+6KPX1GAcCnkbg/9SoDv5bRZD1oip6+CLGSP7WZNcBqEoi8gbfYvQdD6L7+9Nw/Bihjv2+URT/EZrY+dnmhP+fMKr/aozu/BUyMP7fD2T6CwMW/5Q8Kv/LTxT63d1Y/D+ItvyyDOsD08QU/eDOcvxQcAcDURDXADWFQP0u0Az53Eme+jcuxP7YnIz/r4s4/KEWyv/SEDb138M6/NONqviiVfr7AaSA/MaE6PZyXHz+nFDQ/WuoJQJBuob7d5nC/WP17PjliTb/ksL0/9AGRvp3JEL9LUPG+NOo3P22IIb5HRCS/QCrVP/ItW7+7rRE9if0mP5fP0L+05PU+xQ2Hv+ShUj9McHa/UFkxP/CLjT8bh1w/k6yAPijabL48kBO/3Uq8Px4SM7+VvoS/nlUqP5/PS79dN3O/Bsi4v/U/975/Zqm9Cu14Ps6Icz/I4gY/AnRDvs/zSr6umQI/lOOiP4JYAT7rMfC+dyx+v3z8uL+mkCo/Z/YTPxnOEz9NvwO+cBa0P2He976GjHQ/37AeveC8G78RsBc/07vRPiLFtj5NdaY9xzUbvRbbGEC5lZc/Lws5P/X25D2H6oA/TpsEP1qr1jzJbJu+/belvUHoSsC/p1c+iRn6vpzcyz91d5489C
+YvyJPTL1M54W/dWLyPg73lD76NHU/O3dKvxNMCz0Jua6/KZUzP77UBD8o2v0+rTXUOh1d1D78Ws0/4PeRvgH5DT4wBou/WkZev1ZSyD2yTY0/PTeDvhqUcb5DU9C+0OUIv4AGlz4jpr+/1PWjPx5ZKr+rzjg/UI+ivxs5Kz+QBCw/IrbEPt6Sfr+QiX8+Y26Xvy9qgb7Pw4M/qPRLP+DBeDzsjHm/PEUlP2fFnjwNgBTAblpIP/CDxrzs6Zk/0zGMvx5PS8AU5I4+1keVPu5n2L/yfnq/fVUwQP94nL4hIDs/pJEmv4bHBcAuWC6+Uv2Hvte8zL7QyOq9LE6HPkK4Gb70yqG/N+34PgSF+b8OOQPA7zo9PxXMAT9nF2u9Dp6yv6c5Nb++hoa+QUbMv/swPb8f/yY/8bG2PyfA1r45qUE/7P/0v8zRKD+Qz02/aFydPs+yfb5UA6k/BNtfv86qID+aG4++1RoivjJEPL4Ifli/Wf9mvLZNgL2vB3c+CRGtPV9JTD8Qa2w/03qGv+BnXj/f2Ze/uX0WPzOnHj8IpvS/52u+Pbne775xPkK/u+isP4RXJ7+4bk6/8wWbPc+CLz9UDBG+oUuVPwXuBz/QZwvAJosHv+LUD7/NbvU+Af0nwPoiI0BT8Qc/KXMYPpw9lz9u/hVAQeleP2rlcj+b0wU+IewQvnab874TfFO/KOAvvwXfrL9+Ypu9MKPXv59/Qz5nzxS9NXYWPmqqBb9I60RARxiuPytNn7/5dG4+SqYbv3nagb97h6u/gkvdPu9uLr9yw3c/g2h+P23crz8FNlq/H2PzPoXoIT97x/K+2ZJFv8YEzj+u9Gg+yM4iP5NzUr+DEZy/dscRv+3+/b1cOIi/H0whvPAg2L+3XY+/18ArPwSSGj8SpMi+GkaCv/aBg78EHb++Kv8kP4KhbT9oOf6+pKWTv80iiT4w01K/NKwEv6aiqT/WvVM/BcQhPj7Oor9XeFg+JwWIPw5TwL5O8aw/rXilPzP11D/USJw+TV8WPxcEtT/REOU+4VAAv2in5b7AR3Q+HjGlv3eyEr6oW4w/gT0MwOlpxD/1qY8/RWbrvqkbCz8YSk47U3CdPx+Rez+ENbO/1FYWPxyQUr4fpKk9eWk6v/XC3z+/IIU/pWE9v4XUqr/idsW+IXfePoX4dT6oZqA/LjhKP601eD+XWac+itSxvzJlSL4FsYc/tQlTvYjKAD/7MqO+4NCDv9Cb/L13O54+Jtsnv0pecb6qgHC/a0OPP+jClr7l87k/Aq7DP237pb3ZFIG/Wn8FwLZrvL/S2a+/sXGwP3E17T0/esc+YRsOwPdWmb+pF2M/DtSSPvu8Fr6CmRA/02HRP/VYYr7BEY49IDhFPlMYGUDaWwbAtOcuP3Ed6730FxE/kUkov46PSL0BHzY/7DlHQHbbTj/UGlm/B/fYvuUl6L6j1+W/lQGpvrCaOz8KGqO/rjSGP5+9+T639ju/Je0Qvs+VzD/KyTs/UukPPEFjc7435Zw98AIDvze0BsALswJAhA0fPvXZyz6tMG29nTA8v/1iPr74uMy/ybsRQH0Enr95F4S/qGnhvqrfpD8nt208szmevymOoD/bIjs/4NYJP1w6Nz86PRlAmNgLQJTML793288/ny7EPn2J/z5VnNi/m3kuvjUVUL+q+1M/KhmJP2LEBMB2grS8KJmPP8UoP77qozo/9suzPhfurj7M6Yq+67ibv02Oj78gmDU/62afPCfWDb4eCNm+tkGcvjUBhj8dpau+Aqirv/ZfPj8wy1G+85gAPSDFzT+dbqi84g+UPvfmlr8H8WC+5gkcP30Wx79Wqk0+jWvOPQkdKT2fwYQ/hjurPnYxvT/iW36/zy8LPuz8rD1V3Yi/wM90P3PHFEBTUIu8EEkBPwYYrT18rTa/rA7pv5KtNL+xK7K/BCDaP6K+671XHLG+lp9jP4qVj780UUS/eWmLPbSugz9ubgA/8kh+PjH8nz2368w+GX5OPiLWg7+RvTw9oW6Jv5sOpj19F/a+OoUiQEtoI70fvrO/imYJvrL0BD/KMNY/WuYWvwCNyr87HFE/", + "polygraphy_class": "ndarray" + }, + "data2": { + "array": 
"k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCAxNiksIH0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAp1t7C/xE/fvtXmg778m469FW0wP6gCL78+Joy+M85pvDUwGT6dWcg9JjzfPt4FWD9XB8k+XTzoPkqGyr+Pu009A4L1PuSbOj/mPTG/zLYov0P4Er9sUgk/N1aEP5ye0L44cGg/++L8PYuJ/r0cdVi+7CE/PyOypj4gpxFAZStav1CiBD+4eQe/mIlevxQMMrwhpgw+W8M6QLmQPL6yqZC/EMiLPufGBMDWU7y9dEeDPO5qSz9PP3k/8OkAv486Gr5/IES/W2SQv4eqnj7aBKO/+knUvrjDfr8KuAw/Q5XmP2+apr+2L+2/ssT/vaqYgDw66F8+H4TkvmN0Tb5TKrw+1yyFPwo/+r+h5by+sfBaPnsAa7/3lt0/Oaoiv4UiKb13Vw4/cV4Xv4Fc7L+beNu+tcSDP4V9rL7rr1i/2/hsP1Rbqb7mmQK/BAsZPuNPRT//bRhAv8VTP6N1lD8AhwG/pU+DvyhPfz3ne9s9tQyKv2mAM75bc8w+fIW4PgIZAT9U+ZQ+UssBv/6OA789d5m/gnPkP+teXDzhP2a+UcVtv9LhO0DQYom/9ZHbvpHo1D6Ppxk/CTt1P7df5z+MkwE/BCyHv1COg740nJ0+0V0EQErrsr8H7pk/eTs4PhB8Vj+b4tC9qqlXvzM5QD/LiCc/uth4P6/7WbzwjRm/VYbHv3LCUT8LCNk+iD13v/ffCb8D++6+b9K9v6JTUb9aIZG/czDTvkcbPb+yQdC/HnIBQEFToD/RbUe/0XqLPvy91T6AdZg/GZBRvyjKiz+GQwy/iv2qvwNhOT+SN6U/TaAlQJmQmb9n7DI/eNHRvjKKSD8AOby/caKhv7ufRr/nt6E95x0kv8m3KD9IcYM/6n0ZP280zz8659Q+RcObvx+Drj+F6xq/MBFpPx9wyT8D0+k+IoK/v/QPa7+xT6k/fy4tvnP3oj1RQkI/N0IyP9m+jj4prVW+FBagvkKfWz+8tg0/07yDvqiCij+FIQS+iAZav+dLQr/Cjnq/U3mLvq3qnT+4Gk6/yv9APsscKL86R4W+CEnBv8bUYb60Wka+UFAKP8CjZD7qZJm/AseKP65ZVz6sJ0s+lXEWvMGLBL+UB18+VIDzPkea37877Am/aYiCvssogz5qvpg/zpBFv9v0/r5vNQO/6jBNv27Csb5LU0U/FeSGv30K2z7v19m/2bMPv9KyqL7NYdY/47KMvizhDMCX2/+/nGILPRYgHT/AO8A+f5crPuzElD+mIgq/3BhlP93l0L+pqme/Ik3nv4a4jL5nfD29C67pPWZ5fT0jdG2/f85CvlWsv79Jfq4/4HSbvkhP+r5mYss/7wGfvz0TkL/zUIy/JwCLvdL+3D5fWz0/kpskQLT2D8AY1hs/HKAOv04fFr9jqR6/IaVov4Oe1z/MoFc/fgW+PBburD+NmsG/jFSqv9NjC8DVDDO/TJQlPoWn0j0W7Rg+g520v42bTT/rYHU+cqk7PwhG8r+7e58+sQiYv/Rty74jzAg/SY+6PYb+Wb8Mj807WQnEv/Y7kz/5NVK+pSyPP8jKBr8TCQa/v6FDv+9MYD9InTW/NQQlP83Qw75e7Py/GgASvwXtlr+V3Qo9SF+Yv7QMbT/7hbq9uds1vozoJz+bwnY/I6akvGJtRb9wEeO/9aAvv58BHr/wQNs9GLewvmAcMD957By/AKRpveeMlz+3A68+7el1vQgZGr90GyK/ysFZv21/Cr9Rc0E/rjZjvl3FrL9fU+I/etKjPrkWtL8GOuK+ZhjjPRMw2j+h1o2/zt15P18isz997yC/RBaOvsilhL8eWFC+Xt8VvdPeKD8ii4s/OYM/v0/R+7+ICa68jUqWPlpnfz63Qae+B37/PRj6wT+pcsM+JSq/vvcMAL/H7S2+soDRPy6zO77AJ6K8zvH2vn2rjD/y2/o/wbbmPteEqT2ZqRY9fJgYwDXCPj/sYgk+pk/bPmMiNr+I18S+3HHSvovEgj/P7MG+T0oFQACa3D6SSaO/pKEDvi7UJr/DcAzAd7vMPyYBND9GgAg/lF8pPvTlsL+qnPe+8n5XP2GOhr+CCCLAeo5ovuQoEb4j0Qo+M4A6PyI6Iz96fYQ+LAeZP5ptwr9GLAE/0WQgP/qpnr8QXsC/EPY+v8haKb8Z4EA/alIdv6g5W78HlBy/WC+PP+GzVTyVjTe+kErhvTxipT+4yT+/k2KoP1eMVz76RCS+wQ4gQDIfh79NFrW+T8Cdv4zUcz9YN9g/CMDVP1qqnD8FXUq+3gC7vuTo1b/BSwJA5R3wPsfh+77F/ak/GAd5v8Mw0D6xxTC+WIAjP9gygr0BuLm/yceaPS5Bhj8+V++88m1GP+nThb/gINe8698fvyM6Pz9M/QA9q6gwPxg6iz/qDTE/LtaLPteKf78ulza7nXgIvycpsL7xwr++Z9x6vxKK+r9sCyu/QwVGPhGRrL8JeIA8/zvzP4Tzgj+RX9I/6kHIPvZwlT/BVTm/fPMkvxNOL8Cd05c/VinFPwadDb/1BYK+HP1Avyvrwz7K/Y8/JxPfv+2h0T7sp42+UjvoPUMB6z4kuEC+WCXUvRgCW76lZng/tWfWPqfgYT6MrqK/NNXBvi8PZj+5Zxo9DtcoPw8d4j8L8YQ/zMm+PTDQAr9bbSPAGW0svhzrpr4U3sc+VZkdvoMJpL5Yw+O/u6Fwv5sL9D7cots/Y7vHvdrWpz6c1XM/v5ZPvw3Puz7CkD2/ImVdP2cJGz6tzpg+YlhIvxMANT/wawVAQAOBv+X0rb6CKm0+61FJP20+D79eUDXA7PmtP4ewQj/oGN+/oYkbvzS6yD9lUbk/tkpwP9FWNL5P9ak9ahEGvilqkD81N2C/40hOv61QgT9ctDW9nnGwPznoK79GX2C9QiNsv6ZC8TsHQb2/Fqv5vivXt7/0pDE/ZnI3P0B53b/FvKo/ptUfQDFxPD0QICS/1cE0QPbTqL9RtqQ85+43PswtrD+1L7g+8Uz/vtKAc79FA6++/+KAv3zkQz0pbxC/oTy/v56ooD/vzv6+T1Kivx9MLL5kBow9OYzfv7nNqT5CCYa//gLbP0ciBj+xv8y+UcM+v0/51r3b3xw/ENnQvOFeXr6wFW89gJYav7C4r76gfQXAH0Z5Pwb8mD40AUa/vwnzP7I1sj1aoIS/JVNbP/rMyz5jBso+o/zZvmCn5j4vBYa+LlIOPzjV87+qJN6+lG/cP4heRL8mlRq/MFvlvsR+m795VA3A1x5svrweCz6OD1e9DFsUvqBLMD147II/UwRDvhP3Er+u74++ElAUP0y9tD8HziC/zJ3KPTA9rT6YGX6/50GnPDObu78+cOK/L8a7PjuLxL6YIk8+fZiSvFv1eb+TXDm9Q9adv2ILsj9wkK2/73
tIvihphb8fBxI/X7GpvljHBL9ra1o/bRjxPM9CBD8tOHu/C9Gzv9mP2z3UP6O+Bl4gPwuno71F/1E+AMvPv0248T8NLyi+D5UyPxsKVr95s50+nsOmv3B15z48Qra+7CDvPG2uMj/iw52/2jovv2gm6j5DSQZAdWUNvrJUFz/cE9u9gZv4P/ak37+uyME91BHQv6trIMDyTJg+g9gOPibWkb5JfNm9ikpGQIgvhL6DlwI/xjwmv7kolz6H83O+ey/SvqvR6D//rZY/UAcMPjV4E8Az1ri+Z/4sQJIybL48No0/kpG4PxQ4YD6fgVe/KNwbwFIlLz/Zk1C/yWA0Ph36n783zYe+/hwKPRmFiT5+so6/YTSZPxnOGL8EVda/rEffvSDNDD2OcSQ+W1quP6Ufcr4i+/S9JTWNv8sPUj+Euzw+4A3XvmHM87+A4rg/niSdv/9dF7/veLc+zyiZv1Qrv7/C7gfAQpM4P1ORC79lWDtADo5+vzfka7+YvRC/S/h4Pn6+8L7OwRw/xYR6v//XsL/nK+a/Ck8OwCygBL8XGpq+26Uhv+wnnz8YMJ0/TGurvjWf5b+WExG/5Y0Zv/85KL+1nTK/WEknv8joxb7SsiK/f1JVPkCCuD4JIGm/RbKiP13ler8XFM+/WPXCPqlBUb9RgKQ+tdUZwP87sj9sGyw/ng/Av5JIvb/PDNY/7sQ8vyONVb//BhHAM5mzvlUvab6OKjc/ABWzv8KYwr+T5OY+iNyAP4Ubyb8Ob4o/ecUZPUfAiz+xz4+9AG0hwFadG77Lsoc/isxkv1ijxr5XXDS+o5E5wLju1T+OwGa/4XcBP16Qsb2p+aA/l+bPPv2PcL5ovWS/8j9MP+3M778TWSU/gK2Mv9VKoD+1KyU+TOUkP8Dppz3lTeC/v1hcv7UhLD/Tpyo+rSqXPwF6zj7GeXe/3MpLv1rG0j1MGRTAWDPWP6Es4j54D50/O1HKv+lNnz/h0A5AixKpv5d6RT+Qe5E6ArDFv0pb/D4FZ1U/buMZv8E8lT+BWNW/K1S/vzDgCz7J9M88XHE3v5VBTz+x6Ac/Aa9bP/Uu/L2zQho/RyzkvsRxA0AAA1C/Mb0GQKrmDj2+ELC/OR2vP0vsAD5F41k/UKecP4MIrT4ZnRe/Wsh5Pz8NRjphlC8/udYJwMra5D5ceiO/pp/TP6eCHz9FnXU+oP34PtKZf7/gxs8/OuX+P/YryT5Ykss/srMRv6IPTL+2oCc9l7DePp+4yb4tqwk/Fd+cPgiRf7+fzwQ/MBBdP6eVLz75iZM/49ObvzOX7z7Cy5W/OJqOv6+EIb/bKnG/dEkMvzhKW77CT1Y/9W6kvsj1yr++7ZE/iktWv9WEcL1voOQ+rGZMPhMOjD/mP/U+yX5cv5BtC0Ba/0Y+W+AWvtHDdj8ScH8+o2fMPvF/H0C33Iy+jrGivrfA7j1vJ4k/G9bAv1aX+78wTRa/YyTRv3+D2b7SowA/Hghkv0Gitr+kAI49ps9cP+T02L/A+lE/+zpRP+w+dD8p95s/zO0MQLJRtr4RZ+A9azfVvvSNATsyNcQ/bymsv8BtXL93R7A/jpCCP2Tsub4Xulo/2LOuPgAJwD98mJg96keDP7kWKUAHHRk/VQxKPUnyOD8YBru+dtQYQItkOD7rvYu+t+9Vv7ZxjD///10/O6X+vlGSEz9dGMG/7Wg9Pq7P5z7Iido/YlRFvw+ho7/EOmW/rRSrv5cXqz/QOcY/bfOjPmVTnD9jLAk+5cnCveiDqr6R656+XR7Gv2ZMhz4sNSW99s8+Pz+Jvz3Nmya+ggZfvxTFkz0d/Tw/71iSv3Alsz3Qom4+GNM3v8B9Fr9AJWw+8RORvzQpsz7yNWW/lwuIP/ieFUCPWly/2cEIP8ILOr/9PYU//fOhv3gP9j40fF+/xfNTvqP4g7+PhVI/WERWv4hNDT+eLbC+fnDqvVNNFz7xBkO/empXPgHiwz84MUe/4P2VP6rBvz+J71C/Oz+6v56amj7mIgy/ZC7vP1qSDj3xXE2/H9aSvwGCiz8dORK/1IUzPX2vdD+28Pe+3h88vXm6h7xb7i4/", + "polygraphy_class": "ndarray" + } + }, + "outputs": { + "output": { + "array": 
"k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA2NCksIH0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAqAEu88iHSLPA/RuT2Qmk2858wHvvpm+TxNXLk8eyVBPSWBoL0S25k99XyhvH0jc7yCoAK8h5pZvvgPA71VmVS9/aLcPTthTz0SGdS8qadLve1rlT2aH1S8JUhtPArFnb3bJ7G8DYcBuyD7pL2IiyM9g4VbPG6plj1zRB+8sGjpPRwzFj2X2Ek94Ys+Pfpvob0hqqU8H9nPvMN29LwfB0i9Z5yLPeKieT0nMR89t3cZPl+Ll72Crz88mOLVvR0hzj0D4Ye9oBPTvfhl3j22MZM9bW4jvXc4nj15tCQ+pwmpPI0gxj2yiiE9wlGrvDhQsz3Tfao95zgWvU1LxrprFpG9q1EePZ+hqT2QlOW96vRgPU0hJjyR+Zg9+tp3vUaPJT4KnwA9m3ZbPjMe8TrAtSQ7d4GAvSAcN74gdkE8+kcuvhzHTL6XuM49t/cQPgW6MD3zDvM7q3avPM8f1z1d3HA9kmbfPEvLhz1g2vU8zSHxPepkZz3TnvU9dmRIPbeRb7tHcs09n00nPoBrXD29yZg8FRxSvJ1GzD0jcIo9LV5PvTgekj1dSfc935w/PrckYz4Hvnk99LC8PTBHBL6HzsE9JYMEvRArID77EwY+yxIePndgJT2qGxU9T8BbPnASdT1t2mc+z+hrPWSJpT34CYo8vSU9PiBsBT5nzVY9bXQQPOsekj1PwAm7046IvTC7lz1tMbm9K7kDPqI3xj3yXTk+7R6HvUra1zyXkaM9xkAfvQMw1D1LIXk9vX6wPa0vyD1jiYi8N3kEPi343r1or+S9K/QmvcB+Wr3uuy+99VJhvQdxwr3HxX+9GF+ZvYUcpr3yvI8965M5PceL2L1xXkq8nJafvTeRmT2Af1K7o1wAvlWtDLw729o8d3M2PKDQ+LwbzEK90FlUvSjph73T7ms9RU2QvU3+6bqeUgm+b0i5PJv/GL7Dsg+8h8lOPFx0JD7PLym9bHOAvXOxMLwb9ou9z9V5PeAjxTz3jaS9IpsAvTOZ2rrPvS2+XZcPvcdTDr1AMiw9B5ebvOGYB75HBCq9nP4lPN9SgD3+GYE80XEcvrjHYbw9Qoa7ANT/PYP72j11jKi9VxOIvvCQtLxTF829cH5fvvFsjD2yxkC98MLlvLr4br1NNZm9jw3ivferlr0cGQ6+6RUAvueqHL5s1KS9YOdaPQIx470aElG96vUovs5bvL3wc1A9SuPGvT+mNb7e3Qa+mmJkOvtp2L3NQh2+MduTvff3r71T7Tw9tyq6PT0EVL6Ff0e9qn9hvqfDerz9XWq+DWcBvsdDQzyTD+s8d+kvvsgum70QQaW8eUIovrBU+D0Dcfq8szBTvrhurLxdWtw90+OmvTX0Ar4gWRC+T7dwPZMeXb2V53O9J1W3vYMY/DyhRSa957BAvi8RRz1F82u9+/U5vroMojwATk29m9CLvcqUCr34HDg+k20svQdm+LwV8wI9Vav8PPedoz1ZxJ+8pwt5vR0sbrwN/l29mFaSvLOYaLtDf948qx2PvR+09z3lDR69GAGGvEdCsTwTXbY8b5srPQvdC70okpK9YPlHvfcKsD3dIs88gsr6vYvkQD1QJR49jQVmPfFJKz6NspC90tUavIPblb1Gb0k90t7ovRtiHL2wMAI+KsUEPVpTALzSc/M7Y+GNPQDQAbnldRo+sRKIPQAHr7rTOi8877nlPRyJk73VcCc9ryw/vbupxzwleD09uHebvToCXT7AgX29DzQLvjKmkbz1J807WBeMvY/Jr7xKQUg8j+FEvbNrfD3buvQ857MAPkB7YDwPuA2+wBTdOzFCvD3FfaM88DRrvbN8obyLl+G7hW57vW1qRb16S9a8j6oAvq/Aor3FNbq8X31BPV3SuTv64YQ99sBMPe7sgj0KVNO87gLJvcYvr7xn4eQ8age4PLpnBb56+dw8B0vKPbT5EjyxfB4+p4+JO8DuKru1ss291fWUPCv+3r1/oKu95MqBPTWxrz2liRQ9heKDPS8yPz4HnMG9ybobPt+OCj1CHCs9MJACvbRIDz0paLO92jmpvd1cir1v2D69t1L2uwpdfz3n2Tw9WK/AvV5UpL1wkOk9t6fCPd25g76/vUo9d3NHPnqLDL3K2/y9R/SgvSclDr4vql69uoSgvuJFE77WYoW9B32wvTKyM76gGdW924YAvGg7sDwXkzw9KpJXPc2GQD2nsSy6gGOXOnPwPj6aP5I9Q2qWPKHqGD6hyxE+WlHKPR0EpDz3xxY+73tHPqfRbz1tUcc8c5fIPSz3Hj7Nkvw8PURvPofAsj1qi767R17ZvWeyoby7I4C9cuViPYfITj4QXQQ+b+LgPZI0RjxA2G89zYq7O8bzRD51mbM9s/HZvQfOjD2Rfpw91fklPUA+2j1lTsG8gDXXvfAWtjsVybM9Ynp5vSnGJL5SoD89P4rVvff4RD3txwu92pV2PFeCzL0VU/m8Td9uPfAuG731FHy97eJ/OqqpRDwCSXW9Mn34PM1kazxgeeq9abDKvYixwr3zJ0a+tTUSvvdDQbzKJ1u8j+uCvWcgvLnKu/48i0PQPUWQmjtnrCq9o1aVvHMWF77pTqS9RyTOvYimkb3oCYy9S9I9vhsSMD3zUcM8RWTovB5MKj7v8Iy9Hz6Lvb13n71ABgs76/9Uvn3Axr0zr8S8TWJPvW2OBL2pY449/TEIve8skL1Hbwk9qFUsve4QD75nxp29hJ0XPVX0EL4zW6271nWwva2wlrx0dhA+c1wiO+eN+rqv1Bc+taglvg/Mwb0XWKk8T94JvvC5ZT4bqTC+Ok5aPMp2zL3a8qe9f4suvrGgyj0GJgw+wui2vFsr6T0K9Ma80PgSvP1fRr3qdQG9J8kVPPEhC74gMwC+h7CRvS1j2b0HwVi888OIvRhQU731MqS7Vh09vuX3E71aeoO9MGoBvrSUj71dgmW9HbdrvULdKL7CVRC8gpUIvS82F71q9Ds9erD7vWY8lL0j+CO+8NeSve2AK756FB285JkZPeB9wby/BDa9FNGTvat7ur0l+TK+1z1avddZer1tPQq+SOHFPLOvzTw7Ewy+jchPu8VQYb6z5GO9rfYnPtcGs73/daG8pTxfvc0TnLx184A9AsNRPiij+7wfz7u8c7sePkAwDTziJD+96mXrvICIkr0ashC9iOjLPO+6qr1gPoG7IJJUPc2vELooLpW8AMgru9c6NT3ND/89V7YzvgCpz73C8Aa9U51svc1Adj0jjw++HqcmvrKfMb6NrWE9dYmrvQvaEb4s5Mi9aCHYvUx+Fz2xQzK+BWWuvQoACL0tylm9+DVePaZWJb5lRk+9w5XfvUAA37uBwQC+PT0mvka2ob1lrxe9/yMAvnfVp7y3D8e9+hCSvQBbEjzz3bS9Wv
1Wvm/QKb5TDdk8GhcVvsC+B758Zkq+BL4lvfU1Qz5gWze9g/+7vakJjjwHRVg948EkvnXjjr0Td6C9yGbBPVN6cj1xK449zzxzPPPYI7y8+6A9OtOzvQuIkr10Wpy9OxisvM8PFT2KQjI8e8/1PJ+WNj3Facs8QhKfPcoMMj3n0du9KODKPGiNJL7ntZE9J7c1PNWVHb6PZlK9M9lEPHvWC77MVAO+FUnwvbaMtT3X5kA8aI6IvZcyi7uY75q9ILpZvQpK8T268Ae++L1APEL1KL5wLy69rdr0vcqjg72aDzU7P8CePWvYBr4XDcE7Z/qJPWpk57033qo9VEWivc24gL0GNyy9TYcbPFaLPr0KDVm97bQcvrM9h73zVEa7C4vlvVKRYD3xXC2+KmvrPQWDjD2nsp07ivX6Pc1Baz064wK9r3wWPgOWUD3vOA29VbqEvQ8vLb0TnI+8qn9OvWqAR74Y34C8tlEKvoLSnb3L5Z+9mq5kva9GH72sAAG+q3R9vYN9AL7vsAq92OjBvXsE1Lzo7le9MstCvSOHWz13DhS81KWEvWBJOL6yara9+i+9vWvMHr4oMSa9dSSnvd8QM72HSPE8Am4DvjJ39r3wrGa+86DwvaqLa75DfXW9hTRYvQsLAD63+qG9ECsrvSBQAL36fLW9+itIPB9WFr3nHeu754OCvfUliT3AGSS+Ovo6vfhypbwHkc87oluqPfoUgT1j5LG9M1VzvaL5kjwwaS88D/8WvhrMpjtnRlK8iszSPIJLuz0S9gW9Zw4svs1W4T3KUmC9m7GavfDZlD0wjMS7Pc7FPTfTqLx1HuC9myXbPNOmrzypBbA9gneIvZ3OKb3qQes9r3OyPdXvKz6zKYo95VaMPToPBT5H8vk9Y+NkPXO/nDuKLwo9D5vSPYoxvz16Rxe9rxEPPh1wWj0H3KU99iVBPmKV8T1QW9M98wGCPPNCnTytpsi8Fh2aPe2YET7vpEI+GoBhPLu6Jj4rM4o+MxnTOgsAJj6laDE+wY2yPejdbj1WCwc+QKOJO5cmBD2w3zi8ySSEvXBayL0YwNW8tzpZPETVLz0wB8a8YNsWPjByOD0jSnU8pu7EvRk4izz4ev+7VtAEvc9nwjzgZ129pcdFPqNXsL0NAci81+KpvDAbADzD4ti9YoLDvHI1/jyrtwK+L6iivUwXt72PMQC+LaOFvEoP9b2riKm9jSwqO1MfgzzdHrW9U5FKvCWJmr0PB8W9a8dWvUCznz0CFYq9s2JGvjcNCL4fbqK9EJ1VvctWIz4HAPi9U4cUvQygNb6av0U8L1JWvavgC76jCd+8OGRFPRVtib1tqzM9xo0xPcNfDr5zsZU9gOZ4PDeWu71P9v28+e6FPXs3g71je+M8YwWXvf4mHL3VWYq9ehGoPGVBYL1q3gQ9FkO0PFB77D2V3Gs9zCCfvFVDUL3XK9e9k+6AvRjk9j0XmEQ9OaqfPeggJz0AgYC9UFsrPs0Xpr38eSe9Dc5OvW3KlzyrZfc8VXxNPYMnuT1H17e9SVeGvLo0Crw6wJI7nRfxvcgUHb1hyho8PcetPYp8iz1tpFm9Z2M6Pe5Cv72Adou92rzjPJ+Y6r3zME+9s5VpPZ2P1TxK/tg9oELCvYP/dz0BJqO9ULBFPMDus7yfIcu9sx5wvC8M0zwPKSC9TfXlPKQeiD0PIiC9vcg5PS1DxT1ELZS9uNCCPQOkvz3TrhC92gpTPVEOn7yXpBc9ks3WPBAHmTtfMmY9Le00PW34yz1LWDm9aAcrvai5uL3Xnia+N1SKvVLB872Hl9m8c3ZWvQU/GD1LQRu+fybnPYqD8r3SSNO8j4s+PWfBbbtnOpy7E9ojvjAiAb41px++S9OGvXtDqT2Dxs49D58SvW3IHrx1Dvk8ozRGvvnms73/WTe9GUwhvvUjFL5bsVe8F/EFvPKr1r1/ThK+T0w+vDf3aLxYh4W9h8uhPAu4n70SNzq9gg8Zvp1NCL0OVRO+GDYevoIUOj0As0G8Dbl3vKcPzr1rhqo9lvQDvjNy27tbMKi9YlAqvjJxWL2nGKg8uvUsvujAdr31M4m8uly5PUIUnbzLZ4O7k/jUPHvuoD0LG8e8a1lmPAEOD76HOu87rgAGvcqmDj0f5Ak+OEymvd+UzTwAkfy6dX25PfAUET71Pto87b9EPUjI2r0o5yU9LvWOPUvaGT7AHqK9beMpPXPuAT4qyge+Pb8Cvb/u3r3Pvtq9S0PuvSu7CL5r48m7PdbyPRMxHrxv26C9YxPJO0B35b3QFFQ9tWZAvr9r7rwdvDu9jT1/va0TOT4DhYa9o6+6PHcAAb6v+/A8oK75vShn+L0jv8w9slTGPFcjAb7/8oq83YMRPsMY0ry64KE98wGdu30rxL3jl809ly+sPd4XRb1KNYE9I0Epvd3WDrr4Ni69Rz6fukta2DyVkL09M3MaPZsekz1WnpO9NFmBvOIsML7l/qA9n826vHqYbj0gM0K+k/nhPdeGXr0GeyI+WHxrvtjnwj1mlBi9K3kRvWXVID2/Ujg95u0NPRn0xL1r+8m8zZkQvl1Fez16waY9Va/kvIpkgzzvIUI9WaGmPZnxyD2Tj9W8XacNvvDN5TxpCUW9aK8/vCWgI75N1RC9DYOxO5PaL7vXIfs9tQmNPMNh5DtyxyC+BoYFPb8as71qCw++FxdxPf+VgD3Ntay9pztVPRtCHT3fFaq963+IPe36/LvgbB++bOLFvdf+/D2rVQi+ilZyvdN4Or5PBZE822t8vdNC+T1zlO09Q7pxPUBqPT6n5/g9la5AvV9Mab3P3gQ+VYJPPV+SK73vLSE7nufCPfAxa72PhsS9Pp0OPpoce7v4hg89DzXGPB6lFb2HJXI9qhUivTqqDb1S1JC+s3fMupXaFL7fyr29p2d6vcoAWDwvFqg8EJzrvcNdYL2HC5c9277UveCbtr1/Ubu8d0QTvcUClrzm+j2+A4txvXWoBjwd3sK9QNgBPh2ter2og3g8NXwFvufEZD2Etgy+JrLFvZoaHT1fJ249O5SCvQ+JO7xNJWY8DSv0vc0FCjxPaKg9M6cjvlNwZb0afDY8IEkvvaDIrLzQFRC8AzA8uoDM6ToocZq8+tIfPmcfjjx1t5E9yhSzPVOU27z3QCw9p5PkPGB2oD2kXTO8e2sEPSrU5TsTx0q+23M0vl0tqr1Scfo9S+rCvYAy6j0vuBw+85esPCn1jj03SmY9mzADvipRwLzf5gm+p3XIvVXPnL1UZjO9A0xNPRqnnbpKRKQ9Cjy6PGct27qXaJc8TzXDvfHurz1/hr09Mh7/vWcv4D1vGYs92quCO1t9ID6Fzbc8R8E8PSpAdb2NNG+7F82kvUhiur2PvqY9/PklPTeXRb0oFdM990AiPjMvZ7rKIaE9X+kJPTCRkTzJsyU9c/1ePoc3V7xSjm09z6cCvQCV0b3soyW+/0+rveKZOb2vGNa9IIocvmRMNr4vACi+cFEVvpBiXL567c29sf2zPNgH7L3oZge+mLqFvScJBb0zQH28t7VVvbN6Gzv3y2y9qrx/v
aCscz2C/vQ8J1BFPLvwqz2dCz++YHt9vZh7rbyE9xU9fTHVvCDO1L2PR5W9MrtVvC2Yubzf0iy+H3w4vg9ddr0AnyY9y1QtvAN2rb3lmY48Yrx+PYVOyDzCuDA+QnldvPbBir3thRS+bO7DPFiKJL5nFNC6wegbPoKkCj7Lywi9uBfWPaDqzD0dh3O9pBu7PV1GvTylpJa98+b6PO3byT1Z3DO9lZRxPftdAj1QNmE9yhflvaZbBb4uiY899c2LvPVnmzzaCue8A/UmvmPYWD2KGkA9sz8Evc7CgL1Q48a9lRbgvTc1Vb4FRgS990YXvqMz4r0/XJu8h4VaPACGJzqiUTK+X/SgPbOlLL0hoi+9mojovDIUlz2460M9hWfFvTvsD76kOBA91aRgvQPpPr13GZE9pUsFvmX0PD03xwC9x+vzPDQEAT6o9tu99m5MPbZNSD2L+Lo9W/NlPoOHirzC8Qm8YWGdvZCKlLsARKq6u03ZvCUTZD2nxac9Qxy8vVeyprwAfxo+rCKevfMXiz0nn4E9TcVcO42LZj3r5A0+fFyrvXVWEr3fvzo95Fk2PdrjQzurqO89aqrVPfFFAj56NEQ+lQRSPePc2TxHqac9GKqaPcciAT6ItBQ+yH/EPVMSLzwHFIi9WGSyPD+jjL3mZ7W7qt4nPkrjV73NxuK9kwinvaNaYz3nQgy8Pc3nPRrwN70Gywg+G5eqvYABg72Htb88wHkyvL8UAT0jtPs9lcU4PqDRRTxdj+i7o8VrPRCmfj2QKJA798EPvckKsz17ZUM9MKBePZ0pej5HJsw75CowPnodcb0fWDy9u1hTvc2v0LoyxBo+c8/zPZ5OmbwLIqa8fa6+PW7yl70P3tQ9Y/SgPXX+JD3tKVg9c7MqPTOUMryz+dw9WpNBvUqtRj33PDE81fPBO3vv0z1/Ur49h4TKPVu9CT16/kC9h6PgPRGnGz2BuAU+QVTBPas6mT1P75E9+4lsvrhzC75fUjm+OmfPPSLfSb0S5dA98OdGOweRhz065AE+A28iPqCUUD5/sAc+9sMdPgnVHj2/xQc9VxQTvg0XFbwnWn09IEWou+9rCb3nLcK9l2LevFerub3USBM907PwvQUwAr5XyIG9OzzMvaqZlbzt7cw9o9iJPG6Vpzxy+0O+/12nPU9npr3onDa9w5e2PG3vf7yA4qq902YtPf84FD7TKPe7DURjPU8sdb1Hsra95wrevYVHhD0Llr+9ap3nvP0hGb7Xf+U9ipNvPAoqkjxDcim9C94DPsdwSj582kQ+8KaKvGfYmD0Q2uM9o9fQPAmAJD6qQQg+nevvPQ1dhT1wI149KJkTvYLGmb3FDcC9s8lfPfLAzz2ewpq9gNHJPbq3PL0PVZk9oVuGvROpH734gYo8pPySPP2g/L0k46I9jWFIO+JXI76vwJ69zeSRvO8shr0q1Ae8ccYJvelNDz2Dr/w89FgOvrf3rDzLFKe9gwAPPSTnor1f9388QwRjvugzHr3eORu+X92OvcfbPzyTBIm8hDgMvmLIHb1tfwq9COOFvid4kT0TmS08qAF7vrtGz71pU0c9J61lvKL0Db1wOAy+Q3g9PfjWoT0jAVY8YDsAPROtmT2DWvk8T7asPYsiZj3kJrg9UyE1PW179DxjKhU9tw3dPXh5qzyLx/Y8woR/Pm2Tor0trjG+S8YMvZe45z3f+1m9isBivYgwej0Xa969YZYAPmakDD0RAj68iuaHvJrmqb16Nl09OrJVPDi24L03Gzi+q72GvfJlub18u4c8qFz/vOtnwj0zDB4+7AiaPFqnpT046Fg9OpTwvM+p8D1YXPy9M79yulGnJL5PkN08refAvX9nNj0DR1I93x4ZPfjrh72rAyi9ECNmPdfH4r224p09U9tPPeCfIb5f2tI88ADTPSOe073zg/q9y0PkvcMs2D06Pra75SCevKtTAD03iwE+Cdi5PZgqUz037W29EbQhPbRkgrvLUOI9C4yrPcgo4T1jhbA9W02YPT+HWz2DHog9WLYWvkPWi70HzRO9vQ5qvZCR+z194qq9hW9GPjFDyj11eBA9NR1avNrI9LyQX0Q8J4fcPB090TsToTI94wJ3vb9OU70d7VI9wuBNvao0iL0aeB67kr6QvDqI3TwAbO+5K/acvb0GnDylgU0+YAeHvUXYdr1tp669XQCPvGq2gL4rIwK9Z3XRO+s14Dwzake9rcJGuxsurz03ANu9PU3XPWrJjbz9zHS9yQUpPM3dnTqZZQS9x/0DvfCd9LzClBO8SwufPJEDR71DWu2888p8PEpuorw7FhE9YypBvh2zfL1SdaK9W1YWPItM8D0QYMa8M/iHvQ9YJL2YhIU9rZqLvYr9IT3HNoM7GlRIvS09Br0fPB0+Z+cougKaizx7DpC9nR8bvfzXlb2zJ988yOULvuLpaT1rdUM9/h+ZPaOPKD4MSLU9Tfk/PSfy+b3/7BW8iQRMveuBoD1zgZO8sJmRO8qZkbwtr5a8anZePshRgr2RvMw8n5QavtJpT71CEBa+J4BJvZurHT2nOcU8etLevZ1EJTwzftU66MKmvKUW4j2LeE69pNSuvRMKAL1107k95+/rO2Q+lL2tdxq9sG/fPPJcwjwrCxM9RfVdPCc7gT0g9ZQ8hSDlPfOlNL2YRRg+p/kJPRz3Nbz6xJk8THM+vUuVmL1cpQQ9F/PCPTx8HL2+Rgo+1CWFO0enILuq+9E9hfa/PZtfKz0zg+Q4PdMHvssftb1vubI987XKvc2YcDvwoAc9sySou1M3CL6BZAg+2IC/PKOesLy6png8ChB5vJgvgDzz9H09NYpyPeBW0j1YItM85Xm2O8d2Ej10VIy9/3ArPeMvwb1NANY82EvPva1P9r2D6Fs9uxCtPfrIGLzP3oK8ETAlPlnepL0qIjQ+QhyFPVP+pb0NJO49H4YUPSdA7zvYlL09E6xkvcMXej3Tjoe9Y7yIuzBrpjyvZuk8A/ZfPQDMXD1rrTS+paCZPbDWuL1cdQM9ywXbPdoxYL2td3S9vQ2kvWNleDw3rpa8dPCuPQRUTD1vK2Q9J2qJvI0X5DwyYeu9q4kvvWA4Tj0QfT8+OAVavdWwub09YBQ8N2FIPdOJ970S24k8NyrSvT1mBD1bxpc9kxdgPBY4k70Aavq8OlZnvQqyrz31bso9x6MXPqPA9z1ATnQ9Td0XPQsnVr1XJ9m9iFaLvD9PML7RQpu9JDaOvfqjRr2BnBi+2l/xvOOt5j0zGRo7ZdWyPbMLW7tq31e+2beSvXpofbzDHhG+2y8bvQWB8b0icbG9zZyWO3daUr07dCQ9j6IjPmNJM7wDLbu9ws+ove+Urz09t+Q9CkJ2vVpKxT0A/ie7sfDAvKddAz7rQ+i8NYv6PDTGuT0n6HM9e9g/PGXywj1A+9A850UsvOgSIr1Q2Ak8AL19vRdS67xal6c9+7YjPYUYnD0goFm95icBPuglgL2969C8krnYvR3M5j1Jkg29F9sXPjI5jTxV5RS9+rTpPXM2sz06Ev49wN/kPS20Dz1yIxI9EDY4vhc4
Rr36Qjq+iuI/vXz8CD3z+xO+t3d9vVPyA726rqE8jzEJvkqs2T2tsV88ULIovhWZ8bxtwPW8lW89vhR8h719kme9zteYPe3y9Ttr2lk945aMPc5mHj2195E9Y+S3PW0fgbzDl9Y9RbeCPSOKCT0aAg8+q18LPeJeQ70APSg78kHGPDPDnrqdFpk9MXuhvDuqxjvNhH+5oA9ePb/cK7yjJ0y94Ik9vdArNz3gY9q8rra6vQKn5Dy4YLo9ysu4PS2/hT0/y4I9BbH7vQvxJj4jGNW9gyYqPZqO/jmHVHc9qymuvSBCTb6X/u+9K7I9vLs9br3ILjq+uJ5FvhTFwL1z01y7k8B+vW0IATwNWBI+IxBoPgePNTxkgpk8iCDYPcNY772w1c89N94CPfCBh72j2zM94ECCPbBq1jxjPlI9wxplvDNlyjwN+3Q7YBmUPYX3Xjz621Q9kCNHPQ2NXrvYeqS92gcSPcKnsD06MzE9WzDhPYN3vD1NfW87JL41PW00kD2jhnU9EyWJPXUHtL0biew8rwM4Pkr31D04wN09S57SPBe30L0wKy6914L5PTMQ1z2qMNM9LQkEvSo5zjzGUi2+b+/FPUCi0rzINWM98kpcvtVWMT0tuDu+fXf+vbH6vbwp9Iu9IKuePTKjmLzygR29f6fBvfmFIr79MIK8M+m6vTM1Or3D/oK9cKWdPGfXCz6dzI08ik7IOyAWkD3w4w49VJKrPWL5ED6iq5+93BeaPXc2wz2Nie87Exz/PH2xb7wvWQy+A/ZvvriX5b2geRi+ZabavTMbKr6TSnq+CJtqvgBw67jNmcm9Cqkavumdvr2ftgS+O74RvneLW76bXEi9mjfIvbNmRb3adP69qxUTvsKkG72/Vr69+zUNvo8kdb7NiSG++sMAvRuJWL5nksy9mgPVulBuST3Cjke9qhKwu3NGNDszyhM+Q3LWvYoGZb1/4as7s06eu+9PMb2HlWc9T6umvUHUprxCh+K9E8epPJVpIb1QDk+8n17PPfwSOj1lbSS8s2BuvGe71z2gcAs+q+Z5PVFvg7ySnGg9rWDOvfeZ6z39sIc9Z6aVuJeRvDz/TqI9gB0CvQDosTjz36Y977v0PHhQmD3FOk4+Pn8BPiz/Ez6Ns+s97QX+PeafyT2DrJM8GuNAPiAi5jyw2B8+VwPHPZhz3z0X+2Y8890rPodBXj7y3MY9ssPmPdq6kj2fyks+htsCPrPWxj2vGoE9N4n7PSQ6kDxgf7g8P+7LPE1RDT2dwic9Cn8DPiMdxj0Rzy2+19RgvCWfM74T42w7Aie5vHDSOD3HvHM98Cy+PV1ZubzlHle8q7yiPTkiAT1C/cK9V6qvvTpNkTwjRkW9eH0yvRt9WT33eTI+QISTPcAotb2wIN09VxqwPUfryDxDZVE9pzXiPeBmsr0En7A91WTzPeBog700xDU93hwMPdMPl73/eMK9zlMfPAjMEL50rzo9e3JRvfBAz71Ccme+jqqPvfIrAb2MbcW9xw1kPU+sEr6DxOG9D7ghvrtH2Lywse6817E4vfCFXb4D05i9h9gnPGUeLb3n4tY6Ih9Rvt1J+70AKNM9O/HWvfjo0j1/d7c9CiaEvEEiwzx7mdy9QoXwPSPkHT1juaE90nqnvckeorwLP229R9EdPClKxT1bXUm+iyAgvfCdbD3TyZY8gxSGvL+Sq700Ygq9TV7XvSl3wb1T/5g7enBEPdeYwT2W2kG9a+QgvUAitjzKuTm9cqTpPXuxoj3N0U69NcOnvYMD6j3TkPu9wyQzvPV6NL0jbWA9wOaFPeJSyrxX0xy9X/HQPHP+tDytlRy9PycSvafUd7xB7Zi896e5vEVvED2NyK47aPnZvcuZtr0VM4a9BeWOvCeHSD1q8u68PxEIPZ/KtTz3g6Q75zalvdk4C74X9bO998D1PVKAK733MzQ85U8JvfvxVL0z2ag9tpikPWCWNzxaS4e901sCPDvfj706sMs7nXJ8vZadHz24cqK8rZWKvbR9Kj2Te569l79ZPecZkD0qplm9JznSvJccir0y7hO9f9IPvvVOgjwTJfO9ugz4vMXvRD0zMnE8LXU4vBbbpj3jg5e8vXD0vaeOtT2hUo89GE8yPUgZrjyjOtG94APIvLntRD2gYl48nrGiPBZcqj3mNKI962/hPfCQvzytLwI9YAgJvQCpcbnNK9Q9jZsJvNWdQL1MfQQ9H/KBPeMzHj3cmkY9XSBSPNJqprwK0xI+u0jMPTsBLL34g7G9G/T3vfu2+D3gjjg90H4TvFAgjj25vTe85X+UPB+nTr23L0i+O1gyvZfrS72TNny70CwwPR8TkT3dKLa9GCTzPHLIUD1rYZK96bhDPRAEJT4mlYa9k1v/vRXWAT09zGW9FzBnvHo02bw9YWI7OpEtvqeyjD3ghVE7WlGHu/UtBb4tDSO9ctauvNP54r0RwYe9Xcd7PWeOFb56MKO8D+gnvoDg4jvOWDu9b8YYPC3esztujDk9r6yTPdVuFDywVxy+o1rpPbv+oD2YMoq9XWjQPQIHDrzbaRi8CuzWOyXA/Dx6AdG8yxjqPFhkN72kwR+9aOCTPU8MNrwgTno9AMoxPNrXyb0aBxI+GIJQPV+agj03HO08A66hPSImDj2bi4C9QFV+PL9Xaz1Xzbg9aNWyvWABsj0OeL89EiXTPVF3sD0vRB29a3rXvScZXz4c+Ks9i2MLvdjyiz2Flj68T5csPuVEXr3rUnY9O2DPPcKI/b3gP9c7zTnjO1FWtj2ocO68PmcOPjv+K72DDCC9l9xdO/g0pz17vjM9V6givKVH6zwRUhU956gOvA+Yoz1/iao9ak37PS3d+D2F+Wc9ynV4vcCxbTxaLC08P2A/PojvLz7Xtg+8x6I9vTqQbj2P3eU9r9iqPT/DvT340Rc9BQV7PYcv+j0CKzS9utNsPa+egr1jepe8ZfekPttZsT2QzIg92IboPdGCwj0X4iM82t0APAUrsj36x6G9ULjqPQUGqL1RvyQ9fy8TvgWrTz1lpwk+XeKmPP1xoj06U2++/BYePK1zSL1cuTy+MnDJvO+ASj1V3OM9vZnmPD8ED73gpKa96IjwPfdAzj2PpKe9C3TLvX1zgT5BfMI9AAV3vXVRsD3z/YK7AJfZu/PpuT1jaGQ9eNh5vSN5Ub31bYi9MyqWPDAaObwaJgO8qsQYvYpfbr1D0lA9w7qovD+Vnrxz+8C8IoKKvPG6Mb1d+9U9S7sAvR0/jD1gn8E9BaFauxexkz35Kps9CsnSPE3fwjuK36m8oLcHvZvHpT1XZG+90lg+PDE7CL3wC+Q7UgOhvfPGmbtCxbk8/3lvPXej6j2spSO+6Dzmvbxql73hhhE9c2ravaP/W7sBZSA+jYnKuwNBnb0HmjQ7+I2kvf0B27ydeOu9a3kOvQcjWrxDbi09am8JvhyBAz3CUue8GpVQO40FajuQ1gU9ZxIpvOjjq73yDi09YLyVPZeHar3ghTm8UwkhvLXCKTyAU8m9a8xMPUpuJj0wTjw8bcA6vIdE4TyXEoq91kWUPKpL4Tv
g34I9Ki3SPbMO/zt6a9A78MGfvRA3NT2t8q89LycLPYIDdL0DnEW9sxI7O1dOBr2jrx+8c6obvnctsr0dTSE+8GUavXKDyD2C8gQ9ikx9PUZdMb6vLZY9R5YTvl/Wabw1bAW9nS1IvOU0Rz0aW++9MkJdPZJowbxTQMm8WRcOvt+PPD3g3F2+YpojPq3K9rtlm1O9TdyouxWizbzz8X+9Uyc4vfI+Sr5LFRo97kUDvntdNL3dP4W9nR2/POdn6Lpvn8O9MXMKPZa7qD0KuVm95zYSPcN8BL6gegy8q395vcW6271FHi+9B/SWu7dT+byH4+K7C35ivb1K1TzSe+K9JUTXvME3oj0SJZm8Fdq3vI9tGz2bviA95XHovFI2eT1DYD29C8xPvc35Yzsy+EU9qiSMveKaKb3+58q9s63JO8JNJ7xaa/S9M+75OibNlTxnK1O+zWYBOqA+Aj7IKOC9O7j0vZHOmT21oFQ+u1I8vcDMpj33UxO+TRMmO9D3hT27ypk9X18hPncszL1474+9OnRGPsuM/r29fPO8rJcUPLeo4j0Cctg9i/fOPU/BCD0qD148xEEUPYGLnb0XA6C8GmZzuz2YPzurxhc+XWqCvD4xJz4z6rY9M4yEPAosWLy1ydI8gyYGPZRzvj14u8Y8Ova7O+1LIbyD57Y9x0E1Ou4Olj0VWok9ndZ3PbltTD2LbLA8FQDGPXB4nTx9XK09xiS1POP4XLvY5cg9+y+vvDWFCj3b/j29jd5QvQmHGT7VAl49z5vDPUtLzT3goOA85H8avtUt5jxnYro96n5fvYj3+704eKe9u8D3vYW3GL4dU/C9H3fMPCmLvL2FcHK9Z62NuaAMtr0QvC87Gl0zPcP6Dj0vcC6+a7VLPNOKL73FoLA9/PRGPWLgbj3v6Qi+TqhDvfA0tb2vriQ9t8eyvT9sg70Sfus9f7GaPj8phD0DwIE+7/YMPsgYaTw+whW9mo1vuWYhub0AcqM8d6BhPQWhKzzHcCS+ozLAO2s3nT3H/Ck9hzo9PRm5PD0qw3C9j/3jvWsrsbxXOpc9prgAPWB57rxv4vi86HUcPso3tD0LsvU9uCy3ver9lL0a1hY9n4GnPepBFL2ThJM8HXc1vZ+TLb5HYTS8ZO4RPa/6572bkoS96gESPrOCEb2DEqy9LfvsvG/QqL1D5AI9k8ElvC47nD1gJCG7naYpvABp6DvH1d69aqDavFtUgL2/dUi+M/eVvT9+VL1oD4e8xbslPRtrgb3QXTy8a7DHvW/0nL1bdUK9WPR9vd0zZT0oioW9Lw2DPf9Zy7wnxCy+RfRUvvYLx7037JO8rAQavj8Hcr2AE3e9U++KvTtRbL2NWhS9J6t4vUpR1b2XGyS8v+ecvRcoQ72DJzI+BCsfvlzqrD3dPaC9lWlEvTNUCTyIqnu9MviHvf1B5b3gXVe+f3jTPRDsdL3TYKa9dwV/vUcC2Twt5Fq+zRnyOtr+jrzaRcO9oLBMvcfwcj1Dyh89C2QJvbYwSj29M427b2cFvmf0gLtqCPg9Y817vQs/bD174hW9CgvFvRpQlL2zec67d9j/PRGNCT2XPJk94JNcPFkAAz4FsxA9gEbWunZ9g728Qsy9Z8wsvvytnb3pL7U82hdtvPMB+j2yg4A8J0D+uyGBAb7NqPO8OHAhvk13Kzzviam9wzZyPQ87hr3ARZE8Mz1bPWO0qL3DC/s99yr+PPTrh70ltIc8q4Q3vfMtdL0ADCA6/WtrvYiOqL1tVdY9w4mmvDfuTTzXI/69REVFvleH5T3tZNC9GjiZOwhImr3XR8Q9mmDevd2ttDydxOs9RyQSvj+AxryALB88akiFvaNxH75v85G9Upj2vVK0yL2V/Yi8KmrjPU1qgLznPVW8hc/EPWfN6juf9lw85+G/PYi6v71rciK+6PX1PaeTdz1N9t69pR/zvcwKGr47/1+9sLVnvXsgBr3nz+Q9Ez6ZPWSrAL27LZ89X9K1vddJMrxq5wi+406jvQxkjDxLUK+9B7nlvRs6kr00lr+9KiH7vaMcUr5T0Mm9hSOyPNo1XTunNPO9OpLQvctr3jysmRy+Jy3xvKsfgL2740S9Y8a8vHhcP72mni6+L5wtvmjA9b3Aqgk82zw1vUC5LbyPUoW9UK4TvaAueb61yg29y4opPSXQCL74avm9CLz2vNU4q71X6Tm++LfJPOvknb0NUxg7D8a+POcM072t1Gu91xkuvSfop7zvZNU9ZbT0vX8YDz6TtcE9av7DvRPMcr0bsz4+1QLjPfj7M77XuVG+sHlbvpoek7vInow9HS44PDjjUT2L+oA9dkmqvSLshDww73k9kvCyPTvriTs5dCE+lKIIPjNgzz1o5q49b+0rveVkBD7YIHM9DUhvveUTXj1nwUw9jrwSPUhVAj3b1RU+UUIovZ+Y5z2Dw0C9icCqPYC96D27KbU9IwbaPEA1MT1Aic887XlPPjr2fj3zTeU9y4oFPRu36T2a61A7n6UUPtr5uD2AnvS7rb4ovShf0z2TlZw8o/NnvT/yqb3U4Je9ExUKvS0KNL7BL0Q9GgdDPd8TML6zbOW9fU0xvDP5Jz4D7Ci+bmwHPYccWT7gMLK7dmW9u/gxED0xDMO95U7oPKV5vjuwsai91ugGvjpOzL1Vfx6+Gomnuw3WAL5zEgq+1yVLPcvXwL3NNYA8A3+TO3V9tD12Mqe9YrOiPFNb3r0SAiw9YtfcPAeTNb04mNE8S2efOydOlr14ROQ8HLaTPBkQDb6Eyxw9wlxpvdcyrr2aY/k7vCPFPC3osj3K6Vy9DQLZvWXiFj4HMEy7zR4ouqNTHr33ZIW9uzUXvh14Tz31rZU9nwHDvaHplL0X3GU9x/vxvWNh2zxauP29QPeUO32i471BYyO+MA71vYM1Bz0AZus9Km/nPJGoGr6d18S8nPQ/Pc8pED44Ue085ZTJO3gsUT2ULCE+3KiuO0gCAz3kRIM9qFEUvv/lTr2Pix+983X2PffArLxntdI7cOORPEbQnz0WAzg9QgyPPQsmGD7b+oQ9zQtsPUXWWD2KZvQ8KlQHPur/vT1ylO89B2mNPRReJD74NOo9TeS2PSL0RD7j+Qq8y8rHPR9ZzDyecMs9NXurPYhd/D3r9B8+w8ECPU3trTs2UhE+90E1Pdg+Jz6rkP89s7ayPbORqL2tlgY8h6K6Pc4ItD3H4bm8u8PQPfvEhD1GQwO8TTQROzGPHz4A7OQ6PWYUPUfxZb6QQOu8IIi3PFL8Ur1nejY7u2stviJpPLxnd9a8YLT4PV3Xpb1D4iq952jYvVo3qDyaBSc8HKAMPU9Wlr2HnyU+HzWCPQWSVL27DEq9F1wUvqSZj72Ja0M93/CHPcreIb0f0zo8u+qOPYNEjj1jstg8P5Xgvb5unT1nt1Q9524tvYXHED4wj6Y9gGgavProDTsE6Jw9gAK9vGdKkT1fTc29+g5YPTPVejvCd9k9n/GJPOnDir3yvsG9agb7PeBS0z15gJ+9oz3DPdIVVD3FUzS+mvC3OhdWMj6Ly9+9NLyQveiKwj1jBb
y8iHQ+vfvyTr0NSFg7zWnvOvsed72XZ+O95Rt2vQ01izvikJe9ou7TvZ1Z7rzIHGs+L6MHvVknjjynvQM+i5UoPgITJz3JRpA8yjoiPtRvjD1dUf+8sdWTvf9mvb3PtFc9BpUTPWPtEr2XKcG9UkK1vUm1GL1TSOU65zbDvQcGtz3Kb488ICtxPSrPOb5Brau9eHcjvWOXobxHt+s9OlMgPHnlnL0nz8+9k0J+Pe/KnL1P+tE9IAZivW9TJD3jQ4o9pgPGvAzHJ70N/vq9YJm4vaFtvT1Ldzu9N6ElvHO8U7zTi/W94ZAQvhWpLL0tVoC8Q2BHvuaoGr6FtZ+9CEX9vXusSz38Pwu+hwl4PIH3pr2I6B6+uuxuvecS5L2nvXo8ikHQPELx6L33SrU8GFdCPOHQJr4XytU8p7yIPcPRij0TVGA9igpuPNroGD6g9RM+KYQJvmiDy73b2oo8D8oiPstjlj0sxgS+ZeD1vf1WxDx//628+BTuPUMa07zgE8Y9B6T/PcP4Jz59g5W9V5aQPUPkaz3L3Ng8Y7qnPTftbD0fogS+tG46PQoujD38K7U8hnM9PjhiV7wD/wI7JWFHPgflLDsAMJw8dx1SvU1KAryfcSQ+VNecvDOwBz2z/2o8Uzg1PVJcpr0U5rY9wOBxPA35kDsfjlI8M/L2PWeUkbsNrow9XS3LvHFeCj7AVGO82C7XvRc/9z0LjwQ+V3oQvea4Cr4vR8q9N+IaPqPThr0A5hy6M1jRvQ2Xhj2rCUE+8FiFPc5GlD21bzW9u7G2PE1GTzsgPyK9q9WYvd/xkDwjl1q9L3ZGvqS6pz39YEC8qD0Fvi8tmr1qBt+9F4B/PUN7Uj0DnBG9r33DvT/3H73cvAa+aSvIvc2A1rll7KS9Q43cvSUvsr2UcBK9AIttvdB/g73obd+9Wy8LvpOCTT3Mmh6+N3kqPLM8jL1LwiK+UyaXvXIKiL3i3Bi9TR3MvSVwkr3gePW9OnT/O2CMFb3L1mO+NTrhvS3FWb2T+MO98hvbvV1j7r3Q2JA9r5W+vZIjSb4UZ8S9s4/avOyoIL1oOTK+ai/TvWsA3b2j0IK+hH6IvoOd7rwasFE9SFfTPffsnT2zvGu7/YmJveAnJz5/VDy+ADrvvJivwD2vZ5C8ssPaPOZkQj09ttC9Pyx9PfBOCL4yYbG8+zUJPlBaUj2aqTI6KgxLPRUxdD0ltRW9d9lsvBr7zT0XQ0E+51gjPcPLZL1UNwA+5HwaPk0LnD1rHlE9CHbePMKSvTwNJvc9LNEAPmfgyz1A4bW92hXkPDsPMz6n9LU9knoRPoD887rHlmI9QPH1vQDXgboFBgU+A76vPaSFqb3AnnA9zZ5rPBPQ6zwgaje9KlJCPJdynrwNGgS+10C/vQ1tHT6XG1c9KsVZvVJMAL67eLS9KpDPvdyukT0AvzO8M9wEvADNSb1+YS481iY1PZP3yz0+cIC9QP2/PKOqPT3iDY+9Rw/7OmOSpL2PWNc98qHZvCeN+Txbxqm9BduSvIDrTbvP1Qc+nQ30PEMu0z0aPb+8akskPgzsOr1Sk6E9qtYSPhViW7z3MT89oubgPHDLTD1f6w68ijI8PdKF+j0be688P6wIPkDLgDs9Qd87WswZPHUk8r3gwgM+KyQ+PagshD0rUg89KHuJPT+WjL2qu9U93SpbPXSNgr2odnI9rTbDPdi7xL2AT/k8g1oXvkMfjj2DvF09/uQXvmPoEL2dv/o945SovbH7xr0fW4Q9NWCEvJhHBz1btfk99yIivnEcAL4raLQ9j5DXPNM8WL1kiSq9l6J9vTfCLrxtfgI8wx8EPvV/H7xJn8C9TjUove3A6bvX7xy9t5gTvpJqib2QfAu9Co47vJavxL0qTkG9EA4Pvs2WcL15rDi+famkvVcevT3VV7K9NugTvviwmb3iMOC8EKxIvN/Lrr32a0S97e5avRAbjLyFAzS9YP3nu7f4173LW1m+Y0ywPZjDgL3r/Cq9WnZovXQphL2HnRe+4JUnveOsa71HvTG+MIrEvTr0+ztKH9C9ET4JvrCE7bxVMMW83pamvUf+Wb4fIZK9Kv2yPNwpR70HpO+9BYjUvFK7DL46Szq9rVEiPcXf9z1FJfu9g8K6PLpPojyXAo68450Xvaiuzj0wqCy9EAi+PMVWzT0rR/C72AfdvLN/sb2IW2e9Z08SPV80Bb7nA389urLyPBoPaT5imiM+IfkQPbRFg72y7rc9KrrdPeDQkb0t1wq88wuKugK2pTxz4/c8F4bgPf4bBr1zTCC9s4prPfjfzrzTMUG8b1w6PeJRpD3Kj2W9jIqhvUaBCz4Xtas82tpfvAdZQzvSnYs9o069vfcAkT2I1EM9ssCCvVLB6r39VF49leYePadTh7372Kq9B06gPQB3iDzXqgO+FjwJvT1Fn7x9fqu7aGoAPbjwMT2l3MG8wca0PQhK5z3DZP495EkEPss8ibwuaUM9SujxPZAxVD56FPs9+5N2PTUSuT1f9xm9B3HuvffF0rw4AIs8kGfTvPaaFL7c8yO+ZmcrvuIMLL1vy428klFBvSvGqb0oQEW+ultEvKMio70YiUe9UlIevXcAFL3oepu8U+4lvYyqIr0x5g+9h5wtvFW6dr3JVZq9Gii6O6Ml2LzQs6y91DE0vnN2Tb5A9jS8OdvFvUqNXbzzNqa9Sv7zuwJjOr6N3OO9B51hPd04t72T1gW+404RvFtTGr6OdIC9CDVFvkgX071uug++/xdAvtGIwL0rDWO9W1fYvfuzB74SOES9vpO1vb/PFb1Szla9rVATvRCo8ryuNCc9D6+7Pesurb0aDCo6DW7DuwJumj0n6da7KTg6PeMPiLz/zBC+5E8Wvdpc9D3bgyC9wXG2vfpI+Ty4p3q9qgjtPUrpYz1nCo092oTlvaDPHDu6KaG9VBM7vbRjmj2ShoQ9GFnBvdshML22BZc8c92Uve/Ymz3z6VO9kpnCvDoRFT03dwC9YCwlPFcXur1jd9a96wCfPf1zzzxzqNk96w1/PMgEfT0NK1W+8m8kPdq8TTw3kSK+EkSVvbs1mT1Qgnq8tscpvIvDJL4jGbM80wV6vZmYAr4VRPi9c2qOO4Y0M70Vt5e9oAvivUgIeb3wbEk98CTAvPBgOz3IULy8DfZmvaNeYL1gTIe9Nzn0PRrB8ryV6cg9WdwePheEAD1iDbk8PZ9lvdkawT2DAOQ7+DXePQQPCb36A6w9sNfavOV/zj2HQ188wIOlvDQOK70f1cU9gMZVvNs0br2DpKg9oyACPTiGyTzwnXE94r5yPUgjAT6TdsA9zz5dO5fqbr0PIKg9TW7hPF/pwT3TyKm8evLevf24Ij73HaI9z4ALPmWltj03oLA9053LvQsJtD1oHyo+WyrsvZ+DjLytDVI8jfz4u64Tmj1CgJ+9zV5TvS3Rd72P1l69qJkLPccf2j0fBJa8qbIDvgOgez0NRm69U/Y/PgAemrpr9pU8/6yEu9vKFz1hprE9XoqgPfuWOD6DA0496lwXPt/JrLzFGgM+w/nRvKw+nL0C+pM9G
GbxvbqB2z3iktM8u2LAvO96Br3fYIo9SAgJPSeAQD3FAIS9ayxNPQ5KpD1ga8M8eshWPCPUqT3KCG09F1AXPh1ueryVtsU8LdcuvSND3TwgI9m8PyIuPk7tgLzDOHK92sa1vaMXLr6FgCo+wNOhvVl4Iz2Yt6S9LZ0xPJNnKr3VfIU9nFEgPWWZkr0c3aS8sBlcPXIR7r2vQ7Q8+oFRvnPvdjxLaa+9LdK3vZs0VbwN3tW7MmmZvQnLm71A4Sk8PbmIvSekBj11BYW8mr3jOE8BSjoHwzE+kx4nPW2QaL3Pt5o9c3scvb3dPrwCQXM8KyFFvQZ9oT23Nhg9e56nPdv0FL7HuWi8NCwTvr+M6rtbPts9R17nPVAzYT3sMIU9f4SSvR9ckz3tJ6o9H0hGvVCp8D2rbck90L4CvN2Afj3CzV8+4AgzPcW08T1HKiG63xxAPasKCT6Ar449YLc4POD4krxD9R49m9ZAPirK3z0iMoI9QnzIPfw9nz03mwm+ldHMPR+tjj3okIS9h+LPPEi8pz0CPJC9z/WnPRrCe7sOskU9QzfPvffdu72AjQE6G8vZPd1f4btABxm+UmsCPYpJhLx9iwA+l8lIvTxjFj5SBFu9gp0rPv/QST3fzlC8WqNRvfKp1L1Kq469m8JFvafouT0Pjsw8Ulf4Pbprjrqap2w6", + "polygraphy_class": "ndarray" + } + }, + "attributes": { + "span": 8, + "factor": 0.05 + } + } + ], + "config_fp16": [ + { + "inputs": { + "data0": { + "array": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGYyJywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA2NCksIH0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIArzN22wLjkYPn6zfrNRPiQ6g7dXOGq3dLe+M6e/5r6AuA28BzVEu6a93T06s1Iss71buBkvm7wDNs64q7TQuGk/6aI7vJU64ryvMte/UL1NMug5fDFnr9G06r3CuV+3Ojx/NQ2/LzUptmq55TggPHM7t7rztE01zjurt/GxbbzJvIA6bT2crAQ8yTUpucg1Jz6WqEI+PcGTOpItybTfLfO/CLO3Nek9Jbh4ugS4UztDNT24Gzg3LsA7nrk+tUa22729NC00PB2Cs6m9u7Z8tWu6KbF3Nos/ljEfNMSsrb/KprYr7UAostM0caitvJI8BDpUOka7nD2bvbI4YkDtu4i4YS4HuDS+YyxAvJQ3W7szPkS6J7WCOuy8RzM7PW6+6TEoNEE687xIvS04wDQCNIs1cblvM7A0t7l3P5U3xLxBOcy7TDqiPJG6tTubNpQ6lj/aswi6HruHuu+sdTVtNJ46qCLQPTy0cUEBOdu6Sby4NyeztjmSN6msxroPviW32jraMvy8izEqNhK76zBzK5K8uTV8OFU8NzyDvYG7HzgcOB84tUOROIs8ojs2OQu1EjovupSzxLc9LaFAeL99OXO+jbdbPB0sULy5uXA52LntMtUpNrlKQBI5DcD3MUu50jpXulivCjjtOs28WrWZtzq5ED97Ngu9WDs/QCE8FL6/txE9qbkaNzI6arufq3zCGbwLtP68iD64vQq3LzDEPb69pzw9Idq7ZTdfMs24eCwqtkQvTDlYPvS8REDPv9uwtTh/NPu4qbLjt7e4zDq2NYu5MjvrNIE6CTmiunu4+jniOFqlgi8cPby4YDh4sveyZTyaOoI6OT1hJXU597QwNSqwNS7DOIy6L0AGvNu8ojxVOv44BzlFoi672ixruc07tbCbuiS1mzaDuJS6zDPXMw64ibdtM8u9ob2/udWy+TTnPdw6HrHepAO8vaSetCo1nrooOCI+9q5tNoU5a7YsM3MiQC4vukYm+DfOPa07TkAkuvs63jFhQHe6uLrMuD/ANbgTutAweDWBP5o7nbgwu983SL1TP7g8grfavmo9Va/0PGG+zLhfHQMqM7f8OEW8jrCzLx44sTmAvCO+HD1RNf25ND5nL7g8UiwfQAU/+LPGOyo5eT24u305PDwJv7y8FMBPtL45Aj6+LIQ+hb3QvhyrJTYvqCPAtK04vVs53jWFuxy4PbwDrKQ747sIOD64WLrariS8brjLvNw/hCiZudkyMK8Ss+o4Dzo+uJu4Z7SbwBC+eD2UPviznTj7NChCejwYsKW7bb6DMg26sL0suVS8wD4OOxWg6z30LOS6GD5QOCa8F7IBu4i9aTujP5i9gTg1ucu3vbjpujYqprpUNG6qpbNDu524CzoCONK7Wy4DOq2+WThNuZE4G7o4v4O+KCooNDy7HDmlvjus2Lw3uREq4rontgY8nbivOoW8PTjEPfHAYLqeOH+y8DXVuIst+7CsPBI0ZzWXts237LZQNry2ozQnQPg6N7XOPIe2FMAIvHy/oLW3JLU+OzUDs6M6bMCKMys66r2TPGo1pbYQOYtA0jHxM1q3zLqlOtm6lSykt6o3VzUmPBW4UbTVuxy3CTYOOmG79TpsPZ02gj8xuvu8Hb/8PTw5Hat7NIG85EAjMAAvzjmyNyozU7qLN4c/Yj1fPhe467sGsCIrYDzFvh4+DrHVtgy8n76WOrEsKb0uvV+1rT4ntAO+3bNdtGXB86pks5I5ZT+CPE20bbwmQZQrIiMtplcyn7CXuGC4MahZuLS50C4UtAQ+TcFePPw8JsB8tfG1ob05unG8Aj98OxY9xjmEvDK41DfjvLQ5sbP/tbA5HDfHtaM8U7ztOL849LQ4NQG9ZDvrsS+4MjyiuaK9Or7ZOB+9BT8qwMk+wTIxrly4YzbRqGo8UC/PMNG1SqvtNNe+Zb3yOXgx47G4JJA1Ubg6ukQy1LuINs++HjyQNxk03TupPg88Xb8evQC5riYkOM65+jELuuS4oL1ju2i9z7s3PJi7REHlN+ox3rqaOZu4zy8fQSaumTyguXuoFT8EuUA/qTmAuA85yDv5OEi+0bnss8Ss9ziwMVe9FTbiOHo4UzysOlk3fqylvuA2pTJYNBu9U7w2PBCpdDlAJ54ngjshuCcuZbf0tvK0HDOptwY9KLv7sQm3yj1KMiE88b1GNB47RC1DPCO4oz2ZQM61IbfQPVE+L7i5toK0Yb1ZuwS8JLpxqH8zND79u+A72bJVqmY5frweNlQx4TegNOlAGrk/uPy4crgZucI8rz2RuKm6izdruBA5fjIQvjE+Lz/nuDS2kzRaNUU5BUCqsWO6hL3ZuT6oLj8kuCkzNKTBPA5BP7jVty08dTljP6w4wLW6OG88kDoPOEQ8rTyHPTE5WbGyMNM8ibrmNUu2XCcdPR0y8ilwvfg5KjlUQO20AzP7M08+Ga53NN04+TEltzYySzwbvEEwmrnIPBi+ebgJNkM+NaxxuIY/y71mwAo3BLgWvKs5zTODuB+9+zo0OVmuYz9IvBq+ibnWq
ckzuLOiNQK9xj1CrXg8fDVPN484KjckOVE9SjKsOb6twz1puTQ/JKm5vRkwc7m6Ojm5JLePvz232cBWvhU6STrPNry7G6phm6K8Az4FOxKz4iarMhXA6bN1uQK8f7QxPyA5kriVOJk9ZjuiKy25ljlMNik7FTkzPEi4RT1TMidAg7nyPlUyNrm+tyC1yTYvOJe4PKZJQOk++zbdKK8v6DgXvB60rb5jNi05u7dMPue83L0vMzA8vD5Yt1E87qiGsRI7ODlOvug9hT0BuVU26DcsNGe4X7mLprE8WTjutSw6ssGYPPW+zbV7vC69pTx8t4s1AaqiN+ssIr34O+a3Or7atgE+zTqUtZe1JbUnQBw24TYfPKQzJbRIspWsxKjSOaYq3DkqrQgt/r9VO4s1/DvLwS1AeLBvPCm85zg3vP24qD8asvUy9jruN9Aw1zXPQGCrbzI0PGw8wDwcOZK8iT6WvNg0CbobrEM1JDXANnQ+QjfQs7Y7wjzpvMc4nDnDtIE9zbAFMIqx+iNjvMO9YT7Huu67T8AduUu9kT4KPIG5gUDbOzO1AMGVQI+9lb4XPOFAiT2DOMI41DoSOoA0qy4CrAi6frTGvkuu6btqvMIxkT1ZO0i+67uHO9y7MLNnOL+7vi5WvdC4HjVfvgw3B6VrOCozdT0CMN+21C9ZOEEqMimeuU65nL0AP/q8i7m/uSk7uLT+PGO5dzSvukpAwLz1NBI5nzbusSewnCm0sLY7bEB2uHu9pq0pQW66jj62Pm64jTiDPhG2hLKnuA+8MrnlvF0oKbp7Mzm+SzWrOvq//DXpPNe8sT60NqS5I6t3ON0sTzheu2sxqL0erzu74rnyPF484DhfvBC12jyJMKNASzYlMvK0RjDhsKo5pztKulO9WL8QOGq8TsA4NvxAN561Oj0tVK5aO6W0RzQmNVi58DuZsQy6Szgwuzcnq6BYPJg3aKaLOpA9djhNIT+9Q7zitOC4/LFAKz04g6zJNyAs57+Eu5yw17zNOB8+4DzVsvc9wjBltei417Q2tnQxIzE9Gv42wzyZO/G9HMF5O3i9MbOuvDW/VTgTOp24L8FfuEU26r3eMdejojinL8m7yTwTsf2md7sYtxS7ibHZPn29dL7iPbOyWrkpPNi4Tj9sOc63UUDZuPA5yjQ1PT8+GSgHulw3bLkHQF4w2LXpMWO9xrvNPEG5MLxLOL48wDn4Ow66sL0BPiq1A7RQPXM4SzdUQCa5bDtMK0w0HT4QOE44SjzXtbe6Lrzevx1AarwVs2606zSHOuI6qrhZsYU09bNuPts34TlOObI8yzEwvWU2Nrk7uLE49DxyJfE0zz60MzRBhjgLvwc6GTYpPWM5brDmvLCyzrqluLU4rj5RNsm8HTfJPOG4SrCFI0e6MDm+r7Y2GrsAt8c597XoPmW2MTN2O6y9C78avg09argeQYS46DErPgNAH0DVPBk8vTg6Omm4jLrpmnKxQLeSOaQ7qC3pPZG8MrK8uXe/S63Lrw4+DDkZvGs/4jyoOD+zrbv1tVs8ij8sPtK3eryCMBO/LDW5sHW3Yb4cOEO4rry/wQunFz+lPlC30biAN/271DQhOug8aa6Esga7nbo/s+E1Tzttuvk9V7R4pfq52cATO+U5gLRKLCE4QL47uFs6BL2yNG29djeQqHa+qTzguXu6azKYPBC85SvcNos5pTHftZ+6gy1KvNjB/DY7O7rACrz0OB1AUyXTudqxgD0ruWW6ubegu9ovgD4rNQq0q7RBvhE7+6zGsWNCyDQEutK2mDxAL8G9WztYuX4/UjwotyA9WCzSOsE3xbomuR88W7V1tqS7xzYgQEW8MyamPRmtPTdAvNo2/bHjO8A8LkGjODY1ODKntWo1urRkMUU9B7yPPEU9jq8+wN24MD3bpf+7Cri6OmA4pbPftUW2Ybt2Pii13jwWPv076LZ2NjKmO7sxNbe8wDxvt3AyiDQktLI4mbf4OmK9CzDBPwC8bLkcOL8xnDXUNxQ5cDyPNrizYTmZPz+wzLtuPLSvWMDIOki4y61QNRkyrTn4ths4J7TpOew4fLtYPEq4dzrgNVo/J7OXtfmk2rRmOne+N7xFvJo72D6vrmexfCymPGu7oTPNOwI4ETIBPGjBbDk8uVO/Fzh/PWawnztzPkI9jz7wOdQsaL7gs7+6WEChseMvaTiVKcg++7g6MvG5SL3luL6o3raKuaC9Uq0FvhU6Ry3VvfO0BLobNV09gL9dLyCxXznTMgS6G7Veuk48dSWbP8Srq7kOvja/Vr5GNBI4U74pO7u3szBzPi07TLQhu07AwbnCsua7M7DrLDKzM7lmMRI3XbylPU+u0ySqOXczoDuYNOa4yTWTvPMuQaipsh2wh79kuPEtHjEcvBA97rrBO9Y2K7kaP8a8WjsBPF25kj0AtJ40KjRMsH06WTr+vjg9pr4hPII8XbyTtmy84bLutD06Pj2VPX+4srK8vnK6uDt2PvC8vbjDpnw0errJNpW3ZqNfOJQe+7YEr6it7LUktGU+fTi6tJM5V7WxPOo13q4qN0m+grzHvJMw7j52QBs5Ajg0v1e4Tbr3uGGxjrfrv/w5SryoMyZAW7sQwZO0ZzzVP+u88jdyt8iuSkEDvg80fDdYPEUu7jREtk40frX5OOq1CTZ+p4E8lKoYvww9P7s7ucS4fz1FwEdCOTwlMwerkjQrOCk5cji8LVGy17A9sok8wDjhwT85OzLKpDe2fzyVOy+6gzbGu4W9BLnmOqA7GzjNOSE4IrnpNmc6CTrBPKs5oDVIPMqmDrs4sfa5Z7mgsFe67bSTv9My8BSJukY5gDtuvhq6J7qFu6M6NLI7tALAFTn1vKwrcDRxPTy9CsLiMTQ/9Ty2Mt63dTrKu583Czg+PIVBRzYSuIymFL+PuYy2MbjgMJS6fDzJCsOgP7X3MJo68LpEudy0Yr2Oup+3/zozNDIyzzplsD82m64/NKm44cBMsLE9aTu5O/I8rS1QMvG4D7XtONE8drA0t1MQzzjHvZjAZ7jivBG4u7BBt889OjXPNPo4jrwoPNmsXTlKvDe+izoGNje79bqAPMK8kj40uxs5QrXTOFq4NrE9KQK87TkbuFGz9LsgwR2y00BHOu6kNbTAJWA4ubx1PLk5vzkENwYlYjm8OKq1l7iFLjI+9bzfvUYxgyqMMc4zJLP2PWe+obqcrpO+obGlPlklYjMKve64AbYVtSA9djhyvOMz+TeQPFI+D7x9ugi9fbN2N+Y73KwetdswrrouQG6+6TEMQPceFbK4tcaxfj1swCI+sr1EtN62tThkvmU3DEB0vRIyTLnQNuckIrnONzg/HLLCOSy9p7uPN/A9sTUCtc6RAL3WOA87PLeFt0E0/bY8rDNA6LO8tS659DnNsTK5ST2uPc64d78IPHq5UzrhvyQ72LzZOU8joruDtn45xy6tOOg/Qr54Pq4uMbtSvQ2yYDsVsAs+zr0xogK90TUYO7u2NcFeMv02eDbxPEm8cTnFPB2/HTUJuDitkDXMt2i5XyhZvFi8bzmYvFU5Zzfnvmy5
xzzZu263ZTdEOga0yLixPfU+1Tt2LXe6pbouOLI2mz00OQO+NTz8uyW2ATT8PzhC2zjdsUc4GjshtS4/XTP3N1Q5vza2OvC4d7hnvAg3OzpTN7M+u51aOV68MraQOcs6tLSVrBK+trUgO5o4AThfKjwfSLmXObw23zc1uE/AYzypt+e6jDlGtj088Dh4OXe92TwuNOm1lzAbv4o2HrxpvRe+czwJuSM+SbjUvne88Tz9sGO4HzEEOHg8yz3CtU69nrYqNLa7qLuANTqqMygRul+zZbsfOyQ8Yr9wu/y9M7lXrcy9YLsEvKIycCzGuakxYLhZtLI+XT0zvaM6fjqYvI06Jz59vFe7EjxYNGk4czVANk69MDyuPFezkakgvh04lDj2q388WLWEOBS8CKaUsTYz6rU1sJw6/bZtvgA/hz0rvYU5Brg3NLU0gLNFuoa5VLuouk2surl1OfM9pLipM/83jTfcLPE5tzfzvPQ6GTsbutsodzmzsks8ukBKuoa93DTGOWOz0D1bvYs52bjhPvg/IrpluOA6MrbQqYAmrr8Wo4W55LfGPQe9gTp3tHq0UzpxNZE4vztNtea4V7yauuZB+zxovUq9tjdhOGQ4FLQEsD81fy1wwFuz0LqbMfhB4TUEtWA7uTe5Nts4HUCGvJQ3abtyOFm7rra5tM47WTv8vAgrrbkJvfKy8LTaQO02g72FuBY8LUBYvoW/fD88NvK6RzhGweoaPzVlOw68fC1nuxY0KruItve7NjneOoKz5SjLvcu0bao+QXO82z36O300CT/0NTo2zarlPGM8AblHvX+yZroRrPM8Ubd7qW0ryjp+wNu4wzLNPN+3gr/1OBW5wrz/uOix17hpwC07GT02OY28c7Kunso4WTnguT8tUTfTPaM5UDpdLaQ9jjbkup09lTkXt6y3vzRpN1Ay+zTNPkk8GjKIOyG8XjY9P/qyyLo3uVy8R7rutZ+9/yM4O0O7FD4WOB88Q7nWOl68HzuAMW04tLwpu8c4lrtpN3i9yTruvGs4ATmTuag4KzRQuAm82r+YNUK+Gi43tG851bRFtds5XDUPNYI3JL4OOuY4EbzQswipTrBXNbo9VDxAvfo4UT0wNl08BkAYPPozLjyjMB8mn7VBPoy6IT7/N5m94jUzwAE5FTu9uOgv0T8MuDy87T3aP5IbDDxePfG5xLfsPL0+gTgKu/M/P7hdtXo1Nz7VOqM2ajeSKXc4D8G/tLkzm7wuNouyBj9LPnS31LC8rDq3PjIRuoa8+zgJOW+6KjsNuQ00kDpNqEc3IrhJspay+bmosTK+3LePtBm0urPoq6w3/zozudC8K7zMt6G1KbovvUy3zjHDOHa4nLZquwaoyrqWOCW/wbXRNOIxY0GZNQS8HK4bvymtqrpTO2W4ea8WufU+JLVVP4M6tzfmNU02tr92tMM6QqqfvbWq0T79PPCrnTvdtdG+ZbtAPmK0yrPMtJ4/gT5wQBexuzQRvug9rLzwMmS8trizut24ULhjuKs6a7wPM988G7i+uWCzuDw2MkC4vjdpvHM5ijbttLW6GLtHOOo8GblWNy3Arbjzp0e7frtYua00/7F6wD7A27hTN3/B/7c2uI49KbYhNoUwQ8AlOuUyETjaQyvA5j6ZtJk006nKto+4RjUSvgE6qbaFvDO3Bz1IuLw12bkQOnw5ZT+esVk5Ry4vPcC59jk7shys3y0JNHCv3zJQPuI79DpKtx27pDsCO+Q91bhYs4++SLb5O3e3IDkZsi81l7x0LfvB+7GGvtM8ODp3NxY+l7v9PnU7krOLPG28mbreuDu4OrzkPCS0pDWQuEi/UjSmv2Wseb3zP0o7xS4OPcW6WThlMjk0Fz3cOZ80n76uu9qv+i2FvNNAET7ROJwsyrKeu/UsIDT4vFk1+LCiv+K6nraNP3Q4WL3HNzC+VTyKt/6tTj0mvZe9q7gnPBS+qsE4t2o4zTxpt5W2njx7vze2GDIwNxS4aSj6wES5Qjfcu40rJjd7tXQxtLudsuI4BjGxuC0zuDkawKM8YbXPNso8fb2tuZ60RbrxPtu6criKMs+8VbYUNVO1+q08uA++JTUFP7YkNTOLORS9zz55Moc+3blGPzQ6bTh9M/SzzTx+MN6/eLzysfY0PqvgPM6/mDBGvxQ6DK63Num6Hj0rPKs4JbCkOKi52DqZPkg81rnINSy9lDg2N3u/pryItNC017w5NgY0N7IMujI8nz69t+a43De6tXaw6zmjv0Y9pyyVtratz6jtvvs9SikXN507FryTN0i0xjpBwFiu07jrNoU3qbmzuRWvLLu8Oui10MH/tSe8hr7zvAEvUT0DNdq4SzdYt4+5nrwCvz22DzEvrqi2kbveOEW9NToCvAW63r0DuM07IDjUOy04arxLtT66Uz3KvCY7JTtSP462szmQQPG4JL6Fv7Q5iL/1tf827THPNh0zHj2eu2q5LrqkOjQ7ODe8PLa8qz4YPuM5IT+gvjK44rnGOTO8Dzp7PZA5ijTnu8a6AD08Ov+osLYNwEm8pT8fvRAyCDwFvekxgTtmIr1Brb48PIixLToQN965VDNuv9M4xTQcOTw84jW9MAy7s7m/PL89n7PjKT27sTxTOcQ/B7sMtmgzLDnnsvy6DTvFOVW7bD2uPEwwGC1vOOW6sCdOwAM7P74EPki1xrIGuZy0rT36wBw9aTXUvE28tT6Qu528jTxrNYG77zIavGc8PzxCOMk1Cz8kjbq8MTcsQA+8ybWqNuaq3Lt9PKRARzI8uzK+ITRqPJo37Ji3uF68rjpPOy++XD6YOJk9Xr13vcSwBjgvP6Y5xLMbvOw8uLuEPo20XT5uOWKwKLh0tdo28CzAuE2xBy1CwFU31rudvMC+Lb9svay50D81uLAxaDYyMPWsyLzOPTs/u74ZvHq0t7sMONS5VUDDPM0yHDxsPIO4iLoALeU6czBPvmy6vKzYrOQ/i70LOPU9i0B4tt03jzhBMlOu+jYRwXU5CjAfsxhAZrlztvc/qLpnuLSwujqmMiy9QrjcuPGs0DaxNhu/QTwLNIo9GzdoPHc3Yz0uOESui0AdO5c4G70nva80qDDquIMwWz6QOZC8I69xujS2nraktwW9FrB8uNzBG0BbPAW20SSwvMg+lz8EMRg8nDFZvZe2OTBGt/+y2a0trbAqPry8uSO90zs2wJ08jD3WtDXByLUcrAu8H7gfPlI5Zrtkvju11bLwN0i4GDi+P4Y6KKrcsbW1uTwFucopjioEuH29KzXPqwA4RbjiPAS72T7+vvU2mzdduso2Hj1vwAY4/cC6u4Q+p7iyvFayI0BUvzW8/T1uP5+u9bwxQGE+bjmAukeqIrFLNc49CTtPvII9AzV/Od49cryWqEC4Rb6MNQZBXL8iqCA54S89rzK9czMOumHAxzyrO50qVTNMPC4zOzvCtD89IjU2MhW9lzSouhu5hrpEvEBAUz2uP+S8jywtvZG5WLv1PFe2RjzVOJtA670JPZY8ybsHPGE1R7OXusG5GkCYEUI6VLo9ulQ8Zr2ltlsoIS+7sVU2izn
hOeO7jLTHPCg7fr2gNeq/AiqXP5W7rbodOiy+ELnGOBO5jbP0Ob82RTRttew1669TN/O32LHru3uofbp1vCM0zzJDOua2hzZoOB43m7rIMNQ1Wh8swCMxbUDDtdO5+TUANcU8CL3aObk4jLBrNR48+zqSvJA6JKyqLK8sz7DEPFyterxMtrgx6zzTOM66+EA9NBo927BEOLc63zw7PPo6C7WTuFI1dzshs0M8zz1Vu7K6frDsNDK4aT3JNg0pvr1DvX80RMANPBCxfEKeQM6xzq73O9A+jb4lv/q4qTjzN0c8zLyiwN06oLoJwKQwWDrQr1O3GbHutWS9uLksOCY1YTH2u8+xW8C2Mbc95r2wuEQ1Az0st4q54L+fLhZAcDS0pSc1rqGBupY6OTOCJM67Nz1ROWy4Ajz2t0K6FLoWv4s3U7/qvCfAgq3PsDq1LLywvG43aLgQNRW7yzE3PbA4mbYdNLSzCSA2tUKxyTLyuxC8jr1nvt68GjEkuxw4cijgvz05fTrzu7k22rpALg+9LL8gtuuvXba8vGa2TzS/qdcxXLmwPLS8zTe6PLI3e7I6uCU8sb3Du1y28zwUNr+797TquK651Tt9vW8+oDpdPGi3zLC8vTO45jcvPW682bXXsJW9uzjLuUGyO7dRujef5rLztdS5HTWdPnow+DuQvQwxZLzoveq5xTyaO8czIb5IstQ0njFgvx+7GqxKOGTAMLGUMMw6izmpOGa6or9KtgI8kz2vOd82EzZzuCmwrT6Ku3Y+J7VNPaG9sDjluam9/jIKPMK6/73hrZmtYziZO5urbD8tM4u1cr40tGK1rztmN7i8gzFfOR0+GjsWOrErNzb/PFW9l7UuuhE27DzHuMjAmLZPO0042jZ7tB69Hzitul5AkDiluOS4862qs3g89zHauZA/UConOt64zzX6NI+/CEAqPVu2abyQNYwtsTUhMg04yj2LODO8cz2QPk5CfrzGMyrAbThjuLI/MrrCvoq3578COiHASScowCC1kz7FNei6/aedJJA3eL2+OGnBCrnQtxE5yyWfveG7XDIPraK4sbeSOSS4o61XQLK9aL9FOm65GLVAuii0ZT5rOuY6qr2bvK7BerqnOyM4F7O1pYO5jTIhOne+yzR2O6S2mzmhQa6uSzbouc0xXTc8MhS8mzLZPS43sjuWswA6eDSbuxk177sQuQ8ywjTCvDc8kzijuSa+o7Ajtz4vz7UEwDe25LQSOZa5tz6mvz89LrPgOhstjzwztmW8rz1Drxcz8DwhvA==", + "polygraphy_class": "ndarray" + }, + "data1": { + "array": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGYyJywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA4KSwgfSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIArEOLi4O7CgLAC+O7pnPRc7AqzIM7W0bT1psmszljw4ujw+H76FPva1xLOPv0e3KbzcuGWwqjulMuw0YrwMuzI/4rmgPsezsBhIPBY9GK82tiiraDUhO1Uc8ziOPho2/TQTvkPBNzYSrh64yzO7q4M8gy36uew787I2OKO3M7ljLZEirTdMtFi/7bZaOsK04rPkO7K5LL4ksb+3u7udsd03waDpNEE/fLV0NBO4xbquuKm3wDWzQLq5qrYKrFU2ODQjPdrAxsDvt2Q8Q74EwpI4zDNcvOK7wrsTLwO56De2wdu6gbwatE64Yjpwv9MpdzhtP7W3MTU3v8nAwjqynbc8fr0pOHo/tzZtNGEgSb5sPeO4CThBvri8vj2PtdA9Aq1evyM7uLiXOXQmLL5CMWRA4zddq008vjK1Ml68fDMkPLC6sz3frRq7+zuXtBM6q7Soq+Y3qDT7v3y8bzs6sDG4b71SvAvAe67KuLy7XKzTNTBADL6jPMk7PLqBMlK38TEJPtg4EjjjtvC9rLLUPBy+OLwKMOG7V7rHtna9y7uhOTu9ZL4Kuds5TbJvt64p/L7qNyM2jbYGPIO8gzzyMGO1FqXdruI8T7wauiU8xLg0O8w+brZzOZ28CjpiMpI3Krmlvgc4IMAUtYg7JDLLPxo8O7n+JhCqVL6Gu3y4E8AUKvizsb54Nro5RDMvtbs4cTnrNJm6bTTIPMa4MkA0uCYoYbklM/i+XrhevFUw9TsrOb7AFkC5NDa6BzGjMVa5VTLcuHi2fbm7vaswrzgeOI86MDVDuLg6br01u7e7UjyLO8w3SjwJu5Q1AbDGOmCvKbZUOAI7WbnRqzy8/8BqMt84MTTMv521cMB7PVi8Bj2ROzS7iLybNcG6jy66tyC/dsBBM364bbiRObI8XLRztQA4br0kM2o4cTm/vxe6B7tlLrE3/T3rt/a3mTy/NBI9nzShvFW9Nb5MOcE8Zj+/Oqc8pLigvM06rzbgLjM2LDUjPS49WzjEOh04YbcNOpS9NzzHvJM0w7ptOFioYb67NqC4DTtuFVQ2qbu3uFe32zq2PnG6TzxXwMe8rLtpvVa+nDbashI2hjlANEM5c6y/vDs8t7h8u7KxTbuutoE1kzUvu863TDzxNyZAuDyUPI29HDrMqmi4YDGQOwI4B7vxORq57zLbtAGzITopvMg/Sb/IuOw1rThurlmrkjZls5M9tzr8Oru7Fa3PtAa8wzVNuD45SLzgPiy7BTNWOpG40jrrvM89PyPFuMm0eTqtuP+5F7O1vEg/iLinOrQy4LsZnkY6BrLGPBY8Wjtbt1K86bhIMl44WMAnoA8xcbQqPcA2Pj3psoA7ij4/PV68D7SGtdQ3", + "polygraphy_class": "ndarray" + }, + "data2": { + "array": 
"k05VTVBZAQB2AHsnZGVzY3InOiAnPGYyJywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA4KSwgfSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAp+Oba6VywKwMU9GbgnK/S0ljytwUGkxq5Ct+g+GbstOrM1DD1Wud25YjzONi6+ULgvNrQ6b7nUwTA44rwJwKrBgzoeMDmzjj0ZOXc+kr1sqHi+V7P1swM51Sn9OKE5T0ALtYe74DNruu49iLSGuIu3vzkMsSK5qT7Zuo0oODmGvq83OLyVOrS7izlsPOQ6BTRns5244j2ZuSa8Uzleupq7xr26t0utxzOcOzc4HLJYshU4Fz0LMIK38bvIvVU5oDieOB6woT2/t6Q79qjeuL44jja2NTQt2qjHQL08yDkoLwc8JTi1Jtu0Lq1Xwr0y0bdfPvQkwbxiqi+8kzeoNKo7VLpaKHa9nTknOO83ohajNms+kLRwMFi88rpDLmo8GrSNs4O2R7i4NP29ID1TucY5FL1aOWA5Jjb1u/wzu7wLtB48YDrGI8y7Kjn2JKTAQzo0ps88Yrxawnc0qjTDvtS7g0HktNk5NbkuwHOxQLRmtlavOjTOsA69xzfMvxrA6jkOOFmrlb2quTS0Yr7quTg5tj22tg06qL9HOW666zTus0g9/7oFOXm0EbHiscS6OKMCrLgzaS1iOmM7NLzzOr+8tDj1OKW/8y1/txK6Zz07uXO62Cx8OYiwqjw/OFvAPLh/uKs3QMEZQUA4xDC6PLBA9zqXOy8wh7Cdt5y6f7lnvdusvb4cMqaotDAtuCdCcT36vHQz3bgPvFy96jZzub478zt/PdK6mzcPOZa3LbpwPkgzFjmUuuG8jrjwr0K8CqHBvnu8XjnVOEW2ErwcvPm1KDltO/K3nbxJNJe6JbhNPZ46DjEWvcQyQDwDtmg9LD2oPuI0szioPSk3A7gtt6IzKr2WsGM8YsAjPn08W7dZOHIa7DzdO5q9sziVsk0t07n+Pik867lXvSy29DawMwM9UjrCOzs1j71Dsj48mKoGOBq1H7zlr/I0P7mLs4S7ejy2tNA9HT4wrQm8LMDjvX+9hD1qLzw2ccDLvBk7lzS2sIU4iz4Ts3EsKjLJQDPAdzlZr4k4QrlEqrE5OkJ3Osm6yLZBty+/SLXdORm9MjzON+C5h7BlPt45fyCbs+csGLg2wBZA+DBfNmqr4rnzsWa+jkDwvCG8C7cnPW4j8rwEPdk5Tzi6OcpAX0B+uX8+ITb8N8W+dLGBuqA6STwmwKSlfTz5sdU5njV3NVe03rx8vK05+yRvsMi24rQwPF21Xb3zOY6yBShuPkOloDS3vAiz4Dg5vm0ycy5JKSY8WjXqPfO7WTBoLUe8pjumQFukCjhpLbW5SL+luZG90T5er4m1HTt9vCO6WywdPAM48jMALWc2dDIfvOYpS7wwLbG3FEEbqZ69S7AoOLI+t7hUvok6", + "polygraphy_class": "ndarray" + } + }, + "outputs": { + "output": { + "array": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDEsIDY0LCA2NCksIH0gICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIApn5jM9zcyMvJqZeLxnZjU+Z2aOvZq5171nZp47MzMDO2dmeD2aubk9AEAGvpoZgz0A4BA+Z+a5vQAASLwAgCY+Z2b3vc28gD5npmC9mplVvgBAnj4zkw09Zw6vPmdGwb2amWQ9Z45aPpqZnD0AILo9zcxuPTOzTb2amWi9zeypPpplTz0zsy6+AAB3Ps3MIr1nBrW9Z2aTvTMzGzwAkHs+zcyMO5q+nT0AKGQ+ZybePZoZ9r1nZiu9ZwYbPs0MYz5n1qg9Z+Yivs3MlzxnBkK+Z+ZgPgCARj4zs1k+AKCHPs3MPj0z89I9M1MdPpqZbD1npqQ8mgkFPjMzNbwzK4O9M/NZPQDgjz1n5mE9AABIOzMzlj1nRoG9zcwJvQBAJj6aia29mhkCPs00077N/AE+Z2Z2PTMznTuaKdQ9AEA7vTPT271ndkI+M3PxPWd2Pb4AgIE8zSxxvc3EuD5nZng8MzMMvGdWQD4zuwo+AMCsPWfmkLxnBvW9mjnQvTNThr1nJoE8zSzWvc24vz1nZuS7MxuivpoZhDwAwCw9zayjPQAgG76aIw09ANi2Ppq5uD0AAEu8AMAwvZrZdr0z40I9AABAvDNjSj4AoMq9MyM/vgA8Xz7NDE29ALAhPs18OT7NPBM+Z+a6vGeWRz6aqXu+AGB5Pc2Emj5nxoW9Z/6Ivc0s9bwAgIi9Z+aOvQDEpL4z8ws9ALADvpqZrb5nZmK8M3Osvpq5eL4zM749M7OrPc38cT6auac+MzPEPWfOqj5nZtI9mvnJPs3MCzwzM9O9mjmfPs2EOz6auek+Z1Y3PprZDT4zy6Q+Z2aiPjMToD0zc80+Zyb7PZqZn7wAAJ4+ACBQPZqZ4T2a+b0+M7PFPTOzqj0A8IU+AECkPs0MAz+amRE7zXGlPQCAdz4z8xw+zRzuPQBgJD4AkKQ+MxOXPs0sFT4AgIQ+MzNlPZqZXj7NbOQ+M7MSPgAAyj0z86E+zVxRPs1smj6aOYc+M5NJPc0coz2aGb89mtkaPjM3cz7NbC4+M1OavmemS72aGfk9zeyRPQDAsD0A6NC9ZzYTvgDgqr1nZnu8zYw9PWfmwz0zMxC+mpnovc1Muz0AgBk+Z+a0vZpZ+D7NTN+8zcyXvWfmDD4zMwE7mrk7Ps3McDtn5rK9AABvPWdmujzN7HC9AABrPpr5p74AAEa9ZyYKvmeW7L0zM5y9Z4Y4Pc1MFb7NLJq+zcySPTMzUb3NXAI+ALw9vueTAL4zM6g+MzOnPQAgl75nhpK9AAA0PGfmpT0zGxu+M7PqvWdmq71nJj2+Z2ZAPc3MrLxn5m69MzNjvABAPz4zs109Z+YAvmfmi70z2yM+zYwGPjPTML4AuOe9AABpPZqZBL0zM/i9AACpPWfmnz1nJmk9ANilvjMzQj1n5g49AACKvM2sjz4AgBk+AAB1vTOzn7xnZi8+zcxsvc3MFL1nGLQ9Z/ZHvpr5Hb4z80k9MzsLvs0MVj6a+fy9Z2Y8vDOzCj7NTF4+M4NQvs1MSz4A4LK+M2MUvjMzlD3NLFm9mjmFvmdmpDsz88y9Z0aavmdmmz3NzDc9MzPBPM3M5r3NAi69M/M6PpoZjD2a2UC+zcwivmdmDj4zMzQ9zWSyvQBg1b2amZ28AMCvvmfmVT4zM6m8M/M5vZoZGD4zs6M9zUyoPZqZ8T0A5i6+MzNNOzMzgLwAAJC6M2u5vQCAIjzN7Cs+ZybWvZqZOj3NTGY+MyNhvWeIEz0zM9U9AAAYvJqZHz3N1
IC8AACSPQCA6TwAgAS9mjlFPgBQDj5nRiu+mln7PTMzRjyaeUG+mpl3PDPzhTwAyJY+mrmKvTMzA7vNPCQ+mpmWvM08HL6amZi8Z0bcvZqZ+7xnhmo+zZyUPQCAFb4AKLw9AECqvTOFNL7NzPU8zQzgPTPznz0zc6m955gXPjMjIj6ameg9mhkBPWfmmb3NbB4+zahVPWdm77tnRgC+zdScvTNTKr6aCbw+AGAnPjPjhD6amfQ8Z0YcPmdmsj0AcIw+mjk+vjMjpr1nZr88ZyY2vjOjfr2amcE7M/NRPpqZvj3NzIw8APBNvWfm7TwzM+M7milTPs3MMrwz8wy+zQxGvmcmO77N/OS+AIC8vTNDgL1nZgY+Z+bHvmdmUr0aIDO+Z1iMvppZwL3NXLy9MzOLPADQNb4AkLa9mpktPTMzUj1njoq+Z2Z8vgCA1r1nBjO+AAAPvgAAj7szh4e+mhmGPTOT+r3NzOq9Z2a0PZqZwL3NzOQ9mpldvmc+cbyamZa9MzOLuwCAn70A4La+AIAHvpo5mb7N1EK+zcz6vc3Mmb2awbW+AIB0PjPzXb5nZou+ZyY5vZoZLr3NZKW9M3OYvgAwhb7NOIy+MzODOzPzmb2a1X++zVyXvWemprwAAFS9zZQ3vs28v71nZmQ9zSwEvmfm5b3NzAO+AAASPmemNr7NzIm9zQzfPZpZEb0zM769zdRBPs1cp71nfhY+mjk+PWeGLr3NrG89M3NSvQBsgz4AuG6+Z/a8vWeCkz6aNZw+AMC4vJoRAD7NbCe9ADyFPpqRVD7NzEC8Z25Yvpq5V70z01M9AIhxvgCQjb3NjNo8Z9aIPc2sRT3N4cY9mgA7PgA4cT4AgH68ABDbvWcCmT7NvPM9Z0bavWdGWL0ACBy+zTCuvjNvqD6alYs+AJCmvWdeNT6aGW08AGAMPZoZKTyaYTW+zYyzPM1MbDzN/Jo9mpnRuWfGor0AgBy9mnkLvgDgbb7NTLQ8zYy+Pc1cnL0A5CE+Z0YQvgAATL4zo8+9AOBtPs3MKL0AQMa9zQyBvQAgYD3NWFS9zQaSPQA0Kj2zZJW+muPHPQBW+r1nw1g+Z0ayuwCMC73NRtg9Z4NvPmdMgL0zEBM+ze9Evs1M/rozEG4+TbqBvs2vT77NKSI+mtl0OzOWBb4z0647msO9PZrGGz5nljS8AHzdPGfjVj4zJyA9AIaivQBGkr3N6TY+moGAPDPbmTxngyo+MwN1PGepd76zwbI+mo0JPQBjHL5nNlA8AEbrvWfaJT3NMoM9zYaOPTPHLT0A2os9AHr+Pc0rZb4AACs9M1NQPmdGHL5nJvW9AOCiPWdmZD4zs4U9ALiMPgCAAz3NXAK+M3MavQAQaL4zM4O6AAD4vc1MSLwzo++9M/OivM28lT6aCTc+AICivWfGMj6amd26mtmoPmfG/z1nZmY+AMAYPgCgoj4zY2k+Z4ZLPmemUj4zMw+9mpl6Pc28sb2amf09AOCGPgA47T1n5iy9zcxOPc0M2j6aUY4+AACgvE2fLz4AoKQ+M9NMPpqZSbwAQO49M3PZPjNTlD4AWHw+AACoO5oZl70z8/69APSpPs3sjz6amam8AEC6Ppp5HT7NbAU+MzNvPQBAKr7N9EE+Z45HPgAAHjzNvA69M/P7vc0MPj7NrA2+Z+b7vjMzsb1nxgQ+ADDAvpp1aL7NzLC8mpmuPZqZnT4AAAE9MyOXPs1MNb2aCa4+AOAdPc1UET0zM3u5zWyauwC9Eb4AzDY9Z7ptvbPyij4zrYi9AGiwvDMmHD5nTNg9AKMSPpomVL7NZOI8zTKKPc1m+L0zRy+9M2AmvgA0Lb0ANGW9gMaPvgBzdD6ajDM+GtOVPs0Abb0Adei9zbL4PZoMAj5nsx6+zZgnPQBG5z2aMZQ8M19wPZoZkzpnQOK9mrPDvTMZ8z0A8wg+Z8zdPZp/pD3NMtI9Z5pTvZoMHD7NRvO9zWA3vc1/RT6aTQC9MzwVvpoZJL6aGSg+zUwfvpqZpr3NvD2+Z6bSPs3MrDkA6BW+zcxEu83MzLiaiS49Z2b6vJpZ3r0AKIQ+AIBSPc3MAz3NnI89AIAnvpr5Dr6a2c29AMCTPQCwjL0AEGs+zUzJvACAUb3NzIs8M3PUPTOjG75n5lM9APCYvmdmlrrNDLE9zYz0vM2Ml72aOV8+mlmcPWc2zb7NDOS9mpnivAAY+D2aOQC+ZwH2vZoZFT5n5jW9Z0YQvs0sX74AABi7M5MBvjO7IL6amXc8zWxdvjMznzszs2o+AAAzPWemq70zkwk+zUwNvZp5Hj2ayYk+zZwgvpqpmj3NzO+8ZyY6vM0wAD6amRW9M5OhPgCAlz3NjJC+zWzvPZpZvT4AwJM9Z6aGvmdmHDzNTIC9Z5anvgDAdL5nZmQ+M1s/vs3KSr4z6ze+Z547vmfmJr7NrEy+AOAEvme2ZT4zUyS9mtnkPmcGDT4zM1M+AAATPgAAHD4z42g+Z6Y7PgCA7T0zs5k9Z7JfPs3UWz4AAEO9ABhzPjPz1j3NzKu9mtklPjPzbj4A4Hw+zcz/PZrjID1nrpE+AGAnPs0Mdz4z8/I9MzO9PgAAFT0AGBM+MzMzPjNzgT0AAMg8AOCbPjNTrz4AANE+AEBRPmeGHj6aObI+zcyxPmdmvrwzQ8I9M7NBPpoZCz1ndgU9M3PVvpr5kb6ak6++Z5oLvzMjCb7NDC6+AOBvvgCo8L7N3oe+zUygvprZ1b4AAB4+Z0ZMvQAsmT7NjMm9Z2bmPDNrLb5nJkO+MxNZvgAAyL0AgL69zUQQPjNT4j4z8zS9Z2YGvM1sWz4zM889zaymPZrZ0z3NzFS+zcz7PQAAlT5nRja9ZyYOvpr5Gz4AAKU9AAChvQBglD7NDLc9MzPjPTNzeL6aT0a9Z45IPjPTkD7NzHK9M7ONPAB4JD4AgEw+M/+HPpqZkb0AwMi9AICnvQAApD4zc5A+mtkHPpoZzj2aGR4+mplWPppZKz7NzMa8Z84OPjOzaz7NbE8+mqGuPWdmBTyaORc+Z+b6PQBAnr6aubY9mnltPpoZPTxnZqg8AODMPTOTDb0zs8y9AMBAPppZBzwAgBg+zYwnPTMTF70zgwy+AACIPDO7Jr7NzJG8Z2bkPM1M1zyauRQ+mjmfvZoZlD2amTq9AEB0Pc3cJb4zs709Zyajvs2MSr6aGSA9AJCBPZrhXb5nZiu9ZyYAPpp5zL4z83a+AAAoPQAA+jxnRqK+MwtuvM1s6D0zM4G9AIB7vmdm5LyaGfA9mjkgvQAwsz0AwHC+M5OfvpoZu74AyAw+MzNAPGdmdb3NzHw9zUwPvjMzCD7NDAe9MzOWvZr5L73NTEg9MzM5PADwN73NzBo+zeynPgBAqz1nZsG9M/PXPWdm9D4zM0s7mhkKPgBQDj4zM/a9ZwbjPc1M3D0AwIU+M5PSO2fmjb5nZgi/zSwfvmfGSL4zM32+zQzCPTNz4T0zs+o9AKAzvmd6LT6NoXE+M7NZPpqZ7z7NxIg+zaxjPjOb3D0z4xk+AICMPc0EHT7NzOS7AEA9PgCgOD7NTEk+
mtmgPs1MWD4zk9E+MzNJPqePjj5nmrs+AMBMPjPzZD4AQEg+mnnTPgCAnD4A7I4+zcy3Pc3MeT5nZqi7mvkeP2dmQj4AgNE+mqGiPs0MRD4zs1g+M7NbPmfGAz5nTnw+Z2ZePs2UZD4ztco+AACOPjNDgz4z+649AOgKvgAQRj4zs34+mlmAPZoZR75nZk0+AIDXPc0Mij2ambM+AACUPQAAgDkzs+i9AI0cPgDgfT4zc+a9ZyZ3Ps2c6z2aWeo9zWyFPgCAsT0zM6K9AAB6PWfmEj5n5oq9mvnUvWdm7rwzc0S+AIB+vmdm8L1n/Ba+Z+YTvmdmhjwAQBu+AIBqvpoZt77NzHY9zUwUvmf2mr5N+n6+Z2a8vc3MbD1nJl2+Z0YHvgCAhruamQe9zXwQvjOznb6ameO9AGDLvpqZhb0zM9u9Z2aAPWdmdT3NTM+9M3PIvQBA3r0AAO68ALC5vc3MsLsAALw7AHjwvZp5hL0AwEs9ZxaNvZrplr7NzHs9AOhZPjOzlTxnRje+mhnOPM30mr4zE4M9M1OtPTPjGj7NlJK+M8MbPs1MJD4zswA+mplBvc3M3rwzM0e+zcyKPQBguT4zM1M6zezJPTPfBj5nZgY+mhmwPTOz/Lya2XM+AAAXvmcGx70zMwY+Z4YrPTOziL0zM9E9AEAcvs1MiL1n5uO9mplUPc3M9DyaUUm+AGzkvJpZFT4zM3Y9AACMPQAAOb2amVY9AAC8PZr5PT1npme+MzOZvZpZTr7NLAw+MzMZPgCA1rzNbII9AAD0uwAgczxn5uC9Z+abvgDQ2j0AAGm9Zyb2vQCej77NTP89mlkoPprZqD7NzBg9ANAWPgAAKT+aiYo+MzPEPDMzr7vNDLU+zcx4PQAA5z2aGcM+ZwbzPs08nj4zU74+GsibPTNTcr2aCZK9M7MLPs20QL2aCVA+MzOJvM1Msz0z26+9M1MZPWemGr6aAZq+M/PbvWemAL6aGRi+ACCCvgDclL5nplu+M3M7vs1MPL4zk5i+AMBdvpo5ur3NzBq+MzO/vhqHG74zsyy+Z2Z6vc1CX74zMyK+zczDvDMzDzwAiDu+zfyJvmdGe75nRq6+mmlGPQAw271nZlG+mpkNvmfGgb4AgIU9mpl9vmemtL5nfn2+zcy8uzPTMb7NxIG9MzOrOwCgVT4AAAu9Z2YOPZoZmbyaKbM+AED1PQAAlbwAgOE9zcySuwBgtD1nJhY+M7OEPs3sqz4AAJC6MzN/PQBwCz4zk8g+mpl7vTPTrj1nJrs9MzNrvZp5qDwA3Ds+AGBKvZphi72a/Rq+mllvvTOzMD0AYB2+Z2Zlu5oZcz2aSYU9mjlsvpoZOj4AIB8+AFCjvmfGGj4AcII+M/PBPWcmsz0AZbs9Z0YdPs1U+z0z85U9mtngPDPTdj5nZmA8mjlgPZq5Cb4zcxC9Z9aAvgBgUD7N7Hs+AGBVPmfGYz6amTM8M3O8PTMTbT7NDBO9AESUPs3MNT6aOd49mtlrPJqZP70AQAY9MzNJvM3sXb4z0+O9M7NZPTMThb4A4BC+zUzqvZrJrb4AACa8M0OOPs2g8Tya2YY9mgmPvWdmaD1nZtU9M7OrvWfmEz0AAOk8M7PHPQBAnb7NLD49mhm/Pc34Qj4zMy88mvFsPZqqKT6amas9mhnvvWe2Eb6ama49ACBEPWfmG75nrok8mplEvs3Mt75nZku9MzN9vc1Mgz0AAFS+zTj4vM1crD0AgDQ9zUzfvWcGn75nZiK9AAAqvZpBXb6aGYe9MzODu2dmML6aefs9M/MNPjMzmrwzM6e7AICXvTOzIb3NzHu9Z+YpvmcGcr2aYbg7AAAAOc3cAD1nZj48Z2b9vZo5ub4AAHi+ADBFvgDwDz6auV2+M5PKvjMz3DxnZpK9AAAFvs3sgb6amUO9zcwEuzMTf75n5nw9M+NLvM3MpLya+QE9Z0ZqvmeiWz7N7My9Z9+IvgBAor1nZjq6AACmvZrZYr3NjHo+AADUvADgfb6a2a09mll2PZrJpL0zM9i8AIAGPc0sTD4AQNu9zcwePJrJlT4zc8w9zUx5vTPHxjxnFpk+zaxqPs38Db2a2eK9zSw2PmdmjbyaUTw+MzMOvTNz7b0A0Eq+mlmyPTMz9rwA4Cc+mhkmPTOzIr0z8/k9zawxPpoZCb0AENE9mpntOwCA3b2afQ8+zczbvM1MtT1nJuw9Z8ZDvjPToD3NvO8+zcyZvWfmCr3NrIk9mhkCvZp5LT6aeRQ+MxMZPmeGHj4AgEi9M7NFPQAA2rwAYD6+M+OgPjMzy71nnoW+AADDvTPTc74A4GC9Z+aCvgBAVb4zQ1m+Z+ZjvZo5kb0AAHc9AGASvmdmyjwA8J29zUwSvgCAlryamXi+momHvs3MC76aGdm9M7O2PQCAEL6AmQi+zQyPPTMz6D2aGRO+zUzCPQAAcD1nxoK+mmmTvZp5D75nZqS9AOCOvprZnz1npj2+zczavZqZ3r3NzPu9zcwsu5qZvD3NbEi+mvF8vpqZIbsAAMM8Z6JNvs1M07wAIsc9MxPpvWeWTL6amUU7AEiNPjNT8L2ayUC+AGDtPc3s6L0zgy8+ZyYVPTNzzT3N7Ps9ZzYwvgDAfL3NbL29ACD0PWemET0zI38+M3MlPc0MSb5nRiE+AGAvPmcmbj7N7J8+M3MLvmfmir1nZjq8AIBLvmeGPb4tmpc+zTQ9PmdmIj2aucE+AMCnPjMzSj2a2dc+MzM2Ps18tj7NzAw8Z/q8PDN3wD5nao0+AGCoPprZeD4zs3o+MxPnPmfSoz6amWY+ZwboPWfmxr3NvMw+AGDIPgBA5T5npvM+AABtPjPzLz7NnJE+Z+aIvQAUgD4zK5E+Z6ZPPgAQPD0AgAe+Z+YDvjOzpb5nJjm/M8EivjOzdT7NTAq9mhm5vjNTQL4zs02+mnmLvpqZhL0AAIq8zcyavGeuWL7NTJ69M9NLvmemhb4AYNO9Z6YQvmdmijzNbKm+zUx5Ppq5eL5nFjW+mkmevprZA74AQDE+Z+ZIPs3MIz0A4Os9AABUPs1MVLxnZti8zcyoPpqZRT1nZjo8MzOjPZp5jz4AoKA+mhkPvqcPiT5nJoE+M3MYPjNjDj4z83E+M7M/PmeqWj7NpDo+zQw2Ppq5wj2aWcW9mnmXPjMrbj6amb89Z2buvM3shz6amaE+zczIvJqZSr1njmI+MxPkPs2aRT4zu5c9AACtPDPzkT4zM0W9Z+aVvjOzmDzNDEM+mlkRPWfmjj2a2QI+Z2ZMvQAArzzNzOa8MzNfPgAQlz0zM2M7M5P4PgAgET6amfm8mpnMPWfGIz7NzCM+M3MvvgCA+j0zE72+zczevWfGBj4zkxu+ZwYcPWempj0z89c9AEDfPWdmuzyaeWG+Z+YBvpoZIr3NDPy9mlm9vgAAsDpn5js+mpk7vWe4bb5nsBM9AGB7Pc3MmTwzc3m+mpm8vZqZYDwzM0s9AADsOpqZ4z0zs/e9ACCRvpoZET5npoq9AACyvM3MurzNzJC8mhnOPTOzFL6aGbq9mjkOPc1M6LzNHI6+mlU
uvmdmm70zL8y9zSyovgAgAr+aecC9mpnDPZqJe74AsL++MzNBvmdGub7NzAa9mpkYvgAA0DvNzDi8Zyb9vc3M+b0z8729M4dPvjNzRb4zcyu+mllbvmdmHb6aqYW+AAClvQCgKL0AYBy+zcy9PM0M2D0AACu+M9OSvs0sy73NDAQ+AEDbvmcmUb6amaM8zWy4vTNzh75nZo68zcx8PJoZFj4AIGC+Gkcevmfm4z0AgA0+MyM0vs3sg75nZvE9AMCwvTMHuL4AALW9M/ODvmdis77NTAU+ZyYevWdmIrxnZn69MzMLvGemqL3NzIQ7Z6YsvjMLXb4zs4g9M9MTvgDwXL2ameS8Z8ZoPgAg4b0AILS+mgEgvTMTGz6amVm6zXy3vs1M6b0zE3G+mhkqPc3sRz4AgHg8AEAhvWemmr0AoDY+Z+atvQBAEb1nRgs+zcwQvM0sgj0zswC9zRywvgBA3T0zswq9MzMMvmcGFT4AgO89M+PEPWem+z0zc30+M4OHPjOzCj0AIAs+M7M7PpoZtL0zcxO+Z2byPADgsj6aCUs+zdyJvTPY1j2aHWo+M5OYPgAAN70zsyE9mpnnPZqZ1bzNDL68MzNdOwAAib2a2Wu+mtkLPpr56T1nZlG9Z2ZzPme+Az4AALS8AMCiPZoZAL5n9rc9AICLPgDAyL0zY2k9Z2YwPQAAcD6aWWs9Z2aJvjPzXz3NTGc+ACAaPgA8kb0AUJw+AAA1vGdmJrpnZma7mpn9vDMzar3NDIs9Z2ZyPs3M2byamR0+AEDpPZq5oj2auag+M7OnPQAAvjzNTJk9mpl1vWdm2ruaOSm+MzPNvTODC77N3Om9M9MmvjPznDzNNCa+mnloPWcmkDxnnkY+Mwc5vmfKgz1nKhs+zUBAPpqJEr3NgN08AIjkPQDI5z3NyBe+Z4aovDPDBz4zAwo9AKDXvGcibr4zbwW+zSBQvmeKdz6aiQG9Z6oyPjNTwTya3Sk+M7vrPZoZubsACJw9ACjnvZodNz7NcA0+momVvTMzMz1nZkq8AIC3vZq5pL5nxqa9zYxtPgCABL3NDM6+M3O6Pc3Mpr5nZpo8AADZPTMz6r0zc8G9M1Nlvs3MHzwzs1y9AABIvAAADj4zs0s9AAAuvZq5O76auYu+zcyRvQAAxz2a2ZM9mvFBvgCAmT0z62+9Z2ZhPgAAcD0AIP09ZxaDvs08lrwAgBs+zdzEPGeGSL5nxmQ+Z6Z5PgBAmT7NzHQ82gKpPpo5zT5npmA+M7NUPppZ2z1nptk+mtmGPppBFz4zczA+Z2Z4vQAAHb4zc44+Z76DPgBAyT4zUwE/AACcPpq5lj4AAFA9mpmFPDMDzD0AgEk+M8NFPpoh8z1nZgG+M1OCPgAAcL0AgCC+Z+6cPjNzZz6aeUq+zYy2vc1scz4zM1A+zcwkvc0MVz6aGew9MzPTPTPjoj7NzEQ9M3MtPs1M/T3NDJE+AFCyPTOzWD4zMw29MzO3Pc2MET4zc2s+mhknPWcuDj6a+Zu8zYziPpp5iT1nZva8zWykPTMzjr4zMwc8AEADPs2sgD4A4Iy+AADqPpqZtTzNTLk8APaZvhqHDL4AAHk9MzO5vJrZEb4zEz++M3NEvZp5/72a0Um+zczCvQCAOL7NrCu/AEACPgCglr3N1PG9M/O9vTMzjTxnZkY7mtk6vs0sZr7NeIK+mllcvjNTQb7NxKu9Z4aNPWd2CD7NTL88mklovgDAgL0A2IY+ZyYaPWfGqb2aSRg+mhmfvACApDyaqS0+M2N7PZr5k70zc3e9zUzVPACArrwAwBC9zcwYu2cmUr1nxtm9AACTvTPbD72amUW7mikLPpr5lj1nLiG+zbxCvmfWDL5n9gO/MzMrPQDgJL4zy0C+ZyaIvgCMUD4zA2y9AIjTvWd+/D2aRR4+Z2b0vM0s/r4ANj+9mjmUPprZDD4AQJy9M3MTvmfmHD6amdm9M3PvvGdmar0zMwa9zcxMOM1sTj7NzDC9AMCTvABABb5n5oK9zcwCvc3MaL2aGRm+Z34OvmfmCj5goc+8M/mPvQAAjDtnliQ+Z+bFPTOTmr7NzKS6M5OBPprZ6L0AQDW+Z2Y6PZqZdz0zc5g9zawUPgCAWT4zE4Y9Z2ZuPWcuHT7NTAs+AACCvACA1L0zM/A8AMCTPpqZgD2aGf89mvmHPgCIET4z84A9mpmeOzNzqL0AgIM9AMAKPmdGJz4A4CO+jWoZPZqZ9zzNzEG+M7Oqvs0sq74AQEm+zSxPvgAwG74AIEy+Zwv3PTPzOj6aWaw9mpmxOwBALL6aGV4+AMDEPGcm+zwzszK+Z+aBvppZHb4AQHY+AAAsvc3Mfb2amUw+zUyVvZrZWj4zM129mpl3vZr5Sb2amf49Z+YgvZoJOb1nhsG8mtmhvADAvzyanam+zQy9vM089D2arZm+Zz43vpoFij6aGVq8AKCXvABgDj0zo4Q9momNPTObJ77NhFc+ZxbuPZp5Sj3NPPQ9AGCkvGdeGD4AIBG9M8OTvWcmojyaGaE9M4sIPs0M+r2aPYa+ZwY7vQCwNr3NiII+mlnGvQCACj7NzDa9mpnlO5q5DL6auVG+M7MdvjOTrL3NzOk9zfjBvWewXT0z86I9mhlFPWdmHr2amfm8APCXPs1sEj6aKby9AABVPJqZ6Ttnpoa9zTzVPjMzGzszM/O5mpmMPDNToD0AYBs+mvlCPpoZnL0zM+w7ABAKPs3MULwAZHE+ZyYCvs0cJD5nJlo+TSdfvpqZLD1nJno+mpkZugAAHz1nhga+M7P7PWfmx72aGcQ9zcylPZrZCT6aeRO+M5NTPprpnT5nZse9mnkfPjMz3Lxn5j0+M1sAvQAAIr3NzD8+AOCePs3MqTwAaAo+AMA7vgCAxb3NTHE9MxPlPmfmDr1nZsA+5yhoPc0ML77NzO29zcyavJo5jz1nZhG8zayiPWcmEr1nFg4+M5OiPgAAVLwzMwq+zUwlvs2MFD4AgOy8MxsTvmdmuDzNzKw6mvmsvgAA1rxnZto9M7OvvQAA/j1n5kW+Z2bFPTNzAr4AgL+9mu1qvQAAl72aGYu9mpkouwAAGLqaKR8+Z3YnPgD41r4zUzY+zYx7PgAg8L0AeI2+AADVPGfmijyamVG8milqPmdGh70zy/M8Z/YdvmeWGz6aiRk+AEBEvQBwLL4zM2w8MyMRPmcmGr0AoNM9zcxbvGe2HD6a+ek9AEgjvmcGib3NrMS9ZyYpvWfukz4A8Ce+mrm0PZqNBr4A0A2+mmkZvpqZTz2aOQq+ANCSPprpjz6amY48Z+aEPgAA2z7NLIO+AIChvgCgYb4zM6u8M7NSvs3gnb4AgAy+Z+YLvjMTs77NzFy6M7N6vgBgir7NzJS9mpn+vTMzubyameO9mjn5vjPj/L0z832+ZyZxvmdCXb4zU4A+M7PwPs3MtT0A0GC+mikHPpq5xT5nZjY7AIDJPc08BT4AgBY9mtkDPjNTOj7NjGY+zcwAPc3Mubwzs4o9zWwrPjNzCT6auUw+M5NlPueGIz4AAFE9mpmJvQAA8D0z810+Z4YRPg
CgUT1nhjO9Z2YKvpqZ2bszk88+mpmROwBAiT6aJTw+MwPNvM3Morwzczk9M7NdvjMTFr4AAIC9Z1aKvmfmRD6amYA8mpnzvM3MvrwAgAS+AIA7Pmdmaz0zM327AIB1vs3MVLzNDHu+zSwFPjOz2jwAAEG+mhm9PWfmvb2aGfm9M7MbvjMzSL3N/Ou9M/M5PmcmmL2aFXG+zcxFvc0Mlz6aGcs8mpl5vs3M2j1nRrg+zawiPjOzmz2a2ao9zYwIvpqZBz1nZjY+MzNqPQAAIDwzc6M9M6s9PmdiDj4zM+u7zdwSPs3Mv7zN7IU+M7PMPWdmBb6aGZo9Z2YiPAAAlDsAIK09mlktvpqZgb3NzHG9M+OyPpqZCr0z8yE+M8NVPppZTT7NzNG8Z2b8vDPzkD0zc3q+mhlnPgAAXjyamTi9zcwHPgBASj6aeYm+zcy1vc1Miz4zs+09msmbPWdmxTwAAAC7mhmfvs3Muj6a2WM+AEBzPZqlDz7NzEu9AAAiPjMzGj5n1pW9zfQcPpoZtD2a+QM+mpGWvTMTk70zM648mhmPvTOjt74zs2G9M+OEPs1sBL5ndrm+AIAlPs3MmDtnBkC+mtm0Pc3M8bwAAIC6zUxtvTOzCj1nZpq9AGAIvs1Mbb0AgBa+AGAHPc3sVL5nZii8M5M8PpoZAL1nJiG9Z+YkPDMz4b0zM8W9mpmMvDPzeT5nNpi+AOA8PmeGFj5n5jw9mjESvTMzPL0zs5i8M+NNPgBA0z4AAGm9zcy8PWdmdz4AgI0+M7OavDOzyz4AAFg9M7MyPgBAv7yamTY+M7MyPjMzdL6a2WI+AMBUPmdmdT1nJiA+zUz6PZoZjb0AxN09AAD1vWfmADwAYJA+zUwXvc0QAT7NzAQ7AIDhPTMzsD0z00C+Z4bqPc0Mvz7NzJ68mlknvjMzuLxn5u69ZwasPs0MeT6amXW8Z2Z2vTOzdj0AgFg+ZwYhPgAAIDzNzPY9mhmMPTNLFz5nhoS+zcwIu2dmSz1nFhM+mkmrPgAQxT0AgLa9mpmzPc1MZj0AsKw+zcxcvAAgzj6amTY9M9OJPgAIBr3NDKI9Zyb9vTODqL6ambm9zcACvgCA/D0zM4k9AKBCPmfmsL0zM9s9zayePc1Moz2aBZI+zUxhPWfmIL7NDFq+mpnEPmcGrD6amV+9mpmxvDPTnz4zcys+MzPnPGdmBjya8U2+s8sSPjPzAT4z9xE+mpkavc2sOD5nZlS9AAAnvmdmzjoz04k+AIBVvc3MXLuaeVI+zczDvGfuoD0AAPg7AMD9PZpRzj3NnJ0+mhmiPc2ckD4zsyw+mnlHPjMTez6aGVc+AAASPWfWST3N7JQ+mvmJPpqZ7Ttnxh69AMA1vQAAUL0zkxA9Z8bnPs2MsL0AxGc+AORJPmdGrj5nKAa+mplhuwAwfj4z88I+zexUvmdm0j0zk5m+mrkwvpoZXr5nhri+Z2aLvQCweb4z84C+Z9bVPZrZ4z2amZI8M6OYvjNjbz6aA8o9mjkLPs0sUj4zM8I8zbzePc0MCD6aWZQ9M0PXPc2sRj4zw5g9ACQnPmcmWr7NjG2+Z+YLvpp5374AqIq+mpljvc1Mdr7N/K2+mnk2vprpkr4zM5W9AABuvc0sFL6aGfG9ABDAvpoZs70AACC6zXyKvs1Mcb4zg6C+zeyBvpp5zL5nppm+zUyUvjMAIL4AYCG+mmWhvs1MQr7NDHO+mhkxvgAAX72amYu+Z+ZIvpqZGL0AADi9AMIsvjODwb6aWTi+Z2YLvWe2or4AwOE9Z7aKvc1MCL5nZmO9M/MkvjOzwT3NfC++MzPePTMz+rzNzD6+mlmHvmdm/r7NzDA+zYwxvjOz+rzNzIi7AABIPWfmO72aGX69AADuvZr5RL3NzPk8MzNLuwB8fb4AAMI9M/NAPjMz2L1n5oO+zYwuPTMDgT5nZhQ9Z2b2PJq5Oz4AgDS+MzO9PQCAdj4AgBc+mlkuPs0sTz6aGX4+Z2buPTPzAz4A8As+ADDePADAvj0zM949mhmTvWfmjT7NLCk+M/PqvQDQrj1npo6+MzNBPc1Mrz3NjGY+Z+aUvc1Mwz4AAMU9MzPXPWfkST6a+VU+TUc7PpqZjz5nBlU+AAAevjNTxb0zs04+Z+bKPZoZOryamZk6zdwdPpqZXj2aGQe9AMCJvZo51j0A4NG9mnGVPjODET6aiZ09M0NDPjMjI76a+Yk9Z6bcPQBwRL5n1o49Z6a6Pc38cz5nOjQ+Z1aKvZqpkz3NLFK9ZwKjvppZ1TxnLhk+ANgZvpoJur0A0Mg9M2spvjNDoz0AAJk7Z7bBPc1c0z0A8Cy9AOgaPmdmRrlnhhC9mjl8PQAwuL0AkJA9mpF4vpoJor1nBmM9mjlLPZp5H72amR07zax4vTNTAz2aCd29ABD+Pc3Ear4zQ8c9zZySPc1cwD0A9Pa9M2OVvZqBJD7NlEs+zYyzvGemvDxnXjC+zawSvWeemL4AACA+zczwPDMzB7vNLF0+M3PHPZo5fD4AwDA9M7PkvprZqD3NDHa+AFDAvc3M2r0zM9s8MzPuvc0csb3N7Ji+ZwY/PTOzob2aWbM9APBuvQAAPT0AYBI+MzNrPJrp5L7NzCC7zVyoPmemvbyaqYy+zbxHPc2M1T2a+Qg+mtmkPZq5Oz4AAGA7M/NCvpoZBD3NDGg+zczFvJoZnT0AANi8zVyYPgDAvz0z01c+mvmxPc1M/T2aGRe+zVQzPmeGU77NzHK8ZwYJvmdGUT6aeUK+AEDVPTPzkj2aOQo+mtXjPQCA873N3Ds+Z1aPPpqZuTqamTs8ZyaJvZqZiT6a2R4+Z2Z2uzMzG7yaWfe9Z0bDPjMzfT4zs5Y9mtlTPZrZhz5nlqg+AOB5Ps1MDD0zE1M+mtm3vc0Mgz0A5Ng9zawRvjMTGL0AwI89AEDwPQCgzzzNzA2+Z2aYvAAA4DrNDPi9Z9YlvjNDkD5nZoc8mpm8Pc3M0j2aGXQ9Z4biPZqZH72aOYE+zZwYPgBA8j3NzBo/zUxPvZq5hj4zkyM+zUz1PTOXQj5nZsS8Z2aePM2MZD4AAPQ8M5MqPgAQlj0zM+y9Z2bCO5qZQT5nJpg+AACMPM2MNz5n5pM+zQyxPjOymT6amXs9AKChPprFgj7NHIo+zUzKPTOz8T1ndmM+AACVPQCoLj5n5jA9AIArvs2MH77NTKY9zQykvTPzd71nZtA8mvmwvpqZWD1nZrI8MzMTPmdm170zs4O9AABgvpo5gr5nZvm7M3OWvc3MPrzNGA6+mvkEPpoZuj3NDKo9mlnovmem2T2aGTk+mrlOPmdmMD0AAE0+AAA+Ps3MYDzNzLg+M9OPPjNz7T1nxkU+mpnoPc0Msz3NzCs9AEAXPs3MJLvNbKg+mpmZO81M/T3NzBQ+Z2Y+PGfGBT7NfIO9AAA+O5oZl71nZow+Z2boPgAAgTuaGcg+M/MPPpr5rz5n9BY+mpnwPQDAxj4zc6k+AIA5PjMzjTxnZiW9zTx3PgCQQD7NzDK+ZyaSPWdmLj0AEN89AADZPM2MOr4zY2a+Z3bFPWdmW
r4zcz6+Z2bPPGdmHj1nJja+M7OOvWdmHb2amQA9MzPIO81M0r3NzPo8Z+JdvpoZOz4zswo+M3MsPTMzOb0AgNu8Z2Y5PgAAALnNzDw9zcyvvDOzi76amSU9mplfPgDAkT6a2Y89AAB8PJoZoj4AgCS9AIAnPc1MST5nBgA+mtkHPmemMzya2Tu+Z2boPWcmXT6amZC8Z/7gPZqZAD0AALw7zUyOPc1MeT4AQE69mtmaPjMTXz5nplQ+zdoBPgDgDj5nZgY9AICzPs3Mwz0AIE8+M/MbPmdmvz0AgK09M/NAPgDgur4AAK0+AIDmvTMzFLwAwJi9zayjvc1sYD6amRi9Z6aZPZo5/r3NTEE+mplIPc2scT6aWbs+APACPZpRJD7NTJM+zUwuPWdolz4AYIy9M/M6PjNzPb4AACO+zUyFPM0MWj5nBqe9Z6Z7vprZsj0AIA6+M3OMvs3M5L0zM4y8AACAvJoJ5r0zsyM+Z4ZYvgCAjb0AgCu+zczou2dmNDya2ei9mpk8vgCAEz6ameQ9mpnGvADQk72amfK9zYxGvmdmzrya2a4+zcyvvc3MrDvNzJk9Z2bWvM3MzbqaGU29AADmvTMzN72aGX+9Z2YJvTPTgL6a2dk9Z2aQPM0MDj1npoq+mtkzPjMzez7NzJ48Z2YfvWfm/j0z00k+zczevc0sID4zw6m+mikRPjPTKL3NjLC9MxOKvZrlY77NeJG+M7PzPQAwmr3NBKK9zYxtPgBAEj4zEyU+mpkRvmf2fj6amS4+AMDiPc3MY73NzHE+AED1PZoZnj3NDB4+MytdPmfmtj2a+U8+mhloPmempr2aWdY9Z2YLPpqZqLwAgPo9zZylPZp5hT6ayYs+AIAwPpoZQT5ntvY9zcymvJqZVbwzM3e9APC5PgDARj1nmpY+zUxkPgDAbz6aF2k+mpmpupp5wT6amUk+mlnuPQCAmj2amTc9mvnHPs0Mij7NTME+mhmpPWfmLj0AICG+zew/PgAAFz7NjBG+Z+YTPs1BC72Nr3I+QB1XvprkVz1aums+zUzOPZqZkbsAAEy+AOgrPgAAOLwzM7o9zfACPgCA7DzN7CQ+zcwdPGdm6b0z82q9mplBPmcmqr3NzB69mjkgPs3MDLwzMw8+zcyYvJrZiD4AwFE+AECHPTNzoT5npqm9mpkWvZqZIz1nprQ9MzNvPDMznTxnpg0+MzM9PmcmCj4zc6i9AGA3vTMzQz2amVw9mll+vACAbz4zs5e+zcyPPc2MqD4AAJg9zcRyPc2M1j0AAEY9zZxBPmfGIj6adcY9Z8aCPpqZmbkzM0o+MzMLPs3Mmrya2ZA9mpkUPQAAYL2ameW+ADQOvQAATL0AQIO+zeyIPs3Myrwzs9Q9zbywPAAAgzxn5so9Z2YUvc1cxL0z81Q8ACAGvjNDBj0AwBA+MxPBPmdmE71nZly+Z3YAPs2MxT4AwBc9mhkUvmdGYD4zs6k9MzO7u2cmmT5nZpA+AIAFPprZhD0zk4M+mhl6PQBAqj3NzBi9zSwsPmdm5DyaGSI+MzMGPTNTIj6amd09M7NVPQAofD4AAIa+ZyYsPjNzsT0zs2A+mhkMvmem0T4zM909AAAbPs3KXz4AgNE9M/MTPppxgz7NzN08M1OKPjNzDj7NjJE+M9NEPpqZyT6AugG9MzPevGdGYD7NDGs+AADsvWf2iL0zI9A9Zya7vJqZLT1nZpy8M6uAPgC4cz0zc5W9Z4aLvgCAwr1ndsc9zYzrPWduljwAuKy9M3NrvjMzkT1nZla9ABAHvwCghj0zczs+zcxEO80sxr4AIE6+AIBMvgAAc71nJhc+MzMkPTOz0r0zE3e+ZyYEPmdGFL5nZkI9M7M8vc1MND3NzGY9zSxOvpoZOr6amUy9mpmVPJrZ2r2aMRq+mmlmvjPbFr4zE7a+mvlCPmdGpb4AgI29Z6YlvjOz3b0Awme+Z8ZKvgDAcz5nZla8zUxFvWdmljszExm+mlnjPJrZIr4AQCs+MzM+vZqpi77NzP29M3NPvgDADL4zo749mlnlvZp5mz4z04g+mplevGdmG7wzI5s8Z4YuvppZqb5nxpO9mjkSPQDAMz0A4DS+Z6ISvjOzGz0AgBU+AABhPprZsL4z0/w9Z6bhPjNDgz4zMwK+AEAaPgAAHT0z05s9AJD+Pc2MNz4AwCm+zQzEPZo5PT5nxsM9Z8aCPgAApj3NzDA8ACC1Pmdm1r3NzJi8mtkKPpoZZz4zM5Q9M5NevZqZw73NzEu9AMCAPGfmtD4zM4q8Z2YOPZoZjz4AcIY+AP5aPmfmqj2aWaM+ADCBPmeGcD6aWTs+AADMu82M2z4AwFw+zYx7PjMzQb7NjIm9ACAyPgAAyDsAgII9M/MgPQAAyDrN4j0+zcxjPmd26z2aGSw9mpWQPTOzOj1nZsS9zcySuwAAvzxnZoK8zXyavmcqHD4AAGg8MzPGvJol7b1npu2+Z4bRPc3Mrj7NDMO9AAD/vQAArbwAwJW+zYz/vTMz7TuamUy9mnmYvc1c+b1nJsQ8zcyLvGcmSL6amYG9zaxevgAA5j3NjL6+mpk9PGdmGD6ameu8M6Oyvs18z70AYEG+AMBrvgBALL4zs6U+AIAHvpqZpbzNzAG+zQyEPTNXsL1n5lK9mpl5PAAA7z0zUx0+M7OTPWfGEL4AADE9zUxJvTMzKj4AIIe+zXyXvc1MHjzNjEu+mtkbvgBAqb3NWGu+Z2YmOmfmVr7NjII+zcwrPQBQMj4AMIC+zSwrvgAAxjtn5lK+M7P/vZqJJj6ahWy+mpklvc2Mbb7NTFe+Z2Y4vjOT+L3NzBw9AMABvjMzg7wzE1++mhmFvpqZjz0zMzw+AIAxvGemML4AwM49MzNIPpoZSz1npiq+M7P1OwAAz73NTFM9zcxMvJqZQbyamb+9mpkSPgAAWr2amfY7AAACvTPzcb4zI+69zcz4PQAAtL4zM+M9zYyTPpqZqb0zB4O9zUwHvWc2rj2aeW49Z8Ypvs1MkjwAQFW+mhkdvjMz4T3NjBc+AJxEvgBAU70AgO09Zyb6vZqZoj3NDBC+Z2a1vZoZyD1nZqa5zcwyPpqZKL2amUi9ABipvQDwiL6aufy9M/PiPc3MGrzNLDo+mjHWvc0sdj0AAAI9AAAivWcGlb4AUFE+zfyEPjMTEz6amZm8zbytPprZIr7NjEI+Z0aDPgAAPD0AgEg+M7OMPTPDID7NTDA9MzNfvM3suD5nlHA9MzN2PgCAvr1nZgu9mjmIPs3MaDxnZpA8M/uvPWcmwbyamSk6zYyQPTNzxj4zM7c8mpn4PTPzGz5nhro+zYowPgDAub0z04A+AAB8PM1c5j0z86c+zUyrvc3MAD4zs1c+MzMqPZoZ772a+QM+Z+a1PQCAyLyaGVQ+M3OZPZoZSr0AoDM+MxMjPs28pL0AAJK7zUwgvZoFDD5nJra9M1uMPQCAwL1nJim+M/PVPWemeL2aGec9AADMPTOzVj0AgCa+mjnaPWcmSz4AgAg9Z2Z6vc0euz3NzHO+M/N8PTNzsD6a2bU9AACAOzMzXT0z
M0U+zaxnPmfmr71n5lc+mlnLPTOzBD6amck6zYwDPmcmaD4zM4c8M3P7PQAgbD0AwDu9mpn0OzMzH7vNjNY+zYyfvZqZtz3NzPs9MzN6PgD+Pz4AQLU9Z2bnPTPz4j7NTO09mll9Pmfmt7wAAHo+zYxMPjMzmT7NzAo9M1MNPs2M0T2amZy9mhkXPgCA3D1nBu+9zUyYPmcGfz7NDOi8zayEvjOzAT6aNZO9AFCUvc0ECz4zkwC+M89Rvs1MYb6auby+M9MAvgCghL7NzJq9zQzYvgCQSj7NjKU+AMBAvs3kl74zM+M6MzNDvTPznL6amXm+Z+YAvjMzW72aGfq9Z2ajPZo5Tr7NrMK+Z2YPPTOTbL6aWXy+AEBTvs1Mq73NzHq9M/PTvWeWJ74zSyW+M7PAvmcGhb5nZv29zQwTPs38dr4AALG9zYw1vZoZOL4ARNm9mpmOvDMzDb4AAFa9zYyHvTMzaD2aWXa+AACJvQAAir7NzDS6mpllvWcm4b3N7Gi9Z8YzvgAsmr1nxoK+Z2ZNvmfmzDxnvqm9Zw5kvmdm1r3NDPI9Z+aqvZrtOz7N/Am+ZyazvpqZhbwAAHQ8M/OgPTPjcb4A4Iq9Zz6RvgDAoL4zcya9Z3YkPjPDMb4A2Jq+mllKPTNjML5nZim8zYxYvTMzYj1nhvQ9Z2ZLvADABD0zczg9zSyTvQAUUD0z06O9zYwiPZpZyr1nViW+M/NLvc0sxL1n9lO+M7sUvpq5/72aCTW+zUzCvGfeXD4zS7W+mqk3Pmd2Hz4zcyE9M6OaPM2cFL5nBvY9zUyevM3cNb6amce7ANDbvWfGgT1nxri9mtlLPQDQB74AcCm9Z+aDPTMjMr4zQxM+zXxDvgCQCL5ntmI+M5ODPZq5Nr1nlj++zYxXvTOTq74AAFS9zQwMPpqZvjxnZsm9ABqavTMgLr7NzGS8zUytPTOz+j2aGa++Z3ZxPprZwD6auQ8+mhkGvppZtT3NzCO+AADIO80MPj6a2Yg+MzOtvM3M5Lua6SI+AAChvDMz5T2aGaA9Z6YJPZpZ5D0AgMm9moMaPc3sBT7NDNo9Z2advJop3z2aGQA+Z2bdPJoZAD5n5jE+Z2YnvWdm1j2ameE8Z2YpPmdkST4AYAs+M+NwPjMzXz0AQKk9mtkmPs3MvTxnZp48mpkiPWdWPD4AALC7M5MfvmemBT6amQE8AEBZPjOzxT3NzL68mpn9PQDAl73NLBc9MzOIPWdmqz3NjAI+ADubPTMTMD4AADK+zZydPc3M7DuamVe9zcwZPppZhD0zM8k9AADPvgAgmT0AQCU+MxMtvs0M+b2aORU+M7MjvjOzGb6ayYs9ALCPPWdmrz1npoY9MzvgPTOTfD4AgIY8mhlqPWfGKr4zs5I9mhnlvQAAmz1nJg8+mpkHvc3MRTwA0K49Z2ZcPTMzMD0zMy89ZyCFPprZnL1npo8+Z2aKvAAgFT4ACHC9mpmlPGfmjD4zs7Y9Z0ZIvjMzBj1nZhw9M3NLPmfm5j3NDNQ9mpl7vpoZZz0zM5M7mtkcPjNzAD4AgLe9zUxOvjP7Cj4zMws9Zz4EPpo5l72aGUC+MzM/Ps2MMD5nhmI8zczMOs3Mvj0zqwI+Z0bNvTNzmj6auaw+zUwZPZpZhr4zQyY+Z9a1PjOz9D0AAFO8Z8ZTPpoZOT7NDCw+zcyrPTMzpz4z05w+zXzLPpqZ6j3NjEk+AIDSvQBghD4zs1A9zazNPjMzNb0AgJ+9Z6YhPmdmprqauTk+mkmdvTPTeTzNjBG+zcw8PQCgBz/NTAM9mhk5Ps0aZj4zbJY+mpc1PpoZML3NTKU+Z2YmvABAvD2a2Qs+AAD5PZpohD4zMyc9M/MTPpqZXTwAEN49mtkQPgAA8Lozc38+Z2bmPJp5Gr1nZvI+AADdPACgZr3NTMc9MzORvTMznj2amU0+zUwfPs0cQz6a2YQ+mhlIvZo5l70AAMe9Z6YhvgAAsL0z8w2+ZwbHPTMDeD5nZuq5M3M7vjOTwT0AAPo8M7MHvpr5Cz7NTE4+Z2aYPGemjz0zM9o+Z8ajPQBAgD2amUE9ZyYrPQBgMT4zM/G8AIC+vQDwEj7NTIU+mlmNPQAoEz6aWb+9zcxvPQAAxDwAwEA+zUxevTOzrz3NzKg9mklqPpoVnz2a2du9mtmSPs0McT4AABg7mplOvWcmHb2auQQ+AGDiPc0M/z2aGZS+AACYuwCAsT0zM+E8Z2bkPJqZKT4AwJq+Z2acPjOzhD0A2Ac+ACBivQAA7DwzM0G8AACyPc1Mdz6amUM7MzNMPDNz2T1nZji9", + "polygraphy_class": "ndarray" + } + }, + "attributes": { + "span": 4, + "factor": 0.1 + } + } + ] +} \ No newline at end of file diff --git a/plugin/disentangledAttentionPlugin/README.md b/plugin/disentangledAttentionPlugin/README.md index eb27ae287..4a6c1afde 100644 --- a/plugin/disentangledAttentionPlugin/README.md +++ b/plugin/disentangledAttentionPlugin/README.md @@ -9,13 +9,13 @@ - [Additional Resources](#additional-resources) - [License](#license) - [Changelog](#changelog) - + ## Description This TensorRT plugin implements an efficient algorithm to perform the calculation of disentangled attention matrices for DeBERTa-variant types of Transformers. -Unlike [BERT](https://arxiv.org/abs/1810.04805) where each word is represented by one vector that sums the content embedding and position embedding, [DeBERTa](https://arxiv.org/abs/2006.03654) design first proposed the concept of disentangled attention, which uses two vectors to encode content and position respectively and forms attention weights by summing disentangled matrices. Performance gap has been identified between the new attention scheme and the original self-attention, mainly due to extra indexing and gather opertaions. 
Major optimizations implemented in this plugin includes: (i) fusion of gather and pointwise operataions (ii) utilizing the pattern of relative position matrix and shortcuting out-of-boundary index calculation (iii) parallel index calculation. +Unlike [BERT](https://arxiv.org/abs/1810.04805) where each word is represented by one vector that sums the content embedding and position embedding, [DeBERTa](https://arxiv.org/abs/2006.03654) design first proposed the concept of disentangled attention, which uses two vectors to encode content and position respectively and forms attention weights by summing disentangled matrices. A performance gap has been identified between the new attention scheme and the original self-attention, mainly due to extra indexing and gather operations. Major optimizations implemented in this plugin include: (i) fusion of gather and pointwise operations (ii) utilizing the pattern of relative position matrix and shortcutting out-of-boundary index calculation (iii) parallel index calculation (iv) log tables for relative position index calculation (used for DeBERTa-V2, enabling capture of long-range dependencies without significantly increasing the number of position embeddings). -This TensorRT plugin is primarily intended to be used together with DeBERTa network (with HuggingFace [DeBERTa](https://huggingface.co/docs/transformers/model_doc/deberta) and [DeBERTa-V2](https://huggingface.co/docs/transformers/model_doc/deberta-v2) implementation), but also applies to generic architectures that adopt disentangeld attention. +This TensorRT plugin is primarily intended to be used together with DeBERTa network (with HuggingFace [DeBERTa](https://huggingface.co/docs/transformers/model_doc/deberta) and [DeBERTa-V2](https://huggingface.co/docs/transformers/model_doc/deberta-v2) implementation), but also applies to generic architectures that adopt disentangled attention. ## Structure This plugin works for network with graph node named `DisentangledAttention_TRT`. The corresponding graph modification script can be found under the `demo/DeBERTa` folder of TensorRT OSS. @@ -26,7 +26,7 @@ This plugin takes three inputs: * `data0`: Content-to-content ("c2c") Attention Matrix > **Input Shape:** `[batch_size*number_heads, sequence_length, sequence_length]` - > + > > **Data Type:** `float32` or `float16` or `int8` This is the content-to-content attention, QcKcT, which is essentially the BERT self-attention. @@ -34,7 +34,7 @@ This plugin takes three inputs: * `data1`: Content-to-position ("c2p") Attention Matrix > **Input Shape:** `[batch_size*number_heads, sequence_length, relative_distance*2]` - > + > > **Data Type:** `float32` or `float16` or `int8` This is the content-to-position attention, QcKrT. @@ -42,7 +42,7 @@ This plugin takes three inputs: * `data2`: Position-to-content ("p2c") Attention Matrix > **Input Shape:** `[batch_size*number_heads, sequence_length, relative_distance*2]` - > + > > **Data Type:** `float32` or `float16` or `int8` This is the position-to-content attention, KcQrT. Relative distance is the distance span `k` for disentangled attention. @@ -53,7 +53,7 @@ This plugin generates one output. * `result`: Disentangled Attention Matrix > **Input Shape:** `[batch_size*number_heads, sequence_length, sequence_length]` - > + > > **Data Type:** `float32` or `float16` or `int8` This is the disentangled attention matrix after applying the scaling factor. @@ -69,11 +69,12 @@ This plugin generates one output.
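The combination that the plugin fuses can be summarized with a small reference computation over the `data0`/`data1`/`data2` inputs and the `span` and `factor` attributes described above. The NumPy sketch below is illustrative only: it assumes the simple clipped relative-position index of plugin version 1 (not the V2 log bucket) and a naive per-element loop, and it is not the plugin's CUDA kernel.

```python
import numpy as np

def disentangled_attention_reference(c2c, c2p, p2c, span, factor):
    """Illustrative reference for the fused gather + add + scale.

    Shapes (per the plugin I/O description):
      c2c: [BN, S, S], c2p: [BN, S, 2*span], p2c: [BN, S, 2*span]
    Assumption: version-1 style clipped relative-position index.
    """
    bn, s, _ = c2c.shape
    result = np.empty_like(c2c)
    for i in range(s):
        for j in range(s):
            # Clip the relative distance into [0, 2*span) before gathering.
            c2p_idx = int(np.clip(i - j + span, 0, 2 * span - 1))
            p2c_idx = int(np.clip(j - i + span, 0, 2 * span - 1))
            result[:, i, j] = factor * (
                c2c[:, i, j] + c2p[:, i, c2p_idx] + p2c[:, j, p2c_idx]
            )
    return result
```

The plugin performs this gather, addition, and scaling in a single fused kernel rather than as separate gather and pointwise operations.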
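Item (iv) above refers to the DeBERTa-V2 style log-bucket mapping of relative positions (plugin version 2). As a rough sketch of how such a mapping compresses long distances into a fixed number of buckets, the function below is modeled loosely on the HuggingFace DeBERTa-V2 reference; the plugin computes its index inside the CUDA kernel and the exact formula may differ.

```python
import numpy as np

def log_bucket_position(rel_pos, span, max_position):
    """Sketch of a log-bucket relative-position mapping (illustrative only).

    Offsets within roughly +/- span/2 keep their exact value; larger distances
    are compressed logarithmically so long-range dependencies still fit into
    2*span buckets without adding position embeddings.
    """
    rel_pos = np.asarray(rel_pos)
    sign = np.sign(rel_pos)
    mid = span // 2
    # Avoid log(0) for small offsets; those entries are masked out below.
    abs_pos = np.where(np.abs(rel_pos) < mid, mid - 1, np.abs(rel_pos))
    log_pos = np.ceil(
        np.log(abs_pos / mid) / np.log((max_position - 1) / mid) * (mid - 1)
    ) + mid
    return np.where(np.abs(rel_pos) <= mid, rel_pos,
                    (log_pos * sign).astype(rel_pos.dtype))
```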
- [DeBERTa](https://arxiv.org/abs/2006.03654) - [DeBERTa HuggingFace Implementation](https://github.com/huggingface/transformers/tree/main/src/transformers/models/deberta) - [DeBERTa-V2 HuggingFace Implementation](https://github.com/huggingface/transformers/tree/main/src/transformers/models/deberta_v2) - + ## License For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation. ## Changelog +- 2024.03: Migrated to IPluginV3 interface. The legacy plugin (version 1) using IPluginV2DynamicExt interface is maintained for backward compatibility. +- 2022.07: Added log bucket for the relative position index calculation (since DeBERTa V2). - 2022.04: This is the first release of this `README` file. -- 2022.07: Added log bucket for the relative position index calculation (since DeBERTa V2). \ No newline at end of file diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionCommon.h b/plugin/disentangledAttentionPlugin/disentangledAttentionCommon.h new file mode 100644 index 000000000..5440f4376 --- /dev/null +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionCommon.h @@ -0,0 +1,48 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef TRT_DISENTANGLED_ATTENTION_COMMON_H +#define TRT_DISENTANGLED_ATTENTION_COMMON_H + +#include "NvInferPlugin.h" +#include + +namespace nvinfer1 +{ +namespace plugin +{ + +// Version 1: regular relative position index +// Version 2: log bucket relative position index +#define kDISENTANGLED_VERSION 2 +#if kDISENTANGLED_VERSION == 1 +constexpr int32_t kDISENTANGLED_TILESIZE = 32; +constexpr int32_t kDISENTANGLED_BLOCKDIMY = 8; +#elif kDISENTANGLED_VERSION == 2 +constexpr int32_t kDISENTANGLED_TILESIZE = 64; +constexpr int32_t kDISENTANGLED_BLOCKDIMY = 4; +#endif + +template +void disentangled_kernel_wrapper(TDataType const* data0, TDataType const* data1, TDataType const* data2, + TDataType* result, dim3 dimData0, dim3 dimData1, dim3 dimData2, dim3 dimResult, TDataType factor, int32_t span, + dim3 block, dim3 grid, cudaStream_t stream); + +} // namespace plugin +} // namespace nvinfer1 + +#endif // TRT_DISENTANGLED_ATTENTION_COMMON_H diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp index d9bf788fa..df593c26e 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -19,6 +19,7 @@ #include "NvInferPlugin.h" #include #include +#include #include using namespace nvinfer1; @@ -34,10 +35,14 @@ REGISTER_TENSORRT_PLUGIN(DisentangledAttentionPluginCreator); namespace { constexpr char const* kDEBERTA_PLUGIN_NAME{"DisentangledAttention_TRT"}; -constexpr char const* kDEBERTA_PLUGIN_VERSION{"1"}; +constexpr char const* kDEBERTA_PLUGIN_VERSION{"2"}; } // namespace -DisentangledAttentionPlugin::DisentangledAttentionPlugin() {} +DisentangledAttentionPlugin::DisentangledAttentionPlugin() + : mSpan(0) + , mFactor(0.0f) +{ +} DisentangledAttentionPlugin::DisentangledAttentionPlugin(int32_t span, float factor) : mSpan(span) @@ -45,24 +50,14 @@ DisentangledAttentionPlugin::DisentangledAttentionPlugin(int32_t span, float fac { } -DisentangledAttentionPlugin::DisentangledAttentionPlugin(void const* serialData, size_t serialLength) -{ - // Deserialize in the same order as serialization - deserialize_value(&serialData, &serialLength, &mSpan); - deserialize_value(&serialData, &serialLength, &mFactor); -} +// IPluginV3OneCore methods int32_t DisentangledAttentionPlugin::getNbOutputs() const noexcept { return 1; } -int32_t DisentangledAttentionPlugin::initialize() noexcept -{ - return 0; -} - -char const* DisentangledAttentionPlugin::getPluginType() const noexcept +char const* DisentangledAttentionPlugin::getPluginName() const noexcept { return kDEBERTA_PLUGIN_NAME; } @@ -72,211 +67,279 @@ char const* DisentangledAttentionPlugin::getPluginVersion() const noexcept return kDEBERTA_PLUGIN_VERSION; } -// IPluginV2DynamicExt Methods -nvinfer1::DimsExprs DisentangledAttentionPlugin::getOutputDimensions( - int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +IPluginV3* DisentangledAttentionPlugin::clone() noexcept { try { - PLUGIN_VALIDATE(inputs != nullptr); - PLUGIN_VALIDATE(index == 0); // Only one output - return inputs[0]; + auto* plugin = new DisentangledAttentionPlugin(mSpan, mFactor); + plugin->setPluginNamespace(mNamespace.c_str()); + return plugin; } catch (std::exception const& e) { caughtError(e); } - return nvinfer1::DimsExprs{}; -} - -template -void DisentangledAttentionPlugin::enqueueType(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, cudaStream_t stream, - TDataType factor) -{ - nvinfer1::Dims dims0 = inputDesc[0].dims; - nvinfer1::Dims dims1 = inputDesc[1].dims; - nvinfer1::Dims dims2 = inputDesc[2].dims; - dim3 dimData0(dims0.d[0], dims0.d[1], dims0.d[2]); - dim3 dimData1(dims1.d[0], dims1.d[1], dims1.d[2]); - dim3 dimData2(dims2.d[0], dims2.d[1], dims2.d[2]); - dim3 dimResult(dimData0); - - dim3 blockOptimized(kDISENTANGLED_TILESIZE, kDISENTANGLED_BLOCKDIMY); - dim3 gridOptimized( - (dimResult.z - 1) / kDISENTANGLED_TILESIZE + 1, (dimResult.y - 1) / kDISENTANGLED_TILESIZE + 1, dimResult.x); - - auto const* data0 = static_cast(inputs[0]); - auto const* data1 = static_cast(inputs[1]); - auto const* data2 = static_cast(inputs[2]); - auto* result = static_cast(outputs[0]); - disentangled_kernel_wrapper(data0, data1, data2, result, - dimData0, dimData1, dimData2, dimResult, factor, mSpan, blockOptimized, gridOptimized, stream); + return nullptr; } -int32_t DisentangledAttentionPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, 
void* const* outputs, - void* /* workspace */, cudaStream_t stream) noexcept +void DisentangledAttentionPlugin::setPluginNamespace(char const* pluginNamespace) noexcept { try { - PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - - switch (inputDesc[0].type) - { - case nvinfer1::DataType::kFLOAT: - enqueueType(inputDesc, outputDesc, inputs, outputs, stream, mFactor); - break; - case nvinfer1::DataType::kHALF: - enqueueType<__half>(inputDesc, outputDesc, inputs, outputs, stream, __float2half(mFactor)); - break; - case nvinfer1::DataType::kINT8: - enqueueType(inputDesc, outputDesc, inputs, outputs, stream, static_cast(mFactor)); - break; - default: PLUGIN_VALIDATE(false, "Unsupported Datatype"); break; - } - return cudaPeekAtLastError(); + mNamespace = pluginNamespace; } catch (std::exception const& e) { caughtError(e); - return STATUS_FAILURE; } } -size_t DisentangledAttentionPlugin::getSerializationSize() const noexcept +char const* DisentangledAttentionPlugin::getPluginNamespace() const noexcept { - return sizeof(mSpan) + sizeof(mFactor); + return mNamespace.c_str(); } -void DisentangledAttentionPlugin::serialize(void* buffer) const noexcept +IPluginCapability* DisentangledAttentionPlugin::getCapabilityInterface(PluginCapabilityType type) noexcept { - serialize_value(&buffer, mSpan); - serialize_value(&buffer, mFactor); + try + { + if (type == PluginCapabilityType::kBUILD) + { + return static_cast(this); + } + if (type == PluginCapabilityType::kRUNTIME) + { + return static_cast(this); + } + PLUGIN_ASSERT(type == PluginCapabilityType::kCORE); + return static_cast(this); + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; } -bool DisentangledAttentionPlugin::supportsFormatCombination( - int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept +PluginFieldCollection const* DisentangledAttentionPlugin::getFieldsToSerialize() noexcept { + try + { + mDataToSerialize.clear(); - PLUGIN_ASSERT(inOut && pos < (nbInputs + nbOutputs)); + mDataToSerialize.emplace_back("span", &mSpan, PluginFieldType::kINT32, 1); + mDataToSerialize.emplace_back("factor", &mFactor, PluginFieldType::kFLOAT32, 1); - bool const consistentFloatPrecision - = (inOut[pos].type == inOut[0].type); // all inputs & outputs should have the same precision type + mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.fields = mDataToSerialize.data(); - return (inOut[pos].type == nvinfer1::DataType::kINT8 || inOut[pos].type == nvinfer1::DataType::kHALF - || inOut[pos].type == nvinfer1::DataType::kFLOAT) - && inOut[pos].format == nvinfer1::PluginFormat::kLINEAR && consistentFloatPrecision; + return &mFCToSerialize; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; } -void DisentangledAttentionPlugin::terminate() noexcept {} +// IPluginV3OneBuild methods -void DisentangledAttentionPlugin::destroy() noexcept +int32_t DisentangledAttentionPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, + DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& exprBuilder) noexcept { - // This gets called when the network containing plugin is destroyed - delete this; + try + { + PLUGIN_VALIDATE(inputs != nullptr); + PLUGIN_VALIDATE(nbInputs == 3); + PLUGIN_VALIDATE(outputs != nullptr); + PLUGIN_VALIDATE(nbOutputs == 1); + + // Output has the same shape as the first input + outputs[0] = inputs[0]; + + return 
STATUS_SUCCESS; + } + catch (std::exception const& e) + { + caughtError(e); + } + return STATUS_FAILURE; } -IPluginV2DynamicExt* DisentangledAttentionPlugin::clone() const noexcept +int32_t DisentangledAttentionPlugin::configurePlugin( + DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept { try { - auto* plugin = new DisentangledAttentionPlugin(mSpan, mFactor); - plugin->setPluginNamespace(mNamespace.c_str()); - return plugin; + PLUGIN_VALIDATE(in != nullptr && out != nullptr && nbInputs == 3 && nbOutputs == 1); + + // Validate input and output shapes + for (int32_t i = 0; i < nbInputs; i++) + { + PLUGIN_VALIDATE(in[i].desc.dims.nbDims == in[0].desc.dims.nbDims); + } + + // Check data types are consistent + PLUGIN_VALIDATE(in[0].desc.type == in[1].desc.type && in[0].desc.type == in[2].desc.type); + PLUGIN_VALIDATE(out[0].desc.type == in[0].desc.type); + + return STATUS_SUCCESS; } catch (std::exception const& e) { caughtError(e); } - return nullptr; + return STATUS_FAILURE; } -void DisentangledAttentionPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t DisentangledAttentionPlugin::getOutputDataTypes( + DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept { try { - // inputs - PLUGIN_VALIDATE(nbInputs == 3); // 3 inputs + PLUGIN_VALIDATE(inputTypes != nullptr && outputTypes != nullptr); + PLUGIN_VALIDATE(nbInputs == 3 && nbOutputs == 1); - // check for valid input dimensions - PLUGIN_VALIDATE(in[0].desc.dims.nbDims == 3); - PLUGIN_VALIDATE(in[1].desc.dims.nbDims == 3); - PLUGIN_VALIDATE(in[2].desc.dims.nbDims == 3); + // Output has the same data type as the first input + outputTypes[0] = inputTypes[0]; - // check BN (batch_size * num_heads) dimension consistency - PLUGIN_VALIDATE(in[0].desc.dims.d[0] == in[1].desc.dims.d[0]); - PLUGIN_VALIDATE(in[0].desc.dims.d[0] == in[2].desc.dims.d[0]); + return STATUS_SUCCESS; + } + catch (std::exception const& e) + { + caughtError(e); + } + return STATUS_FAILURE; +} - // check S (sequence_length) dimension consistency - PLUGIN_VALIDATE(in[0].desc.dims.d[1] == in[1].desc.dims.d[1]); - PLUGIN_VALIDATE(in[0].desc.dims.d[1] == in[2].desc.dims.d[1]); - PLUGIN_VALIDATE(in[0].desc.dims.d[1] == in[0].desc.dims.d[2]); +bool DisentangledAttentionPlugin::supportsFormatCombination( + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept +{ + try + { + PLUGIN_ASSERT(inOut && pos < (nbInputs + nbOutputs)); - // check K (2 * span) dimension consistency for in[1] and in[2] - PLUGIN_VALIDATE(in[1].desc.dims.d[2] == 2 * mSpan); - PLUGIN_VALIDATE(in[2].desc.dims.d[2] == 2 * mSpan); + // All inputs and outputs should have the same precision type + bool const consistentFloatPrecision = (inOut[pos].desc.type == inOut[0].desc.type); - // Outputs (same dimension as in[0]) - PLUGIN_VALIDATE(nbOutputs == 1); - PLUGIN_VALIDATE(out[0].desc.dims.nbDims == 3); - PLUGIN_VALIDATE(in[0].desc.dims.d[0] == out[0].desc.dims.d[0]); - PLUGIN_VALIDATE(in[0].desc.dims.d[1] == out[0].desc.dims.d[1]); - PLUGIN_VALIDATE(in[0].desc.dims.d[2] == out[0].desc.dims.d[2]); + return (inOut[pos].desc.type == DataType::kINT8 || inOut[pos].desc.type == DataType::kHALF + || inOut[pos].desc.type == DataType::kFLOAT) + && inOut[pos].desc.format == PluginFormat::kLINEAR && consistentFloatPrecision; } catch (std::exception const& e) { 
caughtError(e); } + return false; } -nvinfer1::DataType DisentangledAttentionPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept +// IPluginV3OneRuntime methods + +template +void DisentangledAttentionPlugin::enqueueType(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, + void const* const* inputs, void* const* outputs, cudaStream_t stream, TDataType factor) +{ + Dims dims0 = inputDesc[0].dims; + Dims dims1 = inputDesc[1].dims; + Dims dims2 = inputDesc[2].dims; + dim3 dimData0(dims0.d[0], dims0.d[1], dims0.d[2]); + dim3 dimData1(dims1.d[0], dims1.d[1], dims1.d[2]); + dim3 dimData2(dims2.d[0], dims2.d[1], dims2.d[2]); + dim3 dimResult(dimData0); + + dim3 blockOptimized(kDISENTANGLED_TILESIZE, kDISENTANGLED_BLOCKDIMY); + dim3 gridOptimized( + (dimResult.z - 1) / kDISENTANGLED_TILESIZE + 1, (dimResult.y - 1) / kDISENTANGLED_TILESIZE + 1, dimResult.x); + + auto const* data0 = static_cast(inputs[0]); + auto const* data1 = static_cast(inputs[1]); + auto const* data2 = static_cast(inputs[2]); + auto* result = static_cast(outputs[0]); + disentangled_kernel_wrapper(data0, data1, data2, result, + dimData0, dimData1, dimData2, dimResult, factor, mSpan, blockOptimized, gridOptimized, stream); +} + +int32_t DisentangledAttentionPlugin::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, + void const* const* inputs, void* const* outputs, void* /* workspace */, cudaStream_t stream) noexcept { try { - PLUGIN_VALIDATE(inputTypes != nullptr); - PLUGIN_VALIDATE(nbInputs > 0); - PLUGIN_VALIDATE(index == 0); - return inputTypes[0]; // version 1, same as data1; version 2, same as data0 + PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); + + switch (inputDesc[0].type) + { + case DataType::kFLOAT: enqueueType(inputDesc, outputDesc, inputs, outputs, stream, mFactor); break; + case DataType::kHALF: + enqueueType<__half>(inputDesc, outputDesc, inputs, outputs, stream, __float2half(mFactor)); + break; + case DataType::kINT8: + enqueueType(inputDesc, outputDesc, inputs, outputs, stream, static_cast(mFactor)); + break; + default: PLUGIN_VALIDATE(false, "Unsupported Datatype"); break; + } + return cudaPeekAtLastError(); } catch (std::exception const& e) { caughtError(e); + return STATUS_FAILURE; } - return nvinfer1::DataType{}; } -size_t DisentangledAttentionPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t DisentangledAttentionPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, + DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept { return 0; } -void DisentangledAttentionPlugin::setPluginNamespace(char const* libNamespace) noexcept +int32_t DisentangledAttentionPlugin::onShapeChange( + PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) noexcept { try { - PLUGIN_VALIDATE(libNamespace != nullptr); - mNamespace = libNamespace; + PLUGIN_VALIDATE(inputs != nullptr && outputs != nullptr); + PLUGIN_VALIDATE(nbInputs == 3 && nbOutputs == 1); + + // Check that all inputs have the same data type + DataType dataType = inputs[0].type; + PLUGIN_VALIDATE(inputs[1].type == dataType && inputs[2].type == dataType); + + // Check that output has the same data type + PLUGIN_VALIDATE(outputs[0].type == dataType); + + // Validate dimensions + 
PLUGIN_VALIDATE(inputs[0].dims.nbDims == inputs[1].dims.nbDims); + PLUGIN_VALIDATE(inputs[0].dims.nbDims == inputs[2].dims.nbDims); + PLUGIN_VALIDATE(outputs[0].dims.nbDims == inputs[0].dims.nbDims); + + return STATUS_SUCCESS; } catch (std::exception const& e) { caughtError(e); } + return STATUS_FAILURE; } -char const* DisentangledAttentionPlugin::getPluginNamespace() const noexcept +IPluginV3* DisentangledAttentionPlugin::attachToContext(IPluginResourceContext* context) noexcept { - return mNamespace.c_str(); + try + { + return this->clone(); + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; } +// -------------------- Creator class Implementation -------------------- + DisentangledAttentionPluginCreator::DisentangledAttentionPluginCreator() { mPluginAttributes.clear(); - - // consistent with the ONNX model attr fields mPluginAttributes.emplace_back(PluginField("span", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("factor", nullptr, PluginFieldType::kFLOAT32, 1)); @@ -299,53 +362,40 @@ PluginFieldCollection const* DisentangledAttentionPluginCreator::getFieldNames() return &mFC; } -char const* DisentangledAttentionPluginCreator::getPluginNamespace() const noexcept -{ - return mNamespace.c_str(); -} - -void DisentangledAttentionPluginCreator::setPluginNamespace(char const* libNamespace) noexcept -{ - try - { - PLUGIN_VALIDATE(libNamespace != nullptr); - mNamespace = libNamespace; - } - catch (std::exception const& e) - { - caughtError(e); - } -} - -IPluginV2DynamicExt* DisentangledAttentionPluginCreator::createPlugin( - char const* /*name*/, PluginFieldCollection const* fc) noexcept +IPluginV3* DisentangledAttentionPluginCreator::createPlugin( + char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept { try { PLUGIN_VALIDATE(fc != nullptr); + PluginField const* fields = fc->fields; + std::optional span; + std::optional factor; - // Set default invalid values (for assert in case when attributes are missing) - int32_t span = 0; - float factor = 0.F; - for (int32_t i = 0; i < fc->nbFields; i++) + for (int32_t i = 0; i < fc->nbFields; ++i) { - std::string fieldName = fc->fields[i].name; - if (fieldName.compare("span") == 0) + char const* attrName = fields[i].name; + if (!strcmp(attrName, "span")) { - span = *static_cast(fc->fields[i].data); + PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kINT32); + span = *static_cast(fields[i].data); } - if (fieldName.compare("factor") == 0) + else if (!strcmp(attrName, "factor")) { - factor = *static_cast(fc->fields[i].data); + PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kFLOAT32); + factor = *static_cast(fields[i].data); } } - PLUGIN_VALIDATE(span >= 0); - PLUGIN_VALIDATE(factor > 0.F && factor < 1.F); // factor is 1/sqrt(3d), therefore must less than 1 + // Validate that all required fields were found + PLUGIN_VALIDATE(span.has_value(), "Required attribute 'span' not found"); + PLUGIN_VALIDATE(factor.has_value(), "Required attribute 'factor' not found"); + PLUGIN_VALIDATE(span.value() >= 0); + PLUGIN_VALIDATE( + factor.value() > 0.F && factor.value() < 1.F); // factor is 1/sqrt(3d), therefore must less than 1 - DisentangledAttentionPlugin* plugin = new DisentangledAttentionPlugin(span, factor); + auto* plugin = new DisentangledAttentionPlugin(span.value(), factor.value()); plugin->setPluginNamespace(mNamespace.c_str()); - return plugin; } catch (std::exception const& e) @@ -355,19 +405,19 @@ IPluginV2DynamicExt* 
DisentangledAttentionPluginCreator::createPlugin( return nullptr; } -IPluginV2DynamicExt* DisentangledAttentionPluginCreator::deserializePlugin( - char const* /*name*/, void const* serialData, size_t serialLength) noexcept +void DisentangledAttentionPluginCreator::setPluginNamespace(char const* pluginNamespace) noexcept { try { - DisentangledAttentionPlugin* plugin = new DisentangledAttentionPlugin(serialData, serialLength); - plugin->setPluginNamespace(mNamespace.c_str()); - - return plugin; + mNamespace = pluginNamespace; } catch (std::exception const& e) { caughtError(e); } - return nullptr; +} + +char const* DisentangledAttentionPluginCreator::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); } diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.h b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.h index f9d01a4c3..73d128b07 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.h +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -21,6 +21,7 @@ #include "NvInferPlugin.h" #include "common/plugin.h" #include "common/serialize.hpp" +#include "disentangledAttentionCommon.h" #include #include #include @@ -36,23 +37,10 @@ namespace plugin // using namespace nvinfer1; -// Version 1: regular relative position index -// Version 2: log bucket relative position index -#define kDISENTANGLED_VERSION 2 -#if kDISENTANGLED_VERSION == 1 -constexpr int32_t kDISENTANGLED_TILESIZE = 32; -constexpr int32_t kDISENTANGLED_BLOCKDIMY = 8; -#elif kDISENTANGLED_VERSION == 2 -constexpr int32_t kDISENTANGLED_TILESIZE = 64; -constexpr int32_t kDISENTANGLED_BLOCKDIMY = 4; -#endif - -template -void disentangled_kernel_wrapper(TDataType const* data0, TDataType const* data1, TDataType const* data2, - TDataType* result, dim3 dimData0, dim3 dimData1, dim3 dimData2, dim3 dimResult, TDataType factor, int32_t span, - dim3 block, dim3 grid, cudaStream_t stream); - -class DisentangledAttentionPlugin final : public nvinfer1::IPluginV2DynamicExt +class DisentangledAttentionPlugin : public nvinfer1::IPluginV3, + public nvinfer1::IPluginV3OneCore, + public nvinfer1::IPluginV3OneBuild, + public nvinfer1::IPluginV3OneRuntime { public: DisentangledAttentionPlugin(); @@ -61,47 +49,38 @@ class DisentangledAttentionPlugin final : public nvinfer1::IPluginV2DynamicExt DisentangledAttentionPlugin(void const* serialData, size_t serialLength); - int32_t getNbOutputs() const noexcept override; - - // DynamicExt plugins returns DimsExprs class instead of Dims - nvinfer1::DimsExprs getOutputDimensions(int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputDims, - nvinfer1::IExprBuilder& exprBuilder) noexcept override; // determine output dims based on input info - - int32_t initialize() noexcept override; - - void terminate() noexcept override; + // Destructor + virtual ~DisentangledAttentionPlugin(){}; - size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; + // IPluginV3OneCore methods + int32_t getNbOutputs() const noexcept override; + void setPluginNamespace(char const* 
pluginNamespace) noexcept; + char const* getPluginNamespace() const noexcept override; + char const* getPluginName() const noexcept override; + char const* getPluginVersion() const noexcept override; + nvinfer1::IPluginV3* clone() noexcept override; + nvinfer1::PluginFieldCollection const* getFieldsToSerialize() noexcept override; + nvinfer1::IPluginCapability* getCapabilityInterface(nvinfer1::PluginCapabilityType type) noexcept override; + + // IPluginV3OneBuild methods + int32_t getOutputShapes(nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::DimsExprs const* shapeInputs, + int32_t nbShapeInputs, nvinfer1::DimsExprs* outputs, int32_t nbOutputs, + nvinfer1::IExprBuilder& exprBuilder) noexcept override; + int32_t configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, + nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; + int32_t getOutputDataTypes(nvinfer1::DataType* outputTypes, int32_t nbOutputs, nvinfer1::DataType const* inputTypes, + int32_t nbInputs) const noexcept override; + bool supportsFormatCombination(int32_t pos, nvinfer1::DynamicPluginTensorDesc const* inOut, int32_t nbInputs, + int32_t nbOutputs) noexcept override; - // This is where the plugin work is done. + // IPluginV3OneRuntime methods int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; - - size_t getSerializationSize() const noexcept override; - - void serialize(void* buffer) const noexcept override; - - bool supportsFormatCombination( - int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; - - char const* getPluginType() const noexcept override; - - char const* getPluginVersion() const noexcept override; - - nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; - - void destroy() noexcept override; - - nvinfer1::DataType getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; - - void setPluginNamespace(char const* pluginNamespace) noexcept override; - - char const* getPluginNamespace() const noexcept override; - - void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; + size_t getWorkspaceSize(nvinfer1::DynamicPluginTensorDesc const* inputs, int32_t nbInputs, + nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; + int32_t onShapeChange(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, + nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) noexcept override; + nvinfer1::IPluginV3* attachToContext(nvinfer1::IPluginResourceContext* context) noexcept override; private: // Helper method for enqueue() @@ -115,13 +94,12 @@ class DisentangledAttentionPlugin final : public nvinfer1::IPluginV2DynamicExt int32_t mSpan; float mFactor; - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; + // Field serialization storage + std::vector mDataToSerialize; + nvinfer1::PluginFieldCollection mFCToSerialize; }; -class DisentangledAttentionPluginCreator : public nvinfer1::IPluginCreator +class DisentangledAttentionPluginCreator : public nvinfer1::IPluginCreatorV3One { public: DisentangledAttentionPluginCreator(); @@ -134,13 
+112,10 @@ class DisentangledAttentionPluginCreator : public nvinfer1::IPluginCreator nvinfer1::PluginFieldCollection const* getFieldNames() noexcept override; - nvinfer1::IPluginV2DynamicExt* createPlugin( - char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept override; - - nvinfer1::IPluginV2DynamicExt* deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept override; + nvinfer1::IPluginV3* createPlugin( + char const* name, nvinfer1::PluginFieldCollection const* fc, nvinfer1::TensorRTPhase phase) noexcept override; - void setPluginNamespace(char const* pluginNamespace) noexcept override; + void setPluginNamespace(char const* pluginNamespace) noexcept; char const* getPluginNamespace() const noexcept override; @@ -149,6 +124,7 @@ class DisentangledAttentionPluginCreator : public nvinfer1::IPluginCreator static std::vector mPluginAttributes; std::string mNamespace; }; + } // namespace plugin } // namespace nvinfer1 diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp new file mode 100644 index 000000000..c5e20da59 --- /dev/null +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp @@ -0,0 +1,376 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Legacy version of the plugin maintained for backward compatibility. + * This implementation is based on IPluginV2 interfaces. 
+ */ +#include "disentangledAttentionPluginLegacy.h" +#include "NvInferPlugin.h" +#include +#include +#include + +using namespace nvinfer1; +using namespace nvinfer1::plugin; + +// Static class fields initialization +PluginFieldCollection DisentangledAttentionPluginCreatorLegacy::mFC{}; +std::vector DisentangledAttentionPluginCreatorLegacy::mPluginAttributes; + +REGISTER_TENSORRT_PLUGIN(DisentangledAttentionPluginCreatorLegacy); + +namespace +{ +constexpr char const* kDEBERTA_PLUGIN_NAME{"DisentangledAttention_TRT"}; +constexpr char const* kDEBERTA_PLUGIN_VERSION{"1"}; +} // namespace + +DisentangledAttentionPluginLegacy::DisentangledAttentionPluginLegacy() {} + +DisentangledAttentionPluginLegacy::DisentangledAttentionPluginLegacy(int32_t span, float factor) + : mSpan(span) + , mFactor(factor) +{ +} + +DisentangledAttentionPluginLegacy::DisentangledAttentionPluginLegacy(void const* serialData, size_t serialLength) +{ + // Deserialize in the same order as serialization + deserialize_value(&serialData, &serialLength, &mSpan); + deserialize_value(&serialData, &serialLength, &mFactor); +} + +int32_t DisentangledAttentionPluginLegacy::getNbOutputs() const noexcept +{ + return 1; +} + +int32_t DisentangledAttentionPluginLegacy::initialize() noexcept +{ + return 0; +} + +char const* DisentangledAttentionPluginLegacy::getPluginType() const noexcept +{ + return kDEBERTA_PLUGIN_NAME; +} + +char const* DisentangledAttentionPluginLegacy::getPluginVersion() const noexcept +{ + return kDEBERTA_PLUGIN_VERSION; +} + +// IPluginV2DynamicExt Methods +nvinfer1::DimsExprs DisentangledAttentionPluginLegacy::getOutputDimensions( + int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +{ + try + { + PLUGIN_VALIDATE(inputs != nullptr); + PLUGIN_VALIDATE(index == 0); // Only one output + return inputs[0]; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nvinfer1::DimsExprs{}; +} + +template +void DisentangledAttentionPluginLegacy::enqueueType(nvinfer1::PluginTensorDesc const* inputDesc, + nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, cudaStream_t stream, + TDataType factor) +{ + nvinfer1::Dims dims0 = inputDesc[0].dims; + nvinfer1::Dims dims1 = inputDesc[1].dims; + nvinfer1::Dims dims2 = inputDesc[2].dims; + dim3 dimData0(dims0.d[0], dims0.d[1], dims0.d[2]); + dim3 dimData1(dims1.d[0], dims1.d[1], dims1.d[2]); + dim3 dimData2(dims2.d[0], dims2.d[1], dims2.d[2]); + dim3 dimResult(dimData0); + + dim3 blockOptimized(kDISENTANGLED_TILESIZE, kDISENTANGLED_BLOCKDIMY); + dim3 gridOptimized( + (dimResult.z - 1) / kDISENTANGLED_TILESIZE + 1, (dimResult.y - 1) / kDISENTANGLED_TILESIZE + 1, dimResult.x); + + auto const* data0 = static_cast(inputs[0]); + auto const* data1 = static_cast(inputs[1]); + auto const* data2 = static_cast(inputs[2]); + auto* result = static_cast(outputs[0]); + disentangled_kernel_wrapper(data0, data1, data2, result, + dimData0, dimData1, dimData2, dimResult, factor, mSpan, blockOptimized, gridOptimized, stream); +} + +int32_t DisentangledAttentionPluginLegacy::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, + nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, + void* /* workspace */, cudaStream_t stream) noexcept +{ + try + { + PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); + + switch (inputDesc[0].type) + { + case nvinfer1::DataType::kFLOAT: + 
enqueueType(inputDesc, outputDesc, inputs, outputs, stream, mFactor); + break; + case nvinfer1::DataType::kHALF: + enqueueType<__half>(inputDesc, outputDesc, inputs, outputs, stream, __float2half(mFactor)); + break; + case nvinfer1::DataType::kINT8: + enqueueType(inputDesc, outputDesc, inputs, outputs, stream, static_cast(mFactor)); + break; + default: PLUGIN_VALIDATE(false, "Unsupported Datatype"); break; + } + return cudaPeekAtLastError(); + } + catch (std::exception const& e) + { + caughtError(e); + return STATUS_FAILURE; + } +} + +size_t DisentangledAttentionPluginLegacy::getSerializationSize() const noexcept +{ + return sizeof(mSpan) + sizeof(mFactor); +} + +void DisentangledAttentionPluginLegacy::serialize(void* buffer) const noexcept +{ + serialize_value(&buffer, mSpan); + serialize_value(&buffer, mFactor); +} + +bool DisentangledAttentionPluginLegacy::supportsFormatCombination( + int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept +{ + + PLUGIN_ASSERT(inOut && pos < (nbInputs + nbOutputs)); + + bool const consistentFloatPrecision + = (inOut[pos].type == inOut[0].type); // all inputs & outputs should have the same precision type + + return (inOut[pos].type == nvinfer1::DataType::kINT8 || inOut[pos].type == nvinfer1::DataType::kHALF + || inOut[pos].type == nvinfer1::DataType::kFLOAT) + && inOut[pos].format == nvinfer1::PluginFormat::kLINEAR && consistentFloatPrecision; +} + +void DisentangledAttentionPluginLegacy::terminate() noexcept {} + +void DisentangledAttentionPluginLegacy::destroy() noexcept +{ + // This gets called when the network containing plugin is destroyed + delete this; +} + +IPluginV2DynamicExt* DisentangledAttentionPluginLegacy::clone() const noexcept +{ + try + { + auto* plugin = new DisentangledAttentionPluginLegacy(mSpan, mFactor); + plugin->setPluginNamespace(mNamespace.c_str()); + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +void DisentangledAttentionPluginLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, + nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +{ + try + { + // inputs + PLUGIN_VALIDATE(nbInputs == 3); // 3 inputs + + // check for valid input dimensions + PLUGIN_VALIDATE(in[0].desc.dims.nbDims == 3); + PLUGIN_VALIDATE(in[1].desc.dims.nbDims == 3); + PLUGIN_VALIDATE(in[2].desc.dims.nbDims == 3); + + // check BN (batch_size * num_heads) dimension consistency + PLUGIN_VALIDATE(in[0].desc.dims.d[0] == in[1].desc.dims.d[0]); + PLUGIN_VALIDATE(in[0].desc.dims.d[0] == in[2].desc.dims.d[0]); + + // check S (sequence_length) dimension consistency + PLUGIN_VALIDATE(in[0].desc.dims.d[1] == in[1].desc.dims.d[1]); + PLUGIN_VALIDATE(in[0].desc.dims.d[1] == in[2].desc.dims.d[1]); + PLUGIN_VALIDATE(in[0].desc.dims.d[1] == in[0].desc.dims.d[2]); + + // check K (2 * span) dimension consistency for in[1] and in[2] + PLUGIN_VALIDATE(in[1].desc.dims.d[2] == 2 * mSpan); + PLUGIN_VALIDATE(in[2].desc.dims.d[2] == 2 * mSpan); + + // Outputs (same dimension as in[0]) + PLUGIN_VALIDATE(nbOutputs == 1); + PLUGIN_VALIDATE(out[0].desc.dims.nbDims == 3); + PLUGIN_VALIDATE(in[0].desc.dims.d[0] == out[0].desc.dims.d[0]); + PLUGIN_VALIDATE(in[0].desc.dims.d[1] == out[0].desc.dims.d[1]); + PLUGIN_VALIDATE(in[0].desc.dims.d[2] == out[0].desc.dims.d[2]); + } + catch (std::exception const& e) + { + caughtError(e); + } +} + +nvinfer1::DataType DisentangledAttentionPluginLegacy::getOutputDataType( + int32_t index, 
nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept +{ + try + { + PLUGIN_VALIDATE(inputTypes != nullptr); + PLUGIN_VALIDATE(nbInputs > 0); + PLUGIN_VALIDATE(index == 0); + return inputTypes[0]; // version 1, same as data1; version 2, same as data0 + } + catch (std::exception const& e) + { + caughtError(e); + } + return nvinfer1::DataType{}; +} + +size_t DisentangledAttentionPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, + nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +{ + return 0; +} + +void DisentangledAttentionPluginLegacy::setPluginNamespace(char const* libNamespace) noexcept +{ + try + { + PLUGIN_VALIDATE(libNamespace != nullptr); + mNamespace = libNamespace; + } + catch (std::exception const& e) + { + caughtError(e); + } +} + +char const* DisentangledAttentionPluginLegacy::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); +} + +DisentangledAttentionPluginCreatorLegacy::DisentangledAttentionPluginCreatorLegacy() +{ + mPluginAttributes.clear(); + + // consistent with the ONNX model attr fields + mPluginAttributes.emplace_back(PluginField("span", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("factor", nullptr, PluginFieldType::kFLOAT32, 1)); + + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +char const* DisentangledAttentionPluginCreatorLegacy::getPluginName() const noexcept +{ + return kDEBERTA_PLUGIN_NAME; +} + +char const* DisentangledAttentionPluginCreatorLegacy::getPluginVersion() const noexcept +{ + return kDEBERTA_PLUGIN_VERSION; +} + +PluginFieldCollection const* DisentangledAttentionPluginCreatorLegacy::getFieldNames() noexcept +{ + return &mFC; +} + +char const* DisentangledAttentionPluginCreatorLegacy::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); +} + +void DisentangledAttentionPluginCreatorLegacy::setPluginNamespace(char const* libNamespace) noexcept +{ + try + { + PLUGIN_VALIDATE(libNamespace != nullptr); + mNamespace = libNamespace; + } + catch (std::exception const& e) + { + caughtError(e); + } +} + +IPluginV2DynamicExt* DisentangledAttentionPluginCreatorLegacy::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept +{ + try + { + PLUGIN_VALIDATE(fc != nullptr); + + // Set default invalid values (for assert in case when attributes are missing) + int32_t span = 0; + float factor = 0.F; + for (int32_t i = 0; i < fc->nbFields; i++) + { + std::string fieldName = fc->fields[i].name; + if (fieldName.compare("span") == 0) + { + span = *static_cast(fc->fields[i].data); + } + if (fieldName.compare("factor") == 0) + { + factor = *static_cast(fc->fields[i].data); + } + } + + PLUGIN_VALIDATE(span >= 0); + PLUGIN_VALIDATE(factor > 0.F && factor < 1.F); // factor is 1/sqrt(3d), therefore must less than 1 + + DisentangledAttentionPluginLegacy* plugin = new DisentangledAttentionPluginLegacy(span, factor); + plugin->setPluginNamespace(mNamespace.c_str()); + + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +IPluginV2DynamicExt* DisentangledAttentionPluginCreatorLegacy::deserializePlugin( + char const* /*name*/, void const* serialData, size_t serialLength) noexcept +{ + try + { + DisentangledAttentionPluginLegacy* plugin = new DisentangledAttentionPluginLegacy(serialData, serialLength); + plugin->setPluginNamespace(mNamespace.c_str()); + + return plugin; + } + catch (std::exception const& e) + { + 
caughtError(e); + } + return nullptr; +} diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h new file mode 100644 index 000000000..0155ec596 --- /dev/null +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h @@ -0,0 +1,144 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DISENTANGLEDATTENTIONPLUGIN_LEGACY_PLUGIN_H +#define DISENTANGLEDATTENTIONPLUGIN_LEGACY_PLUGIN_H + +/* + * Legacy version of the plugin maintained for backward compatibility. + * This implementation is based on IPluginV2 interfaces. + */ +#include "NvInferPlugin.h" +#include "common/plugin.h" +#include "common/serialize.hpp" +#include "disentangledAttentionCommon.h" +#include +#include +#include +#include + +// One of the preferred ways of making TensorRT to be able to see +// our custom layer requires extending IPluginV2 and IPluginCreator classes. +// For requirements for overriden functions, check TensorRT API docs. +namespace nvinfer1 +{ +namespace plugin +{ + +// using namespace nvinfer1; + +class DisentangledAttentionPluginLegacy : public nvinfer1::IPluginV2DynamicExt +{ +public: + DisentangledAttentionPluginLegacy(); + + DisentangledAttentionPluginLegacy(int32_t span, float factor); + + DisentangledAttentionPluginLegacy(void const* serialData, size_t serialLength); + + int32_t getNbOutputs() const noexcept override; + + // DynamicExt plugins returns DimsExprs class instead of Dims + nvinfer1::DimsExprs getOutputDimensions(int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputDims, + nvinfer1::IExprBuilder& exprBuilder) noexcept override; // determine output dims based on input info + + int32_t initialize() noexcept override; + + void terminate() noexcept override; + + size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, + nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; + + // This is where the plugin work is done. 
+ int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; + + size_t getSerializationSize() const noexcept override; + + void serialize(void* buffer) const noexcept override; + + bool supportsFormatCombination( + int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; + + char const* getPluginType() const noexcept override; + + char const* getPluginVersion() const noexcept override; + + nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; + + void destroy() noexcept override; + + nvinfer1::DataType getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; + + void setPluginNamespace(char const* pluginNamespace) noexcept override; + + char const* getPluginNamespace() const noexcept override; + + void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, + nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; + +private: + // Helper method for enqueue() + template + void enqueueType(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, + void const* const* inputs, void* const* outputs, cudaStream_t stream, TDataType factor); + + std::string mNamespace; + + // attributes + int32_t mSpan; + float mFactor; + + using IPluginV2::getOutputDimensions; + using IPluginV2::getWorkspaceSize; + using IPluginV2::enqueue; + using IPluginV2Ext::configurePlugin; +}; + +class DisentangledAttentionPluginCreatorLegacy : public nvinfer1::IPluginCreator +{ +public: + DisentangledAttentionPluginCreatorLegacy(); + + ~DisentangledAttentionPluginCreatorLegacy() override = default; + + char const* getPluginName() const noexcept override; + + char const* getPluginVersion() const noexcept override; + + nvinfer1::PluginFieldCollection const* getFieldNames() noexcept override; + + nvinfer1::IPluginV2DynamicExt* createPlugin( + char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept override; + + nvinfer1::IPluginV2DynamicExt* deserializePlugin( + char const* name, void const* serialData, size_t serialLength) noexcept override; + + void setPluginNamespace(char const* pluginNamespace) noexcept override; + + char const* getPluginNamespace() const noexcept override; + +private: + static nvinfer1::PluginFieldCollection mFC; + static std::vector mPluginAttributes; + std::string mNamespace; +}; +} // namespace plugin +} // namespace nvinfer1 + +#endif // DISENTANGLEDATTENTIONPLUGIN_LEGACY_PLUGIN_H diff --git a/plugin/disentangledAttentionPlugin/disentangledKernel.cu b/plugin/disentangledAttentionPlugin/disentangledKernel.cu index 8a2d0b76e..7e926d11e 100644 --- a/plugin/disentangledAttentionPlugin/disentangledKernel.cu +++ b/plugin/disentangledAttentionPlugin/disentangledKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,9 +15,10 @@ * limitations under the License. 
*/ -#include "disentangledAttentionPlugin.h" +#include "disentangledAttentionCommon.h" #include #include +#include #define IND(i, j, k, dim) \ ((i) *dim.y * dim.z + (j) *dim.z + (k)) // caveat: must use brackets around var name! otherwise IND(i,j+3,k,dim) = diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h index 58e072897..3ca88f8dc 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); diff --git a/plugin/modulatedDeformConvPlugin/CMakeLists.txt b/plugin/modulatedDeformConvPlugin/CMakeLists.txt index 4a4dee8f6..268a8070b 100644 --- a/plugin/modulatedDeformConvPlugin/CMakeLists.txt +++ b/plugin/modulatedDeformConvPlugin/CMakeLists.txt @@ -23,4 +23,6 @@ add_plugin_source( modulatedDeformConvPlugin.h modulatedDeformConvPluginKernel.cu modulatedDeformConvPluginKernel.h + modulatedDeformConvPluginLegacy.cpp + modulatedDeformConvPluginLegacy.h ) diff --git a/plugin/modulatedDeformConvPlugin/CustomModulatedDeformConv2d_PluginConfig.yaml b/plugin/modulatedDeformConvPlugin/CustomModulatedDeformConv2d_PluginConfig.yaml index ef4b867f6..198a3948b 100644 --- a/plugin/modulatedDeformConvPlugin/CustomModulatedDeformConv2d_PluginConfig.yaml +++ b/plugin/modulatedDeformConvPlugin/CustomModulatedDeformConv2d_PluginConfig.yaml @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -16,9 +16,9 @@ # --- name: ModulatedDeformConv2d -interface: "IPluginV2DynamicExt" versions: "1": + interface: "IPluginV2DynamicExt" inputs: - x - offset @@ -98,32 +98,32 @@ versions: stride: min: "=1, =1" max: "=pinf, =pinf" - padding: + padding: min: "=0, =0" max: "=pinf, =pinf" - dilation: + dilation: min: "=1, =1" max: "=pinf, =pinf" - group: + group: min: "=1" max: "=pinf" - deformable_group: + deformable_group: min: "=1" max: "=pinf" attribute_dim_range: stride: min: "=2" max: "=2" - padding: + padding: min: "=2" max: "=2" - dilation: + dilation: min: "=2" max: "=2" - group: + group: min: "=1" max: "=1" - deformable_group: + deformable_group: min: "=1" max: "=1" attributes_required: @@ -145,5 +145,134 @@ versions: bias: float32 attribute_options: [] output_types: - output: float32 + output: float32 + "2": + interface: "IPluginV3" + inputs: + - x + - offset + - mask + - weight + - bias + outputs: + - output + input_dims: + x: 4 + offset: 4 + mask: 4 + weight: 4 + bias: 1 + input_dim_constraints: + - "offset_0 == x_0" + - "mask_0 == x_0" + - "bias_0 == weight_0" + - "mask_2 == offset_2" + - "mask_3 == offset_3" + input_dim_range: + x: + min: "=1, =1, =1, =1" + max: "=pinf, =pinf, =pinf, =pinf" + offset: + min: "=1, =2, =1, =1" + max: "=pinf, =pinf, =pinf, =pinf" + mask: + min: "=1, =1, =1, =1" + max: "=pinf, =pinf, =pinf, =pinf" + weight: + min: "=1, =1, =1, =1" + max: "=pinf, =pinf, =pinf, =pinf" + bias: + min: "=1" + max: "=pinf" + supported_input_types: + combination1: + x: float32 + offset: float32 + mask: float32 + weight: float32 + bias: float32 + combination2: + x: float16 + offset: float16 + mask: float16 + weight: float16 + bias: float16 + output_dims: + output: "mask_0, weight_0, mask_2, mask_3" + attributes: + - stride + - padding + - dilation + - group + - deformable_group + attribute_types: + stride: int32 + padding: int32 + dilation: int32 + group: int32 + deformable_group: int32 + attribute_dims: + stride: 2 + padding: 2 + dilation: 2 + group: 1 + deformable_group: 1 + attribute_length: + stride: 2 + padding: 2 + dilation: 2 + group: 1 + deformable_group: 1 + attribute_options: + stride: + min: "=1, =1" + max: "=pinf, =pinf" + padding: + min: "=0, =0" + max: "=pinf, =pinf" + dilation: + min: "=1, =1" + max: "=pinf, =pinf" + group: + min: "=1" + max: "=pinf" + deformable_group: + min: "=1" + max: "=pinf" + attribute_dim_range: + stride: + min: "=2" + max: "=2" + padding: + min: "=2" + max: "=2" + dilation: + min: "=2" + max: "=2" + group: + min: "=1" + max: "=1" + deformable_group: + min: "=1" + max: "=1" + attributes_required: + - stride + - padding + - dilation + - group + - deformable_group + golden_io_path: "plugin/modulatedDeformConvPlugin/CustomModulatedDeformConv2d_PluginGoldenIO.json" + abs_tol: 1e-5 + rel_tol: 1e-5 + configs: + config1: + input_types: + x: float32 + offset: float32 + mask: float32 + weight: float32 + bias: float32 + attribute_options: [] + output_types: + output: float32 ... 
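For reference, here is a minimal sketch of how a builder application might instantiate version 2 of `ModulatedDeformConv2d` through the `IPluginCreatorV3One` registry path described by the config above. This is an illustration under assumptions, not part of the patch: the attribute values, the instance name `"mdconv"`, the helper function name, and the direct cast of the registry's creator to `IPluginCreatorV3One` are placeholders, and error handling is kept to a minimum.

```cpp
#include "NvInfer.h"

#include <array>
#include <cstdint>
#include <vector>

// Builds a PluginFieldCollection matching the version-2 attributes documented above and
// asks the registered IPluginCreatorV3One for a build-phase plugin instance.
nvinfer1::IPluginV3* createModulatedDeformConvV3(nvinfer1::IPluginRegistry& registry)
{
    // Version "2" is the IPluginV3 implementation; version "1" remains the legacy
    // IPluginV2DynamicExt implementation kept for backward compatibility.
    auto* creator = static_cast<nvinfer1::IPluginCreatorV3One*>(
        registry.getCreator("ModulatedDeformConv2d", "2", ""));
    if (creator == nullptr)
    {
        return nullptr;
    }

    // Attribute values are placeholders; lengths follow the plugin config (2 for the
    // spatial attributes, 1 for group and deformable_group).
    std::array<int32_t, 2> stride{1, 1};
    std::array<int32_t, 2> padding{1, 1};
    std::array<int32_t, 2> dilation{1, 1};
    int32_t group = 1;
    int32_t deformableGroup = 1;

    std::vector<nvinfer1::PluginField> fields{
        {"stride", stride.data(), nvinfer1::PluginFieldType::kINT32, 2},
        {"padding", padding.data(), nvinfer1::PluginFieldType::kINT32, 2},
        {"dilation", dilation.data(), nvinfer1::PluginFieldType::kINT32, 2},
        {"group", &group, nvinfer1::PluginFieldType::kINT32, 1},
        {"deformable_group", &deformableGroup, nvinfer1::PluginFieldType::kINT32, 1},
    };
    nvinfer1::PluginFieldCollection fc{static_cast<int32_t>(fields.size()), fields.data()};

    return creator->createPlugin("mdconv", &fc, nvinfer1::TensorRTPhase::kBUILD);
}
```

The creator copies the attribute values while `createPlugin` runs, so the local buffers only need to outlive that call; the returned `IPluginV3` can then be wired into a network together with the five input tensors (x, offset, mask, weight, bias), for example via `INetworkDefinition::addPluginV3`.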
diff --git a/plugin/modulatedDeformConvPlugin/README.md b/plugin/modulatedDeformConvPlugin/README.md index 17502c67b..f30de925c 100644 --- a/plugin/modulatedDeformConvPlugin/README.md +++ b/plugin/modulatedDeformConvPlugin/README.md @@ -39,7 +39,7 @@ This plugin generates one output tensor of shape `[batch_size, output_channels, ## Parameters This plugin has the plugin creator class `ModulatedDeformableConvPluginDynamicCreator` and the plugin class `ModulatedDeformableConvPluginDynamic`. - + The following parameters are used to create a `ModulatedDeformableConvPluginDynamic` instance: | Type | Parameter | Description @@ -63,9 +63,8 @@ The following resources provide a deeper understanding of the `modulatedDeformCo For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation. ## Changelog - -Jan 2023: -This is the first release of this `README.md` file. +- April 2025: Added version 2 of the plugin, which uses the `IPluginV3` interface. Version 1 (which uses the `IPluginV2DynamicExt` interface) is now deprecated. Version 2 mirrors version 1 in I/O and attributes. +- Jan 2023: Initial release of the `IPluginV2DynamicExt` implementation. ## Known issues diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvCudaHelper.cu b/plugin/modulatedDeformConvPlugin/modulatedDeformConvCudaHelper.cu index 255e4bffd..97cef2a0d 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvCudaHelper.cu +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvCudaHelper.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -102,27 +102,31 @@ template void memcpyPermute( half* dst, half const* src, int32_t* srcSize, int32_t* permute, int32_t srcDim, cudaStream_t stream); template -cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, int32_t m, +cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cudaStream_t stream, cublasOperation_t transa, cublasOperation_t transb, int32_t m, int32_t n, int32_t k, TScalar const* alpha, TScalar const* A, int32_t lda, TScalar const* B, int32_t ldb, TScalar const* beta, TScalar* C, int32_t ldc) { - return CUBLAS_STATUS_INTERNAL_ERROR; + return CUBLAS_STATUS_INTERNAL_ERROR; } template <> -cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, +cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cudaStream_t stream, cublasOperation_t transa, cublasOperation_t transb, int32_t m, int32_t n, int32_t k, float const* alpha, float const* A, int32_t lda, float const* B, int32_t ldb, float const* beta, float* C, int32_t ldc) { CublasWrapper& wrapper = getCublasWrapper(); + // bind the stream to cublas handle to prevent usage of default stream + wrapper.cublasSetStream(handle, stream); return wrapper.cublasSgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } template <> -cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, +cublasStatus_t cublasGemmWrap(cublasHandle_t handle, cudaStream_t stream, cublasOperation_t transa, cublasOperation_t transb, int32_t m, int32_t n, int32_t k, half const* alpha, half const* A, int32_t lda, half const* B, int32_t ldb, half const* beta, half* C, int32_t ldc) { CublasWrapper& wrapper = getCublasWrapper(); + // bind the stream to cublas handle to prevent usage of default stream + wrapper.cublasSetStream(handle, stream); return wrapper.cublasHgemm(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); } diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvCudaHelper.h b/plugin/modulatedDeformConvPlugin/modulatedDeformConvCudaHelper.h index d78ae6322..4fe1091b2 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvCudaHelper.h +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvCudaHelper.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -47,8 +47,8 @@ void memcpyPermute( template nvinfer1::pluginInternal::cublasStatus_t cublasGemmWrap(nvinfer1::pluginInternal::cublasHandle_t handle, - nvinfer1::pluginInternal::cublasOperation_t transa, nvinfer1::pluginInternal::cublasOperation_t transb, int32_t m, - int32_t n, int32_t k, TScalar const* alpha, TScalar const* A, int32_t lda, TScalar const* B, int32_t ldb, - TScalar const* beta, TScalar* C, int32_t ldc); + cudaStream_t stream, nvinfer1::pluginInternal::cublasOperation_t transa, + nvinfer1::pluginInternal::cublasOperation_t transb, int32_t m, int32_t n, int32_t k, TScalar const* alpha, + TScalar const* A, int32_t lda, TScalar const* B, int32_t ldb, TScalar const* beta, TScalar* C, int32_t ldc); #endif // TRT_MODULATED_DEFORM_CONV_CUDA_HELPER_H diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp index f2a735116..376923e14 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp @@ -25,11 +25,10 @@ */ #include "modulatedDeformConvPlugin.h" -#include -#include +#include using namespace nvinfer1; -using namespace nvinfer1::pluginInternal; +using namespace nvinfer1::plugin; using nvinfer1::plugin::ModulatedDeformableConvPluginDynamic; using nvinfer1::plugin::ModulatedDeformableConvPluginDynamicCreator; @@ -37,25 +36,29 @@ void ModulatedDeformConvForwardCUDAKernelLauncherFloat(float const* input, float float const* offset, float const* mask, float* output, void* workspace, int32_t batch, int32_t channels, int32_t height, int32_t width, int32_t channelsOut, int32_t kernelW, int32_t kernelH, int32_t strideW, int32_t strideH, int32_t padW, int32_t padH, int32_t dilationW, int32_t dilationH, int32_t group, - int32_t deformableGroup, int32_t im2colStep, cublasHandle_t cublasHandle, cudaStream_t stream); + int32_t deformableGroup, int32_t im2colStep, nvinfer1::pluginInternal::cublasHandle_t cublasHandle, + cudaStream_t stream); void ModulatedDeformConvForwardCUDAKernelLauncherHalf(half const* input, half const* weight, half const* bias, half const* offset, half const* mask, half* output, void* workspace, int32_t batch, int32_t channels, int32_t height, int32_t width, int32_t channelsOut, int32_t kernelW, int32_t kernelH, int32_t strideW, int32_t strideH, int32_t padW, int32_t padH, int32_t dilationW, int32_t dilationH, int32_t group, - int32_t deformableGroup, int32_t im2colStep, cublasHandle_t cublasHandle, cudaStream_t stream); + int32_t deformableGroup, int32_t im2colStep, nvinfer1::pluginInternal::cublasHandle_t cublasHandle, + cudaStream_t stream); namespace { -static char const* PLUGIN_VERSION{"1"}; +static char const* PLUGIN_VERSION{"2"}; static char const* PLUGIN_NAME{"ModulatedDeformConv2d"}; } // namespace -nvinfer1::PluginFieldCollection ModulatedDeformableConvPluginDynamicCreator::mFC{}; -std::vector ModulatedDeformableConvPluginDynamicCreator::mPluginAttributes; +PluginFieldCollection ModulatedDeformableConvPluginDynamic::mFCToSerialize{}; +std::vector ModulatedDeformableConvPluginDynamic::mDataToSerialize{}; +PluginFieldCollection ModulatedDeformableConvPluginDynamicCreator::mFC{}; +std::vector ModulatedDeformableConvPluginDynamicCreator::mPluginAttributes{}; ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic(std::string const& name, - const nvinfer1::Dims stride, const 
nvinfer1::Dims padding, const nvinfer1::Dims dilation, + nvinfer1::Dims const stride, nvinfer1::Dims const padding, nvinfer1::Dims const dilation, int32_t const deformableGroup, int32_t const group) : mLayerName(name) , mStride(stride) @@ -63,31 +66,17 @@ ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic(std:: , mDilation(dilation) , mDeformableGroup(deformableGroup) , mGroup(group) + , mWithBias(0) { - mWithBias = false; -} - -ModulatedDeformableConvPluginDynamic::ModulatedDeformableConvPluginDynamic( - const std::string name, void const* data, size_t length) - : mLayerName(name) -{ - char const *d = reinterpret_cast(data), *a = d; - mStride = read(d); - mPadding = read(d); - mDilation = read(d); - mDeformableGroup = read(d); - mGroup = read(d); - PLUGIN_VALIDATE(d == a + length); - mWithBias = false; } ModulatedDeformableConvPluginDynamic::~ModulatedDeformableConvPluginDynamic() {} -nvinfer1::IPluginV2DynamicExt* ModulatedDeformableConvPluginDynamic::clone() const noexcept +nvinfer1::IPluginV3* ModulatedDeformableConvPluginDynamic::clone() noexcept { try { - ModulatedDeformableConvPluginDynamic* plugin = new ModulatedDeformableConvPluginDynamic( + auto* plugin = new ModulatedDeformableConvPluginDynamic( mLayerName, mStride, mPadding, mDilation, mDeformableGroup, mGroup); plugin->setPluginNamespace(getPluginNamespace()); return plugin; @@ -99,182 +88,251 @@ nvinfer1::IPluginV2DynamicExt* ModulatedDeformableConvPluginDynamic::clone() con return nullptr; } -nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamic::getOutputDimensions(int32_t outputIndex, - nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +IPluginCapability* ModulatedDeformableConvPluginDynamic::getCapabilityInterface(PluginCapabilityType type) noexcept { try { - nvinfer1::DimsExprs ret; - ret.nbDims = 4; - ret.d[0] = inputs[0].d[0]; - ret.d[1] = inputs[3].d[0]; - - ret.d[2] = inputs[1].d[2]; - ret.d[3] = inputs[1].d[3]; - return ret; + if (type == PluginCapabilityType::kBUILD) + { + return static_cast(this); + } + if (type == PluginCapabilityType::kRUNTIME) + { + return static_cast(this); + } + PLUGIN_ASSERT(type == PluginCapabilityType::kCORE); + return static_cast(this); } catch (std::exception const& e) { caughtError(e); } - return DimsExprs{}; + return nullptr; } -bool ModulatedDeformableConvPluginDynamic::supportsFormatCombination( - int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept +int32_t ModulatedDeformableConvPluginDynamic::getOutputShapes(nvinfer1::DimsExprs const* inputs, int32_t nbInputs, + nvinfer1::DimsExprs const* shapeInputs, int32_t nbShapeInputs, nvinfer1::DimsExprs* outputs, int32_t nbOutputs, + nvinfer1::IExprBuilder& exprBuilder) noexcept { - if (pos == 0) + try { - return ((inOut[pos].type == nvinfer1::DataType::kFLOAT || inOut[pos].type == nvinfer1::DataType::kHALF) && - inOut[pos].format == nvinfer1::TensorFormat::kLINEAR); + PLUGIN_VALIDATE(inputs != nullptr && outputs != nullptr); + PLUGIN_VALIDATE(nbOutputs == 1); + PLUGIN_VALIDATE(nbInputs == 4 || nbInputs == 5); // nbInputs depends on bias + + // Output shape is (N, C_out, H_out, W_out) + // N = N_in (inputs[0].d[0]) + // C_out = C_weight (inputs[3].d[0]) + // H_out = H_offset (inputs[1].d[2]) + // W_out = W_offset (inputs[1].d[3]) + outputs[0].nbDims = 4; + outputs[0].d[0] = inputs[0].d[0]; // Batch size + outputs[0].d[1] = inputs[3].d[0]; // Output channels from weight tensor + outputs[0].d[2] = inputs[1].d[2]; // 
Output height from offset tensor + outputs[0].d[3] = inputs[1].d[3]; // Output width from offset tensor + return STATUS_SUCCESS; } - else + catch (std::exception const& e) { - return inOut[pos].type == inOut[0].type && inOut[pos].format == inOut[0].format; + caughtError(e); } + return STATUS_FAILURE; } -void ModulatedDeformableConvPluginDynamic::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +bool ModulatedDeformableConvPluginDynamic::supportsFormatCombination( + int32_t pos, nvinfer1::DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept { try { - if (nbInputs == 5) + if (pos == 0) { - mWithBias = true; + // Input tensor must be FP32 or FP16 and linear format + return ((inOut[pos].desc.type == nvinfer1::DataType::kFLOAT + || inOut[pos].desc.type == nvinfer1::DataType::kHALF) + && inOut[pos].desc.format == nvinfer1::TensorFormat::kLINEAR); } + // All other tensors must have the same type and format as the input tensor + return inOut[pos].desc.type == inOut[0].desc.type && inOut[pos].desc.format == inOut[0].desc.format; + } + catch (std::exception const& e) + { + caughtError(e); + } + return false; +} + +int32_t ModulatedDeformableConvPluginDynamic::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* /* in */, + int32_t /* nbInputs */, nvinfer1::DynamicPluginTensorDesc const* /* out */, int32_t /* nbOutputs */) noexcept +{ + // Bias presence (mWithBias) is determined dynamically in onShapeChange based on nbInputs. + // No other configuration needed here. + return STATUS_SUCCESS; +} + +int32_t ModulatedDeformableConvPluginDynamic::onShapeChange(nvinfer1::PluginTensorDesc const* /* inputs */, + int32_t nbInputs, nvinfer1::PluginTensorDesc const* /* outputs */, int32_t /* nbOutputs */) noexcept +{ + try + { + // Determine if bias is present based on the number of inputs. + mWithBias = (nbInputs == 5); + // No specific shape-dependent updates needed for this plugin's internal state. + return STATUS_SUCCESS; } catch (std::exception const& e) { caughtError(e); } + return STATUS_FAILURE; } -size_t ModulatedDeformableConvPluginDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ModulatedDeformableConvPluginDynamic::getWorkspaceSize(nvinfer1::DynamicPluginTensorDesc const* inputs, + int32_t /* nbInputs */, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t /* nbOutputs */) const noexcept { - int32_t sizeofDtype = nvinfer1::plugin::bert::getElementSize(outputs[0].type); + // Calculate workspace size needed for the im2col buffer. 
+    int32_t const sizeOfDtype = nvinfer1::plugin::bert::getElementSize(outputs[0].desc.type);
 
-    int32_t nInputPlane = inputs[0].dims.d[1];
-    int32_t outputHeight = outputs[0].dims.d[2];
-    int32_t outputWidth = outputs[0].dims.d[3];
-    int32_t kH = inputs[3].dims.d[2];
-    int32_t kW = inputs[3].dims.d[3];
+    int32_t const nInputPlane = inputs[0].desc.dims.d[1]; // Input channels
+    int32_t const outputHeight = outputs[0].desc.dims.d[2];
+    int32_t const outputWidth = outputs[0].desc.dims.d[3];
+    int32_t const kernelH = inputs[3].desc.dims.d[2]; // Weight kernel height
+    int32_t const kernelW = inputs[3].desc.dims.d[3]; // Weight kernel width
 
-    int64_t colSize = divUp(nInputPlane * kW * kH * outputHeight * outputWidth * sizeofDtype, 16) * 16;
+    // Calculate size needed for the intermediate 'columns' buffer used in im2col + GEMM approach.
+    int64_t const colSize
+        = divUp(static_cast<int64_t>(nInputPlane) * kernelW * kernelH * outputHeight * outputWidth * sizeOfDtype, 16)
+        * 16; // Align to 16 bytes
 
-    return colSize;
+    return static_cast<size_t>(colSize);
 }
 
-int32_t ModulatedDeformableConvPluginDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc,
-    nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workSpace,
+int32_t ModulatedDeformableConvPluginDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDescs,
+    nvinfer1::PluginTensorDesc const* outputDescs, void const* const* inputs, void* const* outputs, void* workspace,
     cudaStream_t stream) noexcept
 {
     try
     {
-        PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr
-            && workSpace != nullptr);
-
-        int32_t batch = inputDesc[0].dims.d[0];
-        int32_t channels = inputDesc[0].dims.d[1];
-        int32_t height = inputDesc[0].dims.d[2];
-        int32_t width = inputDesc[0].dims.d[3];
-        int32_t channelsOut = outputDesc[0].dims.d[1];
-        int32_t kernelH = inputDesc[3].dims.d[2];
-        int32_t kernelW = inputDesc[3].dims.d[3];
-
-        void const* x = inputs[0];
-        void const* offset = inputs[1];
-        void const* mask = inputs[2];
-        void const* weight = inputs[3];
-        void const* bias = mWithBias ? inputs[4] : nullptr;
-        void* output = outputs[0];
-        int32_t im2colStep = std::min(batch, 32);
-
-        auto data_type = inputDesc[0].type;
-        switch (data_type)
+        PLUGIN_VALIDATE(inputDescs != nullptr && outputDescs != nullptr && inputs != nullptr && outputs != nullptr
+            && workspace != nullptr);
+
+        // Extract dimensions
+        int32_t const batch = inputDescs[0].dims.d[0];
+        int32_t const channels = inputDescs[0].dims.d[1];
+        int32_t const height = inputDescs[0].dims.d[2];
+        int32_t const width = inputDescs[0].dims.d[3];
+        int32_t const channelsOut = outputDescs[0].dims.d[1];
+        int32_t const kernelH = inputDescs[3].dims.d[2]; // Weight kernel height
+        int32_t const kernelW = inputDescs[3].dims.d[3]; // Weight kernel width
+
+        // Get input/output pointers
+        void const* inputTensor = inputs[0];
+        void const* offsetTensor = inputs[1];
+        void const* maskTensor = inputs[2];
+        void const* weightTensor = inputs[3];
+        void const* biasTensor = mWithBias ? inputs[4] : nullptr;
+        void* outputTensor = outputs[0];
+
+        // Determine im2col step size
+        int32_t const im2colStep = std::min(batch, 32);
+
+        DataType const dataType = inputDescs[0].type;
+        switch (dataType)
         {
         case nvinfer1::DataType::kFLOAT:
-            ModulatedDeformConvForwardCUDAKernelLauncherFloat((float*) x, (float*) weight, (float*) bias,
-                (float*) offset, (float*) mask, (float*) output, workSpace, batch, channels, height, width, channelsOut,
-                kernelW, kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0],
-                mDilation.d[1], mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream);
+            ModulatedDeformConvForwardCUDAKernelLauncherFloat(static_cast<float const*>(inputTensor),
+                static_cast<float const*>(weightTensor), static_cast<float const*>(biasTensor),
+                static_cast<float const*>(offsetTensor), static_cast<float const*>(maskTensor),
+                static_cast<float*>(outputTensor), workspace, batch, channels, height, width, channelsOut, kernelW,
+                kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1],
+                mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream);
             break;
         case nvinfer1::DataType::kHALF:
-            ModulatedDeformConvForwardCUDAKernelLauncherHalf((half*) x, (half*) weight, (half*) bias,
-                (half*) offset, (half*) mask, (half*) output, workSpace, batch, channels, height, width, channelsOut,
-                kernelW, kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0],
-                mDilation.d[1], mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream);
-            break;
-        default: return 1;
+            ModulatedDeformConvForwardCUDAKernelLauncherHalf(static_cast<half const*>(inputTensor),
+                static_cast<half const*>(weightTensor), static_cast<half const*>(biasTensor),
+                static_cast<half const*>(offsetTensor), static_cast<half const*>(maskTensor),
+                static_cast<half*>(outputTensor), workspace, batch, channels, height, width, channelsOut, kernelW,
+                kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1],
+                mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream);
+            break;
+        default:
+            // Unsupported data type
+            return STATUS_FAILURE;
         }
+        return STATUS_SUCCESS;
     }
     catch (std::exception const& e)
     {
        caughtError(e);
    }
-
-    return 0;
-}
-
-nvinfer1::DataType ModulatedDeformableConvPluginDynamic::getOutputDataType(
-    int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept
-{
-    return inputTypes[0];
-}
-
-// IPluginV2 Methods
-char const* ModulatedDeformableConvPluginDynamic::getPluginType() const noexcept
-{
-    return PLUGIN_NAME;
+    return STATUS_FAILURE;
 }
 
-char const* ModulatedDeformableConvPluginDynamic::getPluginVersion() const noexcept
+IPluginV3* ModulatedDeformableConvPluginDynamic::attachToContext(nvinfer1::IPluginResourceContext* context) noexcept
 {
-    return PLUGIN_VERSION;
+    try
+    {
+        auto* p = static_cast<ModulatedDeformableConvPluginDynamic*>(clone());
+        // The clone has shared ownership of the underlying cublasWrapper instance
+        // that is mapped to the current context.
+        p->setCublasResources(nvinfer1::pluginInternal::createPluginCublasWrapper(context));
+        return p;
+    }
+    catch (std::exception const& e)
+    {
+        caughtError(e);
+    }
+    return nullptr;
 }
 
-int32_t ModulatedDeformableConvPluginDynamic::getNbOutputs() const noexcept
+void ModulatedDeformableConvPluginDynamic::setCublasResources(
+    std::shared_ptr<nvinfer1::pluginInternal::CublasWrapper> cublasWrapper)
 {
-    return 1;
+    mCublasWrapper = cublasWrapper;
+    if (mCublasWrapper)
+    {
+        // The shared cublasWrapper resource owns the handle.
+        // `this` instance has a non-owning pointer to the handle.
+        // The cublasWrapper initializes the handle and checks for nullptr.
+ mCublasHandle = mCublasWrapper->getCublasHandle(); + } + // else: mCublasHandle remains nullptr, handle potential errors in enqueue } -int32_t ModulatedDeformableConvPluginDynamic::initialize() noexcept +int32_t ModulatedDeformableConvPluginDynamic::getOutputDataTypes(nvinfer1::DataType* outputTypes, int32_t nbOutputs, + nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept { - return 0; -} - -void ModulatedDeformableConvPluginDynamic::terminate() noexcept {} + try + { + PLUGIN_VALIDATE(outputTypes != nullptr && inputTypes != nullptr); + PLUGIN_VALIDATE(nbOutputs == 1); + PLUGIN_VALIDATE(nbInputs == 4 || nbInputs == 5); // Depends on bias -size_t ModulatedDeformableConvPluginDynamic::getSerializationSize() const noexcept -{ - return sizeof(mStride) + sizeof(mPadding) + sizeof(mDilation) + sizeof(mDeformableGroup) + sizeof(mGroup); + // Output type must match the input type + outputTypes[0] = inputTypes[0]; + return STATUS_SUCCESS; + } + catch (std::exception const& e) + { + caughtError(e); + } + return STATUS_FAILURE; } -void ModulatedDeformableConvPluginDynamic::serialize(void* buffer) const noexcept +char const* ModulatedDeformableConvPluginDynamic::getPluginName() const noexcept { - char* d = reinterpret_cast(buffer); - write(d, mStride); - write(d, mPadding); - write(d, mDilation); - write(d, mDeformableGroup); - write(d, mGroup); + return PLUGIN_NAME; } -void ModulatedDeformableConvPluginDynamic::destroy() noexcept +char const* ModulatedDeformableConvPluginDynamic::getPluginVersion() const noexcept { - // This gets called when the network containing plugin is destroyed - delete this; + return PLUGIN_VERSION; } -void ModulatedDeformableConvPluginDynamic::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept +void ModulatedDeformableConvPluginDynamic::setPluginNamespace(char const* pluginNamespace) noexcept { try { - mCublasWrapper = createPluginCublasWrapper(gpuAllocator); - mCublasHandle = mCublasWrapper->getCublasHandle(); - PLUGIN_VALIDATE(mCublasHandle); + mNamespace = (pluginNamespace == nullptr) ? "" : pluginNamespace; } catch (std::exception const& e) { @@ -282,35 +340,54 @@ void ModulatedDeformableConvPluginDynamic::attachToContext( } } -void ModulatedDeformableConvPluginDynamic::detachFromContext() noexcept {} +char const* ModulatedDeformableConvPluginDynamic::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); +} -void ModulatedDeformableConvPluginDynamic::setPluginNamespace(char const* libNamespace) noexcept +int32_t ModulatedDeformableConvPluginDynamic::getNbOutputs() const noexcept +{ + return 1; +} + +nvinfer1::PluginFieldCollection const* ModulatedDeformableConvPluginDynamic::getFieldsToSerialize() noexcept { try { - mNamespace = libNamespace; + mDataToSerialize.clear(); + // stride, padding, dilation are stored natively as int64 in memory + // even though the plugin exposes them as int32. + // Therefore, during build time, we upcast them to int64. + // During runtime, we serialize/deserialize them as int64. + // See ModulatedDeformableConvPluginDynamicCreator::createPlugin() on how we handle this. 
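// For illustration, a minimal build-time usage sketch of createPlugin() (hypothetical layer name and
// attribute values, not part of this change): callers pass the attributes as INT32 fields in the
// build phase, while the INT64 stride/padding/dilation fields recorded just below are what TensorRT
// hands back to createPlugin() in the runtime phase when recreating the plugin from a serialized engine.
//
//   int32_t stride[2] = {1, 1};
//   int32_t padding[2] = {1, 1};
//   int32_t dilation[2] = {1, 1};
//   int32_t group = 1;
//   int32_t deformableGroup = 1;
//   std::vector<nvinfer1::PluginField> fields{
//       {"stride", stride, nvinfer1::PluginFieldType::kINT32, 2},
//       {"padding", padding, nvinfer1::PluginFieldType::kINT32, 2},
//       {"dilation", dilation, nvinfer1::PluginFieldType::kINT32, 2},
//       {"group", &group, nvinfer1::PluginFieldType::kINT32, 1},
//       {"deformable_group", &deformableGroup, nvinfer1::PluginFieldType::kINT32, 1}};
//   nvinfer1::PluginFieldCollection fc{static_cast<int32_t>(fields.size()), fields.data()};
//   nvinfer1::plugin::ModulatedDeformableConvPluginDynamicCreator creator;
//   nvinfer1::IPluginV3* plugin = creator.createPlugin("mdconv", &fc, nvinfer1::TensorRTPhase::kBUILD);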
+ mDataToSerialize.emplace_back("stride", mStride.d, PluginFieldType::kINT64, 2); + mDataToSerialize.emplace_back("padding", mPadding.d, PluginFieldType::kINT64, 2); + mDataToSerialize.emplace_back("dilation", mDilation.d, PluginFieldType::kINT64, 2); + mDataToSerialize.emplace_back("group", &mGroup, PluginFieldType::kINT32, 1); + mDataToSerialize.emplace_back("deformable_group", &mDeformableGroup, PluginFieldType::kINT32, 1); + + mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.fields = mDataToSerialize.data(); + return &mFCToSerialize; } catch (std::exception const& e) { caughtError(e); } -} - -char const* ModulatedDeformableConvPluginDynamic::getPluginNamespace() const noexcept -{ - return mNamespace.c_str(); + return nullptr; } ////////////////////// creator ///////////////////////////// ModulatedDeformableConvPluginDynamicCreator::ModulatedDeformableConvPluginDynamicCreator() { - mPluginAttributes.emplace_back(nvinfer1::PluginField("stride", nullptr, nvinfer1::PluginFieldType::kINT32, 2)); - mPluginAttributes.emplace_back(nvinfer1::PluginField("padding", nullptr, nvinfer1::PluginFieldType::kINT32, 2)); - mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation", nullptr, nvinfer1::PluginFieldType::kINT32, 2)); - mPluginAttributes.emplace_back(nvinfer1::PluginField("group", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); - mPluginAttributes.emplace_back( - nvinfer1::PluginField("deformable_group", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); + mPluginAttributes.clear(); + mPluginAttributes.emplace_back(PluginField("stride", nullptr, PluginFieldType::kINT32, 2)); + mPluginAttributes.emplace_back(PluginField("padding", nullptr, PluginFieldType::kINT32, 2)); + mPluginAttributes.emplace_back(PluginField("dilation", nullptr, PluginFieldType::kINT32, 2)); + mPluginAttributes.emplace_back(PluginField("group", nullptr, PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back(PluginField("deformable_group", nullptr, PluginFieldType::kINT32, 1)); + mFC.nbFields = mPluginAttributes.size(); mFC.fields = mPluginAttributes.data(); } @@ -330,90 +407,94 @@ nvinfer1::PluginFieldCollection const* ModulatedDeformableConvPluginDynamicCreat return &mFC; } -nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicCreator::createPlugin( - char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept +nvinfer1::IPluginV3* ModulatedDeformableConvPluginDynamicCreator::createPlugin( + char const* name, nvinfer1::PluginFieldCollection const* fc, nvinfer1::TensorRTPhase phase) noexcept { try { + PLUGIN_VALIDATE(fc != nullptr); + PLUGIN_VALIDATE(fc->fields != nullptr || fc->nbFields == 0); + nvinfer1::Dims stride{2, {1, 1}}; nvinfer1::Dims padding{2, {0, 0}}; nvinfer1::Dims dilation{2, {1, 1}}; int32_t deformableGroup = 1; int32_t group = 1; + plugin::validateRequiredAttributesExist({"deformable_group", "group", "stride", "padding", "dilation"}, fc); - for (int32_t i = 0; i < fc->nbFields; i++) + bool const isBuildPhase = (phase == nvinfer1::TensorRTPhase::kBUILD); + + for (int32_t i = 0; i < fc->nbFields; ++i) { - if (fc->fields[i].data == nullptr) + PluginField const& field = fc->fields[i]; + // Skip fields with null data pointer + if (field.data == nullptr) { continue; } - std::string field_name(fc->fields[i].name); - if (field_name.compare("deformable_group") == 0) + std::string const fieldName(field.name); + + if (fieldName == "deformable_group") { - PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); - deformableGroup = 
static_cast(fc->fields[i].data)[0]; + PLUGIN_VALIDATE(field.type == PluginFieldType::kINT32); + PLUGIN_VALIDATE(field.length == 1); + deformableGroup = *static_cast(field.data); PLUGIN_VALIDATE(deformableGroup > 0); } - - if (field_name.compare("group") == 0) + else if (fieldName == "group") { - PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); - group = static_cast(fc->fields[i].data)[0]; + PLUGIN_VALIDATE(field.type == PluginFieldType::kINT32); + PLUGIN_VALIDATE(field.length == 1); + group = *static_cast(field.data); PLUGIN_VALIDATE(group > 0); } - - if (field_name.compare("stride") == 0) + else if (bert::elem(fieldName, {"stride", "padding", "dilation"})) { - PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); - stride.nbDims = 2; - stride.d[0] = static_cast(fc->fields[i].data)[0]; - stride.d[1] = static_cast(fc->fields[i].data)[1]; - PLUGIN_VALIDATE(stride.d[0] > 0); - PLUGIN_VALIDATE(stride.d[1] > 0); - } - - if (field_name.compare("padding") == 0) - { - PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); - padding.nbDims = 2; - padding.d[0] = static_cast(fc->fields[i].data)[0]; - padding.d[1] = static_cast(fc->fields[i].data)[1]; - PLUGIN_VALIDATE(padding.d[0] >= 0); - PLUGIN_VALIDATE(padding.d[1] >= 0); - } - - if (field_name.compare("dilation") == 0) - { - PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); - dilation.nbDims = 2; - dilation.d[0] = static_cast(fc->fields[i].data)[0]; - dilation.d[1] = static_cast(fc->fields[i].data)[1]; - PLUGIN_VALIDATE(dilation.d[0] > 0); - PLUGIN_VALIDATE(dilation.d[1] > 0); + nvinfer1::Dims* dimsPtr + = (fieldName == "stride") ? &stride : ((fieldName == "padding") ? &padding : &dilation); + + PluginFieldType const expectedFieldType + = isBuildPhase ? PluginFieldType::kINT32 : PluginFieldType::kINT64; + PLUGIN_VALIDATE(field.type == expectedFieldType); + PLUGIN_VALIDATE(field.length == 2); + dimsPtr->nbDims = 2; + + // To stay consistent with this plugin's IO, we expose int32 stride, padding, dilation + // during build but store and serialize/deserialize as int64. + if (isBuildPhase) + { + // During build time, data is INT32, upcast to int64 for internal storage (Dims uses int64_t). + auto const* dataPtr = static_cast(field.data); + dimsPtr->d[0] = dataPtr[0]; + dimsPtr->d[1] = dataPtr[1]; + } + else // Runtime phase + { + // During runtime, data is deserialized as INT64. 
+ PLUGIN_VALIDATE(phase == nvinfer1::TensorRTPhase::kRUNTIME); + auto const* dataPtr = static_cast(field.data); + dimsPtr->d[0] = dataPtr[0]; + dimsPtr->d[1] = dataPtr[1]; + } + + // Validate values + if (fieldName == "padding") + { + PLUGIN_VALIDATE(dimsPtr->d[0] >= 0 && dimsPtr->d[1] >= 0); + } + else // stride or dilation + { + // Stride and dilation must be positive + PLUGIN_VALIDATE(dimsPtr->d[0] > 0 && dimsPtr->d[1] > 0); + } } } - ModulatedDeformableConvPluginDynamic* plugin + auto* plugin = new ModulatedDeformableConvPluginDynamic(name, stride, padding, dilation, deformableGroup, group); - plugin->setPluginNamespace(getPluginNamespace()); - return plugin; - } - catch (std::exception const& e) - { - caughtError(e); - } - return nullptr; -} - -nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept -{ - try - { - auto plugin = new ModulatedDeformableConvPluginDynamic(name, serialData, serialLength); - plugin->setPluginNamespace(getPluginNamespace()); + plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } catch (std::exception const& e) @@ -427,7 +508,7 @@ void ModulatedDeformableConvPluginDynamicCreator::setPluginNamespace(char const* { try { - mNamespace = libNamespace; + mNamespace = (libNamespace == nullptr) ? "" : libNamespace; } catch (std::exception const& e) { diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.h b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.h index afb794227..b1a71f606 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.h +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -26,16 +26,19 @@ #ifndef TRT_MODULATED_DEFORM_CONV_PLUGIN_H #define TRT_MODULATED_DEFORM_CONV_PLUGIN_H -#include +#include +#include #include #include #include #include "common/bertCommon.h" #include "common/checkMacrosPlugin.h" +#include "common/cublasWrapper.h" #include "common/plugin.h" #include "common/serialize.hpp" + #include "modulatedDeformConvCudaHelper.h" namespace nvinfer1 @@ -43,50 +46,58 @@ namespace nvinfer1 namespace plugin { -class ModulatedDeformableConvPluginDynamic : public nvinfer1::IPluginV2DynamicExt +class ModulatedDeformableConvPluginDynamic final : public nvinfer1::IPluginV3, + public nvinfer1::IPluginV3OneCore, + public nvinfer1::IPluginV3OneBuild, + public nvinfer1::IPluginV3OneRuntime { public: - ModulatedDeformableConvPluginDynamic(std::string const& name, const nvinfer1::Dims stride, - const nvinfer1::Dims padding, const nvinfer1::Dims dilation, int32_t const deformableGroup, + ModulatedDeformableConvPluginDynamic(std::string const& name, nvinfer1::Dims const stride, + nvinfer1::Dims const padding, nvinfer1::Dims const dilation, int32_t const deformableGroup, int32_t const group); - ModulatedDeformableConvPluginDynamic(const std::string name, void const* data, size_t length); - ModulatedDeformableConvPluginDynamic() = delete; ~ModulatedDeformableConvPluginDynamic() override; - nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; - nvinfer1::DimsExprs getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, - nvinfer1::IExprBuilder& exprBuilder) noexcept override; - bool supportsFormatCombination( - int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; - void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; - size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; - int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; - void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, - nvinfer1::IGpuAllocator* gpuAllocator) noexcept override; - void detachFromContext() noexcept override; - - nvinfer1::DataType getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; - - char const* getPluginType() const noexcept override; + // --- IPluginV3 methods --- + nvinfer1::IPluginV3* clone() noexcept override; + char const* getPluginName() const noexcept override; char const* getPluginVersion() const noexcept override; + nvinfer1::IPluginCapability* getCapabilityInterface(nvinfer1::PluginCapabilityType type) noexcept override; + nvinfer1::PluginFieldCollection const* getFieldsToSerialize() noexcept override; + + // --- IPluginV3OneCore methods --- int32_t getNbOutputs() const noexcept override; - int32_t initialize() noexcept override; - void terminate() noexcept override; - size_t getSerializationSize() const noexcept override; - void serialize(void* buffer) const noexcept override; - void destroy() noexcept override; - void setPluginNamespace(char const* pluginNamespace) noexcept override; char 
const* getPluginNamespace() const noexcept override; + void setPluginNamespace(char const* pluginNamespace) noexcept; + + // --- IPluginV3OneBuild methods --- + bool supportsFormatCombination(int32_t pos, nvinfer1::DynamicPluginTensorDesc const* inOut, int32_t nbInputs, + int32_t nbOutputs) noexcept override; + int32_t configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, + nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; + size_t getWorkspaceSize(nvinfer1::DynamicPluginTensorDesc const* inputs, int32_t nbInputs, + nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; + int32_t getOutputDataTypes(nvinfer1::DataType* outputTypes, int32_t nbOutputs, nvinfer1::DataType const* inputTypes, + int32_t nbInputs) const noexcept override; + int32_t getOutputShapes(nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::DimsExprs const* shapeInputs, + int32_t nbShapeInputs, nvinfer1::DimsExprs* outputs, int32_t nbOutputs, + nvinfer1::IExprBuilder& exprBuilder) noexcept override; + + // --- IPluginV3OneRuntime methods --- + int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDescs, nvinfer1::PluginTensorDesc const* outputDescs, + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; + IPluginV3* attachToContext(nvinfer1::IPluginResourceContext* context) noexcept override; + int32_t onShapeChange(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, + nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) noexcept override; + +private: + // Helper method to manage cuBLAS resources + void setCublasResources(std::shared_ptr cublasWrapper); private: - const std::string mLayerName; + std::string const mLayerName; std::string mNamespace; nvinfer1::Dims mStride; @@ -94,28 +105,30 @@ class ModulatedDeformableConvPluginDynamic : public nvinfer1::IPluginV2DynamicEx nvinfer1::Dims mDilation; int32_t mDeformableGroup; int32_t mGroup; - bool mWithBias; + int32_t mWithBias; nvinfer1::pluginInternal::cublasHandle_t mCublasHandle{nullptr}; // the wrapper pointer is shared among all plugins attached to the same context. 
std::shared_ptr mCublasWrapper; + + static nvinfer1::PluginFieldCollection mFCToSerialize; + static std::vector mDataToSerialize; }; -class ModulatedDeformableConvPluginDynamicCreator : public nvinfer1::IPluginCreator +class ModulatedDeformableConvPluginDynamicCreator final : public nvinfer1::IPluginCreatorV3One { public: ModulatedDeformableConvPluginDynamicCreator(); + ~ModulatedDeformableConvPluginDynamicCreator() override = default; char const* getPluginName() const noexcept override; char const* getPluginVersion() const noexcept override; nvinfer1::PluginFieldCollection const* getFieldNames() noexcept override; - nvinfer1::IPluginV2* createPlugin(char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept override; - - nvinfer1::IPluginV2* deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept override; + nvinfer1::IPluginV3* createPlugin( + char const* name, nvinfer1::PluginFieldCollection const* fc, nvinfer1::TensorRTPhase phase) noexcept override; - void setPluginNamespace(char const* pluginNamespace) noexcept override; + void setPluginNamespace(char const* pluginNamespace) noexcept; char const* getPluginNamespace() const noexcept override; private: diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginKernel.cu b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginKernel.cu index cd769bc70..5fd92a3e6 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginKernel.cu +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginKernel.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -264,7 +264,7 @@ cudaError_t ModulatedDeformConvForwardCUDAKernelLauncher(TScalar const* input, T TScalar* colStart = columns + g * colGStep; TScalar* outBufferStart = output + b * outStep + g * outGroupStep; - cublasGemmWrap(cublasHandle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &alpha, colStart, n, weightStart, + cublasGemmWrap(cublasHandle, stream, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, &alpha, colStart, n, weightStart, k, &beta, outBufferStart, n); PLUGIN_CHECK_CUDA(cudaPeekAtLastError()); diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp new file mode 100644 index 000000000..7a23637a1 --- /dev/null +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp @@ -0,0 +1,441 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + ************************************************************************** + * Modified from mmcv (https://github.com/open-mmlab/mmcv/tree/master/mmcv) + * Copyright (c) OpenMMLab. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 [see LICENSE for details] + * https://github.com/open-mmlab/mmcv/blob/master/LICENSE + ************************************************************************** + */ + +#include "modulatedDeformConvPluginLegacy.h" +#include +#include + +using namespace nvinfer1; +using namespace nvinfer1::pluginInternal; +using nvinfer1::plugin::ModulatedDeformableConvPluginDynamicLegacy; +using nvinfer1::plugin::ModulatedDeformableConvPluginDynamicLegacyCreator; + +void ModulatedDeformConvForwardCUDAKernelLauncherFloat(float const* input, float const* weight, float const* bias, + float const* offset, float const* mask, float* output, void* workspace, int32_t batch, int32_t channels, + int32_t height, int32_t width, int32_t channelsOut, int32_t kernelW, int32_t kernelH, int32_t strideW, + int32_t strideH, int32_t padW, int32_t padH, int32_t dilationW, int32_t dilationH, int32_t group, + int32_t deformableGroup, int32_t im2colStep, cublasHandle_t cublasHandle, cudaStream_t stream); + +void ModulatedDeformConvForwardCUDAKernelLauncherHalf(half const* input, half const* weight, half const* bias, + half const* offset, half const* mask, half* output, void* workspace, int32_t batch, int32_t channels, + int32_t height, int32_t width, int32_t channelsOut, int32_t kernelW, int32_t kernelH, int32_t strideW, + int32_t strideH, int32_t padW, int32_t padH, int32_t dilationW, int32_t dilationH, int32_t group, + int32_t deformableGroup, int32_t im2colStep, cublasHandle_t cublasHandle, cudaStream_t stream); + +namespace +{ +static char const* PLUGIN_VERSION{"1"}; +static char const* PLUGIN_NAME{"ModulatedDeformConv2d"}; +} // namespace + +nvinfer1::PluginFieldCollection ModulatedDeformableConvPluginDynamicLegacyCreator::mFC{}; +std::vector ModulatedDeformableConvPluginDynamicLegacyCreator::mPluginAttributes; + +ModulatedDeformableConvPluginDynamicLegacy::ModulatedDeformableConvPluginDynamicLegacy(std::string const& name, + nvinfer1::Dims const stride, nvinfer1::Dims const padding, nvinfer1::Dims const dilation, + int32_t const deformableGroup, int32_t const group) + : mLayerName(name) + , mStride(stride) + , mPadding(padding) + , mDilation(dilation) + , mDeformableGroup(deformableGroup) + , mGroup(group) +{ + mWithBias = false; +} + +ModulatedDeformableConvPluginDynamicLegacy::ModulatedDeformableConvPluginDynamicLegacy( + std::string const name, void const* data, size_t length) + : mLayerName(name) +{ + char const *d = reinterpret_cast(data), *a = d; + mStride = read(d); + mPadding = read(d); + mDilation = read(d); + mDeformableGroup = read(d); + mGroup = read(d); + PLUGIN_VALIDATE(d == a + length); + mWithBias = false; +} + +ModulatedDeformableConvPluginDynamicLegacy::~ModulatedDeformableConvPluginDynamicLegacy() {} + +nvinfer1::IPluginV2DynamicExt* ModulatedDeformableConvPluginDynamicLegacy::clone() const noexcept +{ + try + { + ModulatedDeformableConvPluginDynamicLegacy* plugin = new ModulatedDeformableConvPluginDynamicLegacy( + mLayerName, mStride, mPadding, mDilation, mDeformableGroup, mGroup); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamicLegacy::getOutputDimensions(int32_t outputIndex, + 
nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +{ + try + { + nvinfer1::DimsExprs ret; + ret.nbDims = 4; + ret.d[0] = inputs[0].d[0]; + ret.d[1] = inputs[3].d[0]; + + ret.d[2] = inputs[1].d[2]; + ret.d[3] = inputs[1].d[3]; + return ret; + } + catch (std::exception const& e) + { + caughtError(e); + } + return DimsExprs{}; +} + +bool ModulatedDeformableConvPluginDynamicLegacy::supportsFormatCombination( + int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept +{ + if (pos == 0) + { + return ((inOut[pos].type == nvinfer1::DataType::kFLOAT || inOut[pos].type == nvinfer1::DataType::kHALF) + && inOut[pos].format == nvinfer1::TensorFormat::kLINEAR); + } + else + { + return inOut[pos].type == inOut[0].type && inOut[pos].format == inOut[0].format; + } +} + +void ModulatedDeformableConvPluginDynamicLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* inputs, + int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +{ + try + { + if (nbInputs == 5) + { + mWithBias = true; + } + } + catch (std::exception const& e) + { + caughtError(e); + } +} + +size_t ModulatedDeformableConvPluginDynamicLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, + int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +{ + int32_t sizeofDtype = nvinfer1::plugin::bert::getElementSize(outputs[0].type); + + int32_t nInputPlane = inputs[0].dims.d[1]; + int32_t outputHeight = outputs[0].dims.d[2]; + int32_t outputWidth = outputs[0].dims.d[3]; + int32_t kH = inputs[3].dims.d[2]; + int32_t kW = inputs[3].dims.d[3]; + + int64_t colSize = divUp(nInputPlane * kW * kH * outputHeight * outputWidth * sizeofDtype, 16) * 16; + + return colSize; +} + +int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, + nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workSpace, + cudaStream_t stream) noexcept +{ + try + { + PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr + && workSpace != nullptr); + + int32_t batch = inputDesc[0].dims.d[0]; + int32_t channels = inputDesc[0].dims.d[1]; + int32_t height = inputDesc[0].dims.d[2]; + int32_t width = inputDesc[0].dims.d[3]; + int32_t channelsOut = outputDesc[0].dims.d[1]; + int32_t kernelH = inputDesc[3].dims.d[2]; + int32_t kernelW = inputDesc[3].dims.d[3]; + + void const* x = inputs[0]; + void const* offset = inputs[1]; + void const* mask = inputs[2]; + void const* weight = inputs[3]; + void const* bias = mWithBias ? 
inputs[4] : nullptr; + void* output = outputs[0]; + int32_t im2colStep = std::min(batch, 32); + + auto data_type = inputDesc[0].type; + switch (data_type) + { + case nvinfer1::DataType::kFLOAT: + ModulatedDeformConvForwardCUDAKernelLauncherFloat((float*) x, (float*) weight, (float*) bias, + (float*) offset, (float*) mask, (float*) output, workSpace, batch, channels, height, width, channelsOut, + kernelW, kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], + mDilation.d[1], mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream); + break; + case nvinfer1::DataType::kHALF: + ModulatedDeformConvForwardCUDAKernelLauncherHalf((half*) x, (half*) weight, (half*) bias, (half*) offset, + (half*) mask, (half*) output, workSpace, batch, channels, height, width, channelsOut, kernelW, kernelH, + mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], mGroup, + mDeformableGroup, im2colStep, mCublasHandle, stream); + break; + default: return 1; + } + } + catch (std::exception const& e) + { + caughtError(e); + } + + return 0; +} + +nvinfer1::DataType ModulatedDeformableConvPluginDynamicLegacy::getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept +{ + return inputTypes[0]; +} + +// IPluginV2 Methods +char const* ModulatedDeformableConvPluginDynamicLegacy::getPluginType() const noexcept +{ + return PLUGIN_NAME; +} + +char const* ModulatedDeformableConvPluginDynamicLegacy::getPluginVersion() const noexcept +{ + return PLUGIN_VERSION; +} + +int32_t ModulatedDeformableConvPluginDynamicLegacy::getNbOutputs() const noexcept +{ + return 1; +} + +int32_t ModulatedDeformableConvPluginDynamicLegacy::initialize() noexcept +{ + return 0; +} + +void ModulatedDeformableConvPluginDynamicLegacy::terminate() noexcept {} + +size_t ModulatedDeformableConvPluginDynamicLegacy::getSerializationSize() const noexcept +{ + return sizeof(mStride) + sizeof(mPadding) + sizeof(mDilation) + sizeof(mDeformableGroup) + sizeof(mGroup); +} + +void ModulatedDeformableConvPluginDynamicLegacy::serialize(void* buffer) const noexcept +{ + char* d = reinterpret_cast(buffer); + write(d, mStride); + write(d, mPadding); + write(d, mDilation); + write(d, mDeformableGroup); + write(d, mGroup); +} + +void ModulatedDeformableConvPluginDynamicLegacy::destroy() noexcept +{ + // This gets called when the network containing plugin is destroyed + delete this; +} + +void ModulatedDeformableConvPluginDynamicLegacy::attachToContext( + cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept +{ + try + { + mCublasWrapper = createPluginCublasWrapper(gpuAllocator); + mCublasHandle = mCublasWrapper->getCublasHandle(); + PLUGIN_VALIDATE(mCublasHandle); + } + catch (std::exception const& e) + { + caughtError(e); + } +} + +void ModulatedDeformableConvPluginDynamicLegacy::detachFromContext() noexcept {} + +void ModulatedDeformableConvPluginDynamicLegacy::setPluginNamespace(char const* libNamespace) noexcept +{ + try + { + mNamespace = libNamespace; + } + catch (std::exception const& e) + { + caughtError(e); + } +} + +char const* ModulatedDeformableConvPluginDynamicLegacy::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); +} + +////////////////////// creator ///////////////////////////// + +ModulatedDeformableConvPluginDynamicLegacyCreator::ModulatedDeformableConvPluginDynamicLegacyCreator() +{ + mPluginAttributes.emplace_back(nvinfer1::PluginField("stride", nullptr, 
nvinfer1::PluginFieldType::kINT32, 2)); + mPluginAttributes.emplace_back(nvinfer1::PluginField("padding", nullptr, nvinfer1::PluginFieldType::kINT32, 2)); + mPluginAttributes.emplace_back(nvinfer1::PluginField("dilation", nullptr, nvinfer1::PluginFieldType::kINT32, 2)); + mPluginAttributes.emplace_back(nvinfer1::PluginField("group", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); + mPluginAttributes.emplace_back( + nvinfer1::PluginField("deformable_group", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +char const* ModulatedDeformableConvPluginDynamicLegacyCreator::getPluginName() const noexcept +{ + return PLUGIN_NAME; +} + +char const* ModulatedDeformableConvPluginDynamicLegacyCreator::getPluginVersion() const noexcept +{ + return PLUGIN_VERSION; +} + +nvinfer1::PluginFieldCollection const* ModulatedDeformableConvPluginDynamicLegacyCreator::getFieldNames() noexcept +{ + return &mFC; +} + +nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicLegacyCreator::createPlugin( + char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept +{ + try + { + nvinfer1::Dims stride{2, {1, 1}}; + nvinfer1::Dims padding{2, {0, 0}}; + nvinfer1::Dims dilation{2, {1, 1}}; + int32_t deformableGroup = 1; + int32_t group = 1; + plugin::validateRequiredAttributesExist({"deformable_group", "group", "stride", "padding", "dilation"}, fc); + + for (int32_t i = 0; i < fc->nbFields; i++) + { + if (fc->fields[i].data == nullptr) + { + continue; + } + std::string field_name(fc->fields[i].name); + + if (field_name.compare("deformable_group") == 0) + { + PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); + deformableGroup = static_cast(fc->fields[i].data)[0]; + PLUGIN_VALIDATE(deformableGroup > 0); + } + + if (field_name.compare("group") == 0) + { + PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); + group = static_cast(fc->fields[i].data)[0]; + PLUGIN_VALIDATE(group > 0); + } + + if (field_name.compare("stride") == 0) + { + PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); + stride.nbDims = 2; + stride.d[0] = static_cast(fc->fields[i].data)[0]; + stride.d[1] = static_cast(fc->fields[i].data)[1]; + PLUGIN_VALIDATE(stride.d[0] > 0); + PLUGIN_VALIDATE(stride.d[1] > 0); + } + + if (field_name.compare("padding") == 0) + { + PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); + padding.nbDims = 2; + padding.d[0] = static_cast(fc->fields[i].data)[0]; + padding.d[1] = static_cast(fc->fields[i].data)[1]; + PLUGIN_VALIDATE(padding.d[0] >= 0); + PLUGIN_VALIDATE(padding.d[1] >= 0); + } + + if (field_name.compare("dilation") == 0) + { + PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); + dilation.nbDims = 2; + dilation.d[0] = static_cast(fc->fields[i].data)[0]; + dilation.d[1] = static_cast(fc->fields[i].data)[1]; + PLUGIN_VALIDATE(dilation.d[0] > 0); + PLUGIN_VALIDATE(dilation.d[1] > 0); + } + } + + ModulatedDeformableConvPluginDynamicLegacy* plugin + = new ModulatedDeformableConvPluginDynamicLegacy(name, stride, padding, dilation, deformableGroup, group); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicLegacyCreator::deserializePlugin( + char const* name, void const* serialData, size_t serialLength) noexcept +{ + try + { + auto plugin = new ModulatedDeformableConvPluginDynamicLegacy(name, 
serialData, serialLength); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +void ModulatedDeformableConvPluginDynamicLegacyCreator::setPluginNamespace(char const* libNamespace) noexcept +{ + try + { + mNamespace = libNamespace; + } + catch (std::exception const& e) + { + caughtError(e); + } +} + +char const* ModulatedDeformableConvPluginDynamicLegacyCreator::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); +} diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h new file mode 100644 index 000000000..fb73f846e --- /dev/null +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h @@ -0,0 +1,130 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + ************************************************************************** + * Modified from mmcv (https://github.com/open-mmlab/mmcv/tree/master/mmcv) + * Copyright (c) OpenMMLab. All Rights Reserved. + * Licensed under the Apache License, Version 2.0 [see LICENSE for details] + * https://github.com/open-mmlab/mmcv/blob/master/LICENSE + ************************************************************************** + */ + +#ifndef TRT_MODULATED_DEFORM_CONV_PLUGIN_LEGACY_H +#define TRT_MODULATED_DEFORM_CONV_PLUGIN_LEGACY_H +#include + +#include +#include +#include + +#include "common/bertCommon.h" +#include "common/checkMacrosPlugin.h" +#include "common/plugin.h" +#include "common/serialize.hpp" +#include "modulatedDeformConvCudaHelper.h" + +namespace nvinfer1 +{ +namespace plugin +{ + +class ModulatedDeformableConvPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt +{ +public: + ModulatedDeformableConvPluginDynamicLegacy(std::string const& name, nvinfer1::Dims const stride, + nvinfer1::Dims const padding, nvinfer1::Dims const dilation, int32_t const deformableGroup, + int32_t const group); + + ModulatedDeformableConvPluginDynamicLegacy(std::string const name, void const* data, size_t length); + + ModulatedDeformableConvPluginDynamicLegacy() = delete; + + ~ModulatedDeformableConvPluginDynamicLegacy() override; + + nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; + nvinfer1::DimsExprs getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, + nvinfer1::IExprBuilder& exprBuilder) noexcept override; + bool supportsFormatCombination( + int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; + void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, + nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; + size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, + 
nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; + int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; + void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, + nvinfer1::IGpuAllocator* gpuAllocator) noexcept override; + void detachFromContext() noexcept override; + + nvinfer1::DataType getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; + + char const* getPluginType() const noexcept override; + char const* getPluginVersion() const noexcept override; + int32_t getNbOutputs() const noexcept override; + int32_t initialize() noexcept override; + void terminate() noexcept override; + size_t getSerializationSize() const noexcept override; + void serialize(void* buffer) const noexcept override; + void destroy() noexcept override; + void setPluginNamespace(char const* pluginNamespace) noexcept override; + char const* getPluginNamespace() const noexcept override; + +private: + std::string const mLayerName; + std::string mNamespace; + + nvinfer1::Dims mStride; + nvinfer1::Dims mPadding; + nvinfer1::Dims mDilation; + int32_t mDeformableGroup; + int32_t mGroup; + bool mWithBias; + + nvinfer1::pluginInternal::cublasHandle_t mCublasHandle{nullptr}; + // the wrapper pointer is shared among all plugins attached to the same context. + std::shared_ptr mCublasWrapper; +}; + +class ModulatedDeformableConvPluginDynamicLegacyCreator : public nvinfer1::IPluginCreator +{ +public: + ModulatedDeformableConvPluginDynamicLegacyCreator(); + + char const* getPluginName() const noexcept override; + char const* getPluginVersion() const noexcept override; + nvinfer1::PluginFieldCollection const* getFieldNames() noexcept override; + + nvinfer1::IPluginV2* createPlugin(char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept override; + + nvinfer1::IPluginV2* deserializePlugin( + char const* name, void const* serialData, size_t serialLength) noexcept override; + + void setPluginNamespace(char const* pluginNamespace) noexcept override; + char const* getPluginNamespace() const noexcept override; + +private: + static nvinfer1::PluginFieldCollection mFC; + static std::vector mPluginAttributes; + std::string mNamespace; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // TRT_MODULATED_DEFORM_CONV_PLUGIN_LEGACY_H diff --git a/plugin/multiscaleDeformableAttnPlugin/CMakeLists.txt b/plugin/multiscaleDeformableAttnPlugin/CMakeLists.txt index f7fc5228a..a1e0fe686 100644 --- a/plugin/multiscaleDeformableAttnPlugin/CMakeLists.txt +++ b/plugin/multiscaleDeformableAttnPlugin/CMakeLists.txt @@ -20,5 +20,7 @@ add_plugin_source( multiscaleDeformableAttn.h multiscaleDeformableAttnPlugin.cpp multiscaleDeformableAttnPlugin.h + multiscaleDeformableAttnPluginLegacy.cpp + multiscaleDeformableAttnPluginLegacy.h multiscaleDeformableIm2ColCuda.cuh ) diff --git a/plugin/multiscaleDeformableAttnPlugin/README.md b/plugin/multiscaleDeformableAttnPlugin/README.md index 4affdcca1..223bfb929 100644 --- a/plugin/multiscaleDeformableAttnPlugin/README.md +++ b/plugin/multiscaleDeformableAttnPlugin/README.md @@ -11,13 +11,13 @@ ## Description -The `multiscaleDeformableAttnPlugin` is used to perform attention computation over a small set of key sampling points around a reference point rather than looking over all possible spatial locations. 
It makes use of multiscale feature maps to effectively represent objects at different scales. It helps to achieve faster convergence and better performance on small objects. +The `multiscaleDeformableAttnPlugin` is used to perform attention computation over a small set of key sampling points around a reference point rather than looking over all possible spatial locations. It makes use of multiscale feature maps to effectively represent objects at different scales. It helps to achieve faster convergence and better performance on small objects. ### Structure The `multiscaleDeformableAttnPlugin` takes 5 inputs in the following order : `value`, `spatial_shapes`, `level_start_index`, `sampling_locations`, and `atttention_weights`. -`value` +`value` The input feature maps from different scales concatenated to provide the input feature vector. The shape of this tensor is `[N, S, M, D]` where `N` is batch size, `S` is the length of the feature maps, `M` is the number of attentions heads, `D` is hidden_dim/num_heads. `spatial_shapes` @@ -53,11 +53,15 @@ The following resources provide a deeper understanding of the `multiscaleDeforma For terms and conditions for use, reproduction, and distribution, see the [TensorRT Software License Agreement](https://docs.nvidia.com/deeplearning/sdk/tensorrt-sla/index.html) documentation. -## Changelog +## Changelog -Feb 2022 +Apr 2025 +Added version 2 of the plugin that uses the IPluginV3 interface. The version 1 (using IPluginV2DynamicExt interface) is now deprecated. The version 2 mirrors version 1 in IO and attributes. + +Feb 2022 This is the first release of this `README.md` file. -## Known issues + +## Known issues There are no known issues in this plugin. diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp index 59940ce33..80182b3d5 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp @@ -19,28 +19,33 @@ #include "multiscaleDeformableAttn.h" using namespace nvinfer1; -using namespace plugin; - -namespace nvinfer1::plugin -{ +using namespace nvinfer1::plugin; namespace { -static char const* DMHA_VERSION{"1"}; +static char const* DMHA_VERSION{"2"}; static char const* DMHA_NAME{"MultiscaleDeformableAttnPlugin_TRT"}; } // namespace -MultiscaleDeformableAttnPlugin::MultiscaleDeformableAttnPlugin() {} +namespace nvinfer1::plugin +{ -MultiscaleDeformableAttnPlugin::MultiscaleDeformableAttnPlugin(void const* data, size_t length) {} +MultiscaleDeformableAttnPlugin::MultiscaleDeformableAttnPlugin() {} -nvinfer1::IPluginV2DynamicExt* MultiscaleDeformableAttnPlugin::clone() const PLUGIN_NOEXCEPT +IPluginCapability* MultiscaleDeformableAttnPlugin::getCapabilityInterface(PluginCapabilityType type) noexcept { try { - MultiscaleDeformableAttnPlugin* plugin = new MultiscaleDeformableAttnPlugin(); - plugin->setPluginNamespace(getPluginNamespace()); - return plugin; + if (type == PluginCapabilityType::kBUILD) + { + return static_cast(this); + } + if (type == PluginCapabilityType::kRUNTIME) + { + return static_cast(this); + } + PLUGIN_ASSERT(type == PluginCapabilityType::kCORE); + return static_cast(this); } catch (std::exception const& e) { @@ -49,171 +54,318 @@ nvinfer1::IPluginV2DynamicExt* MultiscaleDeformableAttnPlugin::clone() const PLU return nullptr; } -nvinfer1::DimsExprs MultiscaleDeformableAttnPlugin::getOutputDimensions(int32_t outputIndex, - 
nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) PLUGIN_NOEXCEPT +// IPluginV3OneCore methods +char const* MultiscaleDeformableAttnPlugin::getPluginName() const noexcept { - nvinfer1::DimsExprs ret; - ret.nbDims = 4; - ret.d[0] = inputs[0].d[0]; - ret.d[1] = inputs[3].d[1]; - ret.d[2] = inputs[0].d[2]; - ret.d[3] = inputs[0].d[3]; - - return ret; + return DMHA_NAME; } -bool MultiscaleDeformableAttnPlugin::supportsFormatCombination( - int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) PLUGIN_NOEXCEPT +char const* MultiscaleDeformableAttnPlugin::getPluginVersion() const noexcept { - PLUGIN_ASSERT((nbInputs == 5)); - PLUGIN_ASSERT((nbOutputs == 1)); + return DMHA_VERSION; +} - if (inOut[pos].format == nvinfer1::TensorFormat::kLINEAR) - { - if ((pos == 1) || (pos == 2)) - { - return (inOut[pos].type == nvinfer1::DataType::kINT32); - } - return ((inOut[pos].type == inOut[0].type) - && ((inOut[pos].type == nvinfer1::DataType::kFLOAT) || (inOut[pos].type == nvinfer1::DataType::kHALF))); - } - return false; +int32_t MultiscaleDeformableAttnPlugin::getNbOutputs() const noexcept +{ + return 1; } -void MultiscaleDeformableAttnPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) PLUGIN_NOEXCEPT +void MultiscaleDeformableAttnPlugin::setPluginNamespace(char const* pluginNamespace) noexcept { - // Check for valid input dimensions - PLUGIN_ASSERT(inputs[0].desc.dims.nbDims == 4); - PLUGIN_ASSERT(inputs[1].desc.dims.nbDims == 2); - PLUGIN_ASSERT(inputs[2].desc.dims.nbDims == 1); - PLUGIN_ASSERT(inputs[3].desc.dims.nbDims == 6); - PLUGIN_ASSERT(inputs[4].desc.dims.nbDims == 5); - - // Check M dimensions consistency - PLUGIN_ASSERT(inputs[0].desc.dims.d[2] == inputs[3].desc.dims.d[2]); - PLUGIN_ASSERT(inputs[0].desc.dims.d[2] == inputs[4].desc.dims.d[2]); - - // Check L dimensions consistency - PLUGIN_ASSERT(inputs[1].desc.dims.d[0] == inputs[2].desc.dims.d[0]); - PLUGIN_ASSERT(inputs[1].desc.dims.d[0] == inputs[3].desc.dims.d[3]); - PLUGIN_ASSERT(inputs[1].desc.dims.d[0] == inputs[4].desc.dims.d[3]); - - // Check P dimensions consistency - PLUGIN_ASSERT(inputs[3].desc.dims.d[4] == inputs[4].desc.dims.d[4]); - - // Check Lq dimensions consistency - PLUGIN_ASSERT(inputs[3].desc.dims.d[1] == inputs[4].desc.dims.d[1]); + mNamespace = pluginNamespace; } -size_t MultiscaleDeformableAttnPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const PLUGIN_NOEXCEPT +char const* MultiscaleDeformableAttnPlugin::getPluginNamespace() const noexcept { - return 0; + return mNamespace.c_str(); } -int32_t MultiscaleDeformableAttnPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* /* outputDesc */, void const* const* inputs, void* const* outputs, - void* /* workSpace */, cudaStream_t stream) PLUGIN_NOEXCEPT +IPluginV3* MultiscaleDeformableAttnPlugin::clone() noexcept { - PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - - int32_t const batch = inputDesc[0].dims.d[0]; - int32_t spatial_size = inputDesc[0].dims.d[1]; - int32_t num_heads = inputDesc[0].dims.d[2]; - int32_t channels = inputDesc[0].dims.d[3]; - int32_t num_levels = inputDesc[1].dims.d[0]; - int32_t num_query = inputDesc[3].dims.d[1]; - int32_t num_point = inputDesc[3].dims.d[4]; - int32_t rc = 0; - if 
(inputDesc[0].type == nvinfer1::DataType::kFLOAT) + try { - float const* value = static_cast(inputs[0]); - int32_t const* spatialShapes = static_cast(inputs[1]); - int32_t const* levelStartIndex = static_cast(inputs[2]); - float const* samplingLoc = static_cast(inputs[3]); - float const* attnWeight = static_cast(inputs[4]); - float* output = static_cast(outputs[0]); - - rc = ms_deform_attn_cuda_forward(stream, value, spatialShapes, levelStartIndex, samplingLoc, attnWeight, output, - batch, spatial_size, num_heads, channels, num_levels, num_query, num_point); + auto* plugin = new MultiscaleDeformableAttnPlugin(); + plugin->setPluginNamespace(mNamespace.c_str()); + return plugin; } - else if (inputDesc[0].type == nvinfer1::DataType::kHALF) + catch (std::exception const& e) { - __half const* value = static_cast<__half const*>(inputs[0]); - int32_t const* spatialShapes = static_cast(inputs[1]); - int32_t const* levelStartIndex = static_cast(inputs[2]); - __half const* samplingLoc = static_cast<__half const*>(inputs[3]); - __half const* attnWeight = static_cast<__half const*>(inputs[4]); - __half* output = static_cast<__half*>(outputs[0]); - - rc = ms_deform_attn_cuda_forward(stream, value, spatialShapes, levelStartIndex, samplingLoc, attnWeight, output, - batch, spatial_size, num_heads, channels, num_levels, num_query, num_point); + caughtError(e); } - - return rc; + return nullptr; } -void MultiscaleDeformableAttnPlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) PLUGIN_NOEXCEPT +// IPluginV3OneBuild methods +int32_t MultiscaleDeformableAttnPlugin::getOutputDataTypes( + DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept { -} + try + { + PLUGIN_VALIDATE(outputTypes != nullptr, "outputTypes pointer is null"); + PLUGIN_VALIDATE(nbOutputs > 0, "nbOutputs is not positive"); + PLUGIN_VALIDATE(inputTypes != nullptr, "inputTypes pointer is null"); + PLUGIN_VALIDATE(nbInputs > 0, "nbInputs is not positive"); -void MultiscaleDeformableAttnPlugin::detachFromContext() PLUGIN_NOEXCEPT {} + // Output type is the same as the first input type + std::fill_n(outputTypes, nbOutputs, inputTypes[0]); -// IPluginV2Ext Methods -nvinfer1::DataType MultiscaleDeformableAttnPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const PLUGIN_NOEXCEPT -{ - return inputTypes[0]; + return STATUS_SUCCESS; + } + catch (std::exception const& e) + { + caughtError(e); + } + return STATUS_FAILURE; } -// IPluginV2 Methods -char const* MultiscaleDeformableAttnPlugin::getPluginType() const PLUGIN_NOEXCEPT +int32_t MultiscaleDeformableAttnPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, + DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& exprBuilder) noexcept { - return DMHA_NAME; + try + { + PLUGIN_VALIDATE(outputs != nullptr, "outputs pointer is null"); + PLUGIN_VALIDATE(nbOutputs > 0, "nbOutputs is not positive"); + PLUGIN_VALIDATE(inputs != nullptr, "inputs pointer is null"); + PLUGIN_VALIDATE(nbInputs == 5, "Expected 5 inputs"); + + // Output shape: [N, Lq, M, D] + outputs[0].nbDims = 4; + outputs[0].d[0] = inputs[0].d[0]; // Batch size + outputs[0].d[1] = inputs[3].d[1]; // Lq (query length) + outputs[0].d[2] = inputs[0].d[2]; // Number of heads + outputs[0].d[3] = inputs[0].d[3]; // Hidden dimension per head + + return STATUS_SUCCESS; + } + catch (std::exception const& e) + { + 
caughtError(e); + } + return STATUS_FAILURE; } -char const* MultiscaleDeformableAttnPlugin::getPluginVersion() const PLUGIN_NOEXCEPT +bool MultiscaleDeformableAttnPlugin::supportsFormatCombination( + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept { - return DMHA_VERSION; + try + { + PLUGIN_VALIDATE(inOut != nullptr, "inOut pointer is null"); + PLUGIN_VALIDATE(nbInputs == 5, "Expected 5 inputs"); + PLUGIN_VALIDATE(nbOutputs == 1, "Expected 1 output"); + + // Check format + PluginTensorDesc const& desc = inOut[pos].desc; + if (desc.format != TensorFormat::kLINEAR) + { + return false; + } + + // Special handling for spatial_shapes and level_start_index (inputs 1 and 2) + if (pos == 1 || pos == 2) + { + return desc.type == DataType::kINT32; + } + + // Other inputs and output must have the same type, either FP32 or FP16 + if (pos == 0 || pos == 3 || pos == 4 || pos == nbInputs) + { + // Check that the data type matches input[0] + bool const isFloatType = desc.type == DataType::kFLOAT || desc.type == DataType::kHALF; + if (pos == 0) // First tensor, just check if it's a supported type + { + return isFloatType; + } + // Other tensors must match the first + return desc.type == inOut[0].desc.type && isFloatType; + } + + return false; + } + catch (std::exception const& e) + { + caughtError(e); + } + return false; } -int32_t MultiscaleDeformableAttnPlugin::getNbOutputs() const PLUGIN_NOEXCEPT +int32_t MultiscaleDeformableAttnPlugin::configurePlugin( + DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept { - return 1; + try + { + PLUGIN_VALIDATE(in != nullptr, "in pointer is null"); + PLUGIN_VALIDATE(out != nullptr, "out pointer is null"); + PLUGIN_VALIDATE(nbInputs == 5, "Expected 5 inputs"); + PLUGIN_VALIDATE(nbOutputs == 1, "Expected 1 output"); + + // Check for valid input dimensions + PLUGIN_VALIDATE(in[0].desc.dims.nbDims == 4, "First input must have 4 dimensions"); + PLUGIN_VALIDATE(in[1].desc.dims.nbDims == 2, "Second input must have 2 dimensions"); + PLUGIN_VALIDATE(in[2].desc.dims.nbDims == 1, "Third input must have 1 dimension"); + PLUGIN_VALIDATE(in[3].desc.dims.nbDims == 6, "Fourth input must have 6 dimensions"); + PLUGIN_VALIDATE(in[4].desc.dims.nbDims == 5, "Fifth input must have 5 dimensions"); + + // Check M dimensions consistency + PLUGIN_VALIDATE(in[0].desc.dims.d[2] == in[3].desc.dims.d[2], "Inconsistent dimensions for number of heads"); + PLUGIN_VALIDATE(in[0].desc.dims.d[2] == in[4].desc.dims.d[2], "Inconsistent dimensions for number of heads"); + + // Check L dimensions consistency + PLUGIN_VALIDATE(in[1].desc.dims.d[0] == in[2].desc.dims.d[0], "Inconsistent dimensions for number of levels"); + PLUGIN_VALIDATE(in[1].desc.dims.d[0] == in[3].desc.dims.d[3], "Inconsistent dimensions for number of levels"); + PLUGIN_VALIDATE(in[1].desc.dims.d[0] == in[4].desc.dims.d[3], "Inconsistent dimensions for number of levels"); + + // Check P dimensions consistency + PLUGIN_VALIDATE(in[3].desc.dims.d[4] == in[4].desc.dims.d[4], "Inconsistent dimensions for number of points"); + + // Check Lq dimensions consistency + PLUGIN_VALIDATE(in[3].desc.dims.d[1] == in[4].desc.dims.d[1], "Inconsistent dimensions for query length"); + + return STATUS_SUCCESS; + } + catch (std::exception const& e) + { + caughtError(e); + } + return STATUS_FAILURE; } -int32_t MultiscaleDeformableAttnPlugin::initialize() PLUGIN_NOEXCEPT +PluginFieldCollection const* 
MultiscaleDeformableAttnPlugin::getFieldsToSerialize() noexcept { - return 0; + try + { + mDataToSerialize.clear(); + // This plugin has no fields to serialize + mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.fields = mDataToSerialize.data(); + return &mFCToSerialize; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; } -void MultiscaleDeformableAttnPlugin::terminate() PLUGIN_NOEXCEPT {} - -size_t MultiscaleDeformableAttnPlugin::getSerializationSize() const PLUGIN_NOEXCEPT +// IPluginV3OneRuntime methods +size_t MultiscaleDeformableAttnPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, + DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept { + // No workspace needed for this plugin return 0; } -void MultiscaleDeformableAttnPlugin::serialize(void* buffer) const PLUGIN_NOEXCEPT {} - -void MultiscaleDeformableAttnPlugin::destroy() PLUGIN_NOEXCEPT +int32_t MultiscaleDeformableAttnPlugin::onShapeChange( + PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) noexcept { - delete this; + try + { + PLUGIN_VALIDATE(inputs != nullptr, "inputs pointer is null"); + PLUGIN_VALIDATE(outputs != nullptr, "outputs pointer is null"); + PLUGIN_VALIDATE(nbInputs == 5, "Expected 5 inputs"); + PLUGIN_VALIDATE(nbOutputs == 1, "Expected 1 output"); + + // Check for valid input dimensions + PLUGIN_VALIDATE(inputs[0].dims.nbDims == 4, "First input must have 4 dimensions"); + PLUGIN_VALIDATE(inputs[1].dims.nbDims == 2, "Second input must have 2 dimensions"); + PLUGIN_VALIDATE(inputs[2].dims.nbDims == 1, "Third input must have 1 dimension"); + PLUGIN_VALIDATE(inputs[3].dims.nbDims == 6, "Fourth input must have 6 dimensions"); + PLUGIN_VALIDATE(inputs[4].dims.nbDims == 5, "Fifth input must have 5 dimensions"); + + // Check M dimensions consistency + PLUGIN_VALIDATE(inputs[0].dims.d[2] == inputs[3].dims.d[2], "Inconsistent dimensions for number of heads"); + PLUGIN_VALIDATE(inputs[0].dims.d[2] == inputs[4].dims.d[2], "Inconsistent dimensions for number of heads"); + + // Check L dimensions consistency + PLUGIN_VALIDATE(inputs[1].dims.d[0] == inputs[2].dims.d[0], "Inconsistent dimensions for number of levels"); + PLUGIN_VALIDATE(inputs[1].dims.d[0] == inputs[3].dims.d[3], "Inconsistent dimensions for number of levels"); + PLUGIN_VALIDATE(inputs[1].dims.d[0] == inputs[4].dims.d[3], "Inconsistent dimensions for number of levels"); + + // Check P dimensions consistency + PLUGIN_VALIDATE(inputs[3].dims.d[4] == inputs[4].dims.d[4], "Inconsistent dimensions for number of points"); + + // Check Lq dimensions consistency + PLUGIN_VALIDATE(inputs[3].dims.d[1] == inputs[4].dims.d[1], "Inconsistent dimensions for query length"); + + return STATUS_SUCCESS; + } + catch (std::exception const& e) + { + caughtError(e); + } + return STATUS_FAILURE; } -void MultiscaleDeformableAttnPlugin::setPluginNamespace(char const* pluginNamespace) PLUGIN_NOEXCEPT +IPluginV3* MultiscaleDeformableAttnPlugin::attachToContext(IPluginResourceContext* context) noexcept { - mNamespace = pluginNamespace; + try + { + // No resources need to be attached + return clone(); + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; } -char const* MultiscaleDeformableAttnPlugin::getPluginNamespace() const PLUGIN_NOEXCEPT + +int32_t MultiscaleDeformableAttnPlugin::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, + void const* const* inputs, void* const* outputs, 
void* workspace, cudaStream_t stream) noexcept
 {
-    return mNamespace.c_str();
-}
+    try
+    {
+        PLUGIN_VALIDATE(
+            inputDesc != nullptr && inputs != nullptr && outputs != nullptr, "Null pointers found in enqueue");
+
+        int32_t const batch = inputDesc[0].dims.d[0];
+        int32_t spatialSize = inputDesc[0].dims.d[1];
+        int32_t numHeads = inputDesc[0].dims.d[2];
+        int32_t channels = inputDesc[0].dims.d[3];
+        int32_t numLevels = inputDesc[1].dims.d[0];
+        int32_t numQuery = inputDesc[3].dims.d[1];
+        int32_t numPoint = inputDesc[3].dims.d[4];
+        int32_t rc = 0;
+
+        if (inputDesc[0].type == DataType::kFLOAT)
+        {
+            auto const* value = static_cast<float const*>(inputs[0]);
+            auto const* spatialShapes = static_cast<int32_t const*>(inputs[1]);
+            auto const* levelStartIndex = static_cast<int32_t const*>(inputs[2]);
+            auto const* samplingLoc = static_cast<float const*>(inputs[3]);
+            auto const* attnWeight = static_cast<float const*>(inputs[4]);
+            auto* output = static_cast<float*>(outputs[0]);
+
+            rc = ms_deform_attn_cuda_forward(stream, value, spatialShapes, levelStartIndex, samplingLoc, attnWeight,
+                output, batch, spatialSize, numHeads, channels, numLevels, numQuery, numPoint);
+        }
+        else if (inputDesc[0].type == DataType::kHALF)
+        {
+            auto const* value = static_cast<__half const*>(inputs[0]);
+            auto const* spatialShapes = static_cast<int32_t const*>(inputs[1]);
+            auto const* levelStartIndex = static_cast<int32_t const*>(inputs[2]);
+            auto const* samplingLoc = static_cast<__half const*>(inputs[3]);
+            auto const* attnWeight = static_cast<__half const*>(inputs[4]);
+            auto* output = static_cast<__half*>(outputs[0]);
+
+            rc = ms_deform_attn_cuda_forward(stream, value, spatialShapes, levelStartIndex, samplingLoc, attnWeight,
+                output, batch, spatialSize, numHeads, channels, numLevels, numQuery, numPoint);
+        }
+        else
+        {
+            PLUGIN_VALIDATE(false, "Unsupported data type");
+        }
-// Pluginv1 Creator
+        return rc;
+    }
+    catch (std::exception const& e)
+    {
+        caughtError(e);
+    }
+    return STATUS_FAILURE;
+}
+// Plugin Creator Implementation
 MultiscaleDeformableAttnPluginCreator::MultiscaleDeformableAttnPluginCreator()
 {
     mPluginAttributes.clear();
@@ -221,44 +373,28 @@ MultiscaleDeformableAttnPluginCreator::MultiscaleDeformableAttnPluginCreator()
     mFC.fields = mPluginAttributes.data();
 }

-char const* MultiscaleDeformableAttnPluginCreator::getPluginName() const PLUGIN_NOEXCEPT
+char const* MultiscaleDeformableAttnPluginCreator::getPluginName() const noexcept
 {
     return DMHA_NAME;
 }

-char const* MultiscaleDeformableAttnPluginCreator::getPluginVersion() const PLUGIN_NOEXCEPT
+char const* MultiscaleDeformableAttnPluginCreator::getPluginVersion() const noexcept
 {
     return DMHA_VERSION;
 }

-nvinfer1::PluginFieldCollection const* MultiscaleDeformableAttnPluginCreator::getFieldNames() PLUGIN_NOEXCEPT
+PluginFieldCollection const* MultiscaleDeformableAttnPluginCreator::getFieldNames() noexcept
 {
     return &mFC;
 }

-IPluginV2* MultiscaleDeformableAttnPluginCreator::createPlugin(
-    char const* name, PluginFieldCollection const* fc) PLUGIN_NOEXCEPT
-{
-    try
-    {
-        MultiscaleDeformableAttnPlugin* plugin = new MultiscaleDeformableAttnPlugin();
-        return plugin;
-    }
-    catch (std::exception const& e)
-    {
-        caughtError(e);
-    }
-    return nullptr;
-}
-
-IPluginV2* MultiscaleDeformableAttnPluginCreator::deserializePlugin(
-    char const* name, void const* serialData, size_t serialLength) PLUGIN_NOEXCEPT
+IPluginV3* MultiscaleDeformableAttnPluginCreator::createPlugin(
+    char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept
 {
     try
     {
-        auto plugin = new MultiscaleDeformableAttnPlugin(serialData, serialLength);
-
plugin->setPluginNamespace(getPluginNamespace()); - return plugin; + // This plugin doesn't have any configurable parameters + return new MultiscaleDeformableAttnPlugin(); } catch (std::exception const& e) { @@ -267,12 +403,12 @@ IPluginV2* MultiscaleDeformableAttnPluginCreator::deserializePlugin( return nullptr; } -void MultiscaleDeformableAttnPluginCreator::setPluginNamespace(char const* pluginNamespace) PLUGIN_NOEXCEPT +void MultiscaleDeformableAttnPluginCreator::setPluginNamespace(char const* pluginNamespace) noexcept { mNamespace = pluginNamespace; } -char const* MultiscaleDeformableAttnPluginCreator::getPluginNamespace() const PLUGIN_NOEXCEPT +char const* MultiscaleDeformableAttnPluginCreator::getPluginNamespace() const noexcept { return mNamespace.c_str(); } diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.h b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.h index 7f96db6b7..3329db8df 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.h +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,118 +15,107 @@ * limitations under the License. */ +/* + * V3 version of the plugin using IPluginV3 interfaces. + * This implementation follows TensorRT's plugin V3 API. + */ + #ifndef TRT_MULTISCALE_DEFORMABLE_ATTN_PLUGIN_H #define TRT_MULTISCALE_DEFORMABLE_ATTN_PLUGIN_H -// For loadLibrary -#ifdef _MSC_VER -// Needed so that the max/min definitions in windows.h do not conflict with -// std::max/min. 
-#define NOMINMAX -#include -#undef NOMINMAX -#else -#include -#endif - +// Standard library includes #include #include #include -#include - -#include "NvInfer.h" #include "NvInferPlugin.h" -#include "NvInferVersion.h" +// TensorRT includes #include "common/plugin.h" -#if NV_TENSORRT_MAJOR > 7 -#define PLUGIN_NOEXCEPT noexcept -#else -#define PLUGIN_NOEXCEPT -#endif - -using namespace nvinfer1::plugin; - namespace nvinfer1 { namespace plugin { -class MultiscaleDeformableAttnPlugin : public nvinfer1::IPluginV2DynamicExt + +// Forward declarations +class MultiscaleDeformableAttnPlugin; +class MultiscaleDeformableAttnPluginCreator; + +// V3 Plugin implementation +class MultiscaleDeformableAttnPlugin : public IPluginV3, + public IPluginV3OneCore, + public IPluginV3OneBuild, + public IPluginV3OneRuntime { public: + // Constructors/destructors MultiscaleDeformableAttnPlugin(); - - MultiscaleDeformableAttnPlugin(void const* data, size_t length); - - // IPluginV2DynamicExt methods - nvinfer1::IPluginV2DynamicExt* clone() const PLUGIN_NOEXCEPT override; - nvinfer1::DimsExprs getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, - nvinfer1::IExprBuilder& exprBuilder) PLUGIN_NOEXCEPT override; - bool supportsFormatCombination(int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, - int32_t nbOutputs) PLUGIN_NOEXCEPT override; - void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) PLUGIN_NOEXCEPT override; - size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const PLUGIN_NOEXCEPT override; - int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) PLUGIN_NOEXCEPT override; - void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, - nvinfer1::IGpuAllocator* gpuAllocator) PLUGIN_NOEXCEPT override; - void detachFromContext() PLUGIN_NOEXCEPT override; - - // IPluginV2Ext Methods - nvinfer1::DataType getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const PLUGIN_NOEXCEPT override; - - // IPluginV2 Methods - char const* getPluginType() const PLUGIN_NOEXCEPT override; - char const* getPluginVersion() const PLUGIN_NOEXCEPT override; - int32_t getNbOutputs() const PLUGIN_NOEXCEPT override; - int32_t initialize() PLUGIN_NOEXCEPT override; - void terminate() PLUGIN_NOEXCEPT override; - size_t getSerializationSize() const PLUGIN_NOEXCEPT override; - void serialize(void* buffer) const PLUGIN_NOEXCEPT override; - void destroy() PLUGIN_NOEXCEPT override; - void setPluginNamespace(char const* pluginNamespace) PLUGIN_NOEXCEPT override; - char const* getPluginNamespace() const PLUGIN_NOEXCEPT override; + ~MultiscaleDeformableAttnPlugin() = default; + + // IPluginV3 methods + IPluginCapability* getCapabilityInterface(PluginCapabilityType type) noexcept override; + + // IPluginV3OneCore methods + char const* getPluginName() const noexcept override; + char const* getPluginVersion() const noexcept override; + char const* getPluginNamespace() const noexcept override; + void setPluginNamespace(char const* pluginNamespace) noexcept; + int32_t getNbOutputs() const noexcept override; + IPluginV3* clone() noexcept override; + + // IPluginV3OneBuild methods + bool 
supportsFormatCombination( + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; + int32_t getOutputDataTypes( + DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept override; + int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, + int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept override; + int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, + int32_t nbOutputs) noexcept override; + PluginFieldCollection const* getFieldsToSerialize() noexcept override; + + // IPluginV3OneRuntime methods + size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, + DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; + int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs, + void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; + IPluginV3* attachToContext(IPluginResourceContext* context) noexcept override; + int32_t onShapeChange(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, + int32_t nbOutputs) noexcept override; private: - std::string mNamespace; + // Serialization helpers + std::vector mDataToSerialize; + PluginFieldCollection mFCToSerialize; -#if NV_TENSORRT_MAJOR < 8 - using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch; - using nvinfer1::IPluginV2DynamicExt::configurePlugin; - using nvinfer1::IPluginV2DynamicExt::enqueue; - using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; - using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; - using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch; - using nvinfer1::IPluginV2DynamicExt::supportsFormat; -#endif + // Plugin namespace + std::string mNamespace; }; -class MultiscaleDeformableAttnPluginCreator : public nvinfer1::IPluginCreator +// Plugin creator class +class MultiscaleDeformableAttnPluginCreator : public IPluginCreatorV3One { public: + // Constructor MultiscaleDeformableAttnPluginCreator(); - char const* getPluginName() const PLUGIN_NOEXCEPT override; - char const* getPluginVersion() const PLUGIN_NOEXCEPT override; - nvinfer1::PluginFieldCollection const* getFieldNames() PLUGIN_NOEXCEPT override; - nvinfer1::IPluginV2* createPlugin( - char const* name, nvinfer1::PluginFieldCollection const* fc) PLUGIN_NOEXCEPT override; - nvinfer1::IPluginV2* deserializePlugin( - char const* name, void const* serialData, size_t serialLength) PLUGIN_NOEXCEPT override; - void setPluginNamespace(char const* pluginNamespace) PLUGIN_NOEXCEPT override; - char const* getPluginNamespace() const PLUGIN_NOEXCEPT override; + + // IPluginCreatorV3One methods + char const* getPluginName() const noexcept override; + char const* getPluginVersion() const noexcept override; + PluginFieldCollection const* getFieldNames() noexcept override; + IPluginV3* createPlugin(char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept override; + void setPluginNamespace(char const* pluginNamespace) noexcept; + char const* getPluginNamespace() const noexcept override; private: - nvinfer1::PluginFieldCollection mFC; - std::vector mPluginAttributes; + // Plugin fields and namespace + PluginFieldCollection mFC; + std::vector mPluginAttributes; std::string mNamespace; }; } // namespace plugin } // namespace nvinfer1 
-#endif +#endif // TRT_MULTISCALE_DEFORMABLE_ATTN_PLUGIN_H diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp new file mode 100644 index 000000000..cb5206fa2 --- /dev/null +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp @@ -0,0 +1,287 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Legacy version of the plugin maintained for backward compatibility. + * This implementation is based on IPluginV2 interfaces. + */ +#include "multiscaleDeformableAttnPluginLegacy.h" +#include "multiscaleDeformableAttn.h" + +using namespace nvinfer1; +using namespace nvinfer1::plugin; + +namespace nvinfer1::plugin +{ + +namespace +{ +static char const* DMHA_VERSION{"1"}; +static char const* DMHA_NAME{"MultiscaleDeformableAttnPlugin_TRT"}; +} // namespace + +// // Register the plugin with TensorRT +// REGISTER_TENSORRT_PLUGIN(MultiscaleDeformableAttnPluginCreatorLegacy); + +MultiscaleDeformableAttnPluginLegacy::MultiscaleDeformableAttnPluginLegacy() {} + +MultiscaleDeformableAttnPluginLegacy::MultiscaleDeformableAttnPluginLegacy(void const* data, size_t length) {} + +nvinfer1::IPluginV2DynamicExt* MultiscaleDeformableAttnPluginLegacy::clone() const noexcept +{ + try + { + MultiscaleDeformableAttnPluginLegacy* plugin = new MultiscaleDeformableAttnPluginLegacy(); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +nvinfer1::DimsExprs MultiscaleDeformableAttnPluginLegacy::getOutputDimensions(int32_t outputIndex, + nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +{ + nvinfer1::DimsExprs ret; + ret.nbDims = 4; + ret.d[0] = inputs[0].d[0]; + ret.d[1] = inputs[3].d[1]; + ret.d[2] = inputs[0].d[2]; + ret.d[3] = inputs[0].d[3]; + + return ret; +} + +bool MultiscaleDeformableAttnPluginLegacy::supportsFormatCombination( + int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept +{ + PLUGIN_ASSERT((nbInputs == 5)); + PLUGIN_ASSERT((nbOutputs == 1)); + + if (inOut[pos].format == nvinfer1::TensorFormat::kLINEAR) + { + if ((pos == 1) || (pos == 2)) + { + return (inOut[pos].type == nvinfer1::DataType::kINT32); + } + return ((inOut[pos].type == inOut[0].type) + && ((inOut[pos].type == nvinfer1::DataType::kFLOAT) || (inOut[pos].type == nvinfer1::DataType::kHALF))); + } + return false; +} + +void MultiscaleDeformableAttnPluginLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* inputs, + int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +{ + // Check for valid input dimensions + PLUGIN_ASSERT(inputs[0].desc.dims.nbDims == 4); + 
PLUGIN_ASSERT(inputs[1].desc.dims.nbDims == 2);
+    PLUGIN_ASSERT(inputs[2].desc.dims.nbDims == 1);
+    PLUGIN_ASSERT(inputs[3].desc.dims.nbDims == 6);
+    PLUGIN_ASSERT(inputs[4].desc.dims.nbDims == 5);
+
+    // Check M dimensions consistency
+    PLUGIN_ASSERT(inputs[0].desc.dims.d[2] == inputs[3].desc.dims.d[2]);
+    PLUGIN_ASSERT(inputs[0].desc.dims.d[2] == inputs[4].desc.dims.d[2]);
+
+    // Check L dimensions consistency
+    PLUGIN_ASSERT(inputs[1].desc.dims.d[0] == inputs[2].desc.dims.d[0]);
+    PLUGIN_ASSERT(inputs[1].desc.dims.d[0] == inputs[3].desc.dims.d[3]);
+    PLUGIN_ASSERT(inputs[1].desc.dims.d[0] == inputs[4].desc.dims.d[3]);
+
+    // Check P dimensions consistency
+    PLUGIN_ASSERT(inputs[3].desc.dims.d[4] == inputs[4].desc.dims.d[4]);
+
+    // Check Lq dimensions consistency
+    PLUGIN_ASSERT(inputs[3].desc.dims.d[1] == inputs[4].desc.dims.d[1]);
+}
+
+size_t MultiscaleDeformableAttnPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs,
+    int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept
+{
+    return 0;
+}
+
+int32_t MultiscaleDeformableAttnPluginLegacy::enqueue(nvinfer1::PluginTensorDesc const* inputDesc,
+    nvinfer1::PluginTensorDesc const* /* outputDesc */, void const* const* inputs, void* const* outputs,
+    void* /* workSpace */, cudaStream_t stream) noexcept
+{
+    PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr);
+
+    int32_t const batch = inputDesc[0].dims.d[0];
+    int32_t spatial_size = inputDesc[0].dims.d[1];
+    int32_t num_heads = inputDesc[0].dims.d[2];
+    int32_t channels = inputDesc[0].dims.d[3];
+    int32_t num_levels = inputDesc[1].dims.d[0];
+    int32_t num_query = inputDesc[3].dims.d[1];
+    int32_t num_point = inputDesc[3].dims.d[4];
+    int32_t rc = 0;
+    if (inputDesc[0].type == nvinfer1::DataType::kFLOAT)
+    {
+        float const* value = static_cast<float const*>(inputs[0]);
+        int32_t const* spatialShapes = static_cast<int32_t const*>(inputs[1]);
+        int32_t const* levelStartIndex = static_cast<int32_t const*>(inputs[2]);
+        float const* samplingLoc = static_cast<float const*>(inputs[3]);
+        float const* attnWeight = static_cast<float const*>(inputs[4]);
+        float* output = static_cast<float*>(outputs[0]);
+
+        rc = ms_deform_attn_cuda_forward(stream, value, spatialShapes, levelStartIndex, samplingLoc, attnWeight, output,
+            batch, spatial_size, num_heads, channels, num_levels, num_query, num_point);
+    }
+    else if (inputDesc[0].type == nvinfer1::DataType::kHALF)
+    {
+        __half const* value = static_cast<__half const*>(inputs[0]);
+        int32_t const* spatialShapes = static_cast<int32_t const*>(inputs[1]);
+        int32_t const* levelStartIndex = static_cast<int32_t const*>(inputs[2]);
+        __half const* samplingLoc = static_cast<__half const*>(inputs[3]);
+        __half const* attnWeight = static_cast<__half const*>(inputs[4]);
+        __half* output = static_cast<__half*>(outputs[0]);
+
+        rc = ms_deform_attn_cuda_forward(stream, value, spatialShapes, levelStartIndex, samplingLoc, attnWeight, output,
+            batch, spatial_size, num_heads, channels, num_levels, num_query, num_point);
+    }
+
+    return rc;
+}
+
+void MultiscaleDeformableAttnPluginLegacy::attachToContext(
+    cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept
+{
+}
+
+void MultiscaleDeformableAttnPluginLegacy::detachFromContext() noexcept {}
+
+// IPluginV2Ext Methods
+nvinfer1::DataType MultiscaleDeformableAttnPluginLegacy::getOutputDataType(
+    int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept
+{
+    return inputTypes[0];
+}
+
+// IPluginV2 Methods
+char const* MultiscaleDeformableAttnPluginLegacy::getPluginType()
const noexcept +{ + return DMHA_NAME; +} + +char const* MultiscaleDeformableAttnPluginLegacy::getPluginVersion() const noexcept +{ + return DMHA_VERSION; +} + +int32_t MultiscaleDeformableAttnPluginLegacy::getNbOutputs() const noexcept +{ + return 1; +} + +int32_t MultiscaleDeformableAttnPluginLegacy::initialize() noexcept +{ + return 0; +} + +void MultiscaleDeformableAttnPluginLegacy::terminate() noexcept {} + +size_t MultiscaleDeformableAttnPluginLegacy::getSerializationSize() const noexcept +{ + return 0; +} + +void MultiscaleDeformableAttnPluginLegacy::serialize(void* buffer) const noexcept {} + +void MultiscaleDeformableAttnPluginLegacy::destroy() noexcept +{ + delete this; +} + +void MultiscaleDeformableAttnPluginLegacy::setPluginNamespace(char const* pluginNamespace) noexcept +{ + mNamespace = pluginNamespace; +} +char const* MultiscaleDeformableAttnPluginLegacy::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); +} + +// Pluginv1 Creator + +MultiscaleDeformableAttnPluginCreatorLegacy::MultiscaleDeformableAttnPluginCreatorLegacy() +{ + mPluginAttributes.clear(); + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); +} + +char const* MultiscaleDeformableAttnPluginCreatorLegacy::getPluginName() const noexcept +{ + return DMHA_NAME; +} + +char const* MultiscaleDeformableAttnPluginCreatorLegacy::getPluginVersion() const noexcept +{ + return DMHA_VERSION; +} + +nvinfer1::PluginFieldCollection const* MultiscaleDeformableAttnPluginCreatorLegacy::getFieldNames() noexcept +{ + return &mFC; +} + +IPluginV2* MultiscaleDeformableAttnPluginCreatorLegacy::createPlugin( + char const* name, PluginFieldCollection const* fc) noexcept +{ + try + { + MultiscaleDeformableAttnPluginLegacy* plugin = new MultiscaleDeformableAttnPluginLegacy(); + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +IPluginV2* MultiscaleDeformableAttnPluginCreatorLegacy::deserializePlugin( + char const* name, void const* serialData, size_t serialLength) noexcept +{ + try + { + auto plugin = new MultiscaleDeformableAttnPluginLegacy(serialData, serialLength); + plugin->setPluginNamespace(getPluginNamespace()); + return plugin; + } + catch (std::exception const& e) + { + caughtError(e); + } + return nullptr; +} + +void MultiscaleDeformableAttnPluginCreatorLegacy::setPluginNamespace(char const* pluginNamespace) noexcept +{ + mNamespace = pluginNamespace; +} + +char const* MultiscaleDeformableAttnPluginCreatorLegacy::getPluginNamespace() const noexcept +{ + return mNamespace.c_str(); +} + +} // namespace nvinfer1::plugin diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h new file mode 100644 index 000000000..18da1b789 --- /dev/null +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h @@ -0,0 +1,121 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Legacy version of the plugin maintained for backward compatibility. + * This implementation is based on IPluginV2 interfaces. + */ + +#ifndef TRT_MULTISCALE_DEFORMABLE_ATTN_PLUGIN_LEGACY_H +#define TRT_MULTISCALE_DEFORMABLE_ATTN_PLUGIN_LEGACY_H + +// Standard library includes +#include +#include +#include + +#include "NvInferPlugin.h" + +// TensorRT includes +#include "common/plugin.h" + +namespace nvinfer1 +{ +namespace plugin +{ + +// Legacy V2 Plugin implementation +class MultiscaleDeformableAttnPluginLegacy : public nvinfer1::IPluginV2DynamicExt +{ +public: + // Constructors/destructors + MultiscaleDeformableAttnPluginLegacy(); + MultiscaleDeformableAttnPluginLegacy(void const* data, size_t length); + + // IPluginV2DynamicExt methods + nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; + nvinfer1::DimsExprs getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, + nvinfer1::IExprBuilder& exprBuilder) noexcept override; + bool supportsFormatCombination( + int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override; + void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, + nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; + size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, + nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; + int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; + void attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, + nvinfer1::IGpuAllocator* gpuAllocator) noexcept override; + void detachFromContext() noexcept override; + + // IPluginV2Ext Methods + nvinfer1::DataType getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; + + // IPluginV2 Methods + char const* getPluginType() const noexcept override; + char const* getPluginVersion() const noexcept override; + int32_t getNbOutputs() const noexcept override; + int32_t initialize() noexcept override; + void terminate() noexcept override; + size_t getSerializationSize() const noexcept override; + void serialize(void* buffer) const noexcept override; + void destroy() noexcept override; + void setPluginNamespace(char const* pluginNamespace) noexcept override; + char const* getPluginNamespace() const noexcept override; + +private: + std::string mNamespace; + +#if NV_TENSORRT_MAJOR < 8 + using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch; + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::enqueue; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch; + using nvinfer1::IPluginV2DynamicExt::supportsFormat; +#endif +}; + +// Legacy 
creator class +class MultiscaleDeformableAttnPluginCreatorLegacy : public nvinfer1::IPluginCreator +{ +public: + // Constructor + MultiscaleDeformableAttnPluginCreatorLegacy(); + + // IPluginCreator methods + char const* getPluginName() const noexcept override; + char const* getPluginVersion() const noexcept override; + nvinfer1::PluginFieldCollection const* getFieldNames() noexcept override; + nvinfer1::IPluginV2* createPlugin(char const* name, nvinfer1::PluginFieldCollection const* fc) noexcept override; + nvinfer1::IPluginV2* deserializePlugin( + char const* name, void const* serialData, size_t serialLength) noexcept override; + void setPluginNamespace(char const* pluginNamespace) noexcept override; + char const* getPluginNamespace() const noexcept override; + +private: + nvinfer1::PluginFieldCollection mFC; + std::vector mPluginAttributes; + std::string mNamespace; +}; + +} // namespace plugin +} // namespace nvinfer1 + +#endif // TRT_MULTISCALE_DEFORMABLE_ATTN_PLUGIN_LEGACY_H diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 80360ede0..86b7cccd9 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -33,8 +33,8 @@ set(TRT_BUILD_PYTHON_PY_VERSIONS 3.8 3.9 3.10 3.11 3.12 3.13 CACHE STRING "The l set(TRT_PYTHON_MODULE_NAMES "tensorrt" "tensorrt_lean" - "tensorrt_dispatch" ) + list(APPEND TRT_PYTHON_MODULE_NAMES "tensorrt_dispatch") if (${TRT_BUILD_ENABLE_NEW_PYTHON_FLOW}) @@ -91,7 +91,7 @@ function(createBindingLibrary moduleName pyVersion) # Create an indirect refernce to the add_${libName}_source function which can be called by the subdirectories. # This allows each subdir to add files to the individual targets with unique binary dirs on each call. set(ADD_SOURCES_FUNCTION add_${libName}_source) - set(SUBDIR_BINARY_DIR_PREFIX ${libName}) + set(SUBDIR_BINARY_DIR_PREFIX subbuild/${libName}) add_subdirectory(src ${SUBDIR_BINARY_DIR_PREFIX}/src) target_link_libraries(${libName} PRIVATE @@ -235,6 +235,8 @@ function(processWheelTemplates moduleName pyVersion) --trt-py-version ${TensorRT_VERSION} --cuda-version ${TRT_CUDA_VERSION} --trt-version ${TensorRT_VERSION} + --trt-nvinfer-name ${TRT_NVINFER_NAME} + --trt-onnxparser-name ${TRT_ONNXPARSER_NAME} DEPENDS scripts/process_wheel_template.py ${CMAKE_CURRENT_LIST_DIR}/packaging/bindings_wheel/tensorrt/${filePath} @@ -438,21 +440,22 @@ else() endif() if(MSVC) - set(nvinfer_lib_name "nvinfer_${TENSORRT_MAJOR_VERSION}") + set(nvinfer_lib_name "${TRT_NVINFER_NAME}_${TENSORRT_MAJOR_VERSION}${TRT_LIB_SUFFIX}") set(nvinfer_plugin_lib_name "nvinfer_plugin_${TENSORRT_MAJOR_VERSION}") - set(nvonnxparser_lib_name "nvonnxparser_${TENSORRT_MAJOR_VERSION}") + set(nvonnxparser_lib_name "${TRT_ONNXPARSER_NAME}_${TENSORRT_MAJOR_VERSION}${TRT_LIB_SUFFIX}") set(nvinfer_lean_lib_name "nvinfer_lean_${TENSORRT_MAJOR_VERSION}${vfc_suffix}") set(nvinfer_dispatch_lib_name "nvinfer_dispatch_${TENSORRT_MAJOR_VERSION}${vfc_suffix}") else() - set(nvinfer_lib_name "nvinfer") + set(nvinfer_lib_name "${TRT_NVINFER_NAME}") set(nvinfer_plugin_lib_name "nvinfer_plugin") - set(nvonnxparser_lib_name "nvonnxparser") + set(nvonnxparser_lib_name "${TRT_ONNXPARSER_NAME}") set(nvinfer_lean_lib_name "nvinfer_lean${vfc_suffix}") set(nvinfer_dispatch_lib_name "nvinfer_dispatch${vfc_suffix}") endif() if(${TENSORRT_MODULE} STREQUAL "tensorrt") - set(TRT_LIBS ${nvinfer_lib_name} ${nvonnxparser_lib_name} ${nvinfer_plugin_lib_name}) + set(TRT_LIBS ${nvinfer_lib_name} ${nvonnxparser_lib_name}) + list(APPEND TRT_LIBS ${nvinfer_plugin_lib_name}) 
elseif(${TENSORRT_MODULE} STREQUAL "tensorrt_lean") set(TRT_LIBS ${nvinfer_lean_lib_name}) elseif(${TENSORRT_MODULE} STREQUAL "tensorrt_dispatch") diff --git a/python/build.sh b/python/build.sh index 1bba6deb3..44d193342 100755 --- a/python/build.sh +++ b/python/build.sh @@ -36,14 +36,15 @@ cmake .. -DCMAKE_BUILD_TYPE=Release \ -DCUDA_INCLUDE_DIRS=${CUDA_ROOT}/include \ -DTENSORRT_ROOT=${ROOT_PATH} \ -DTENSORRT_MODULE=${TENSORRT_MODULE} \ - -DTENSORRT_LIBPATH=${TRT_LIBPATH} + -DTENSORRT_LIBPATH=${TRT_LIBPATH} \ + -DTRT_ONNXPARSER_NAME=nvonnxparser make -j12 # Generate wheel -TRT_MAJOR=$(awk '/^\#define NV_TENSORRT_MAJOR/ {print $3}' ${ROOT_PATH}/include/NvInferVersion.h) -TRT_MINOR=$(awk '/^\#define NV_TENSORRT_MINOR/ {print $3}' ${ROOT_PATH}/include/NvInferVersion.h) -TRT_PATCH=$(awk '/^\#define NV_TENSORRT_PATCH/ {print $3}' ${ROOT_PATH}/include/NvInferVersion.h) -TRT_BUILD=$(awk '/^\#define NV_TENSORRT_BUILD/ {print $3}' ${ROOT_PATH}/include/NvInferVersion.h) +TRT_MAJOR=$(awk '/^\#define TRT_MAJOR_ENTERPRISE/ {print $3}' ${ROOT_PATH}/include/NvInferVersion.h) +TRT_MINOR=$(awk '/^\#define TRT_MINOR_ENTERPRISE/ {print $3}' ${ROOT_PATH}/include/NvInferVersion.h) +TRT_PATCH=$(awk '/^\#define TRT_PATCH_ENTERPRISE/ {print $3}' ${ROOT_PATH}/include/NvInferVersion.h) +TRT_BUILD=$(awk '/^\#define TRT_BUILD_ENTERPRISE/ {print $3}' ${ROOT_PATH}/include/NvInferVersion.h) TRT_VERSION=${TRT_MAJOR}.${TRT_MINOR}.${TRT_PATCH}.${TRT_BUILD} TRT_MAJMINPATCH=${TRT_MAJOR}.${TRT_MINOR}.${TRT_PATCH} @@ -55,6 +56,7 @@ expand_vars_cp () { -e "s|\#\#TENSORRT_MAJMINPATCH\#\#|${TRT_MAJMINPATCH}|g" \ -e "s|\#\#TENSORRT_PYTHON_VERSION\#\#|${TRT_MAJMINPATCH}|g" \ -e "s|\#\#TENSORRT_MODULE\#\#|${TENSORRT_MODULE}|g" \ + -e "s/##TENSORRT_PLUGIN_DISABLED##/false/g" \ ${1} > ${2} } diff --git a/python/docstrings/infer/pyCoreDoc.h b/python/docstrings/infer/pyCoreDoc.h index 7dbc2daee..60d08e4f5 100644 --- a/python/docstrings/infer/pyCoreDoc.h +++ b/python/docstrings/infer/pyCoreDoc.h @@ -214,7 +214,7 @@ constexpr char const* set_shape_input = R"trtdoc( Set the minimum/optimum/maximum values for a shape input tensor. This function must be called for every input tensor ``t`` that is a shape tensor (``t.is_shape`` == ``True``). - This implies that the datatype of ``t`` is ``int32``, the rank is either 0 or 1, and the dimensions of ``t`` + This implies that the datatype of ``t`` is ``int64`` or ``int32``, the rank is either 0 or 1, and the dimensions of ``t`` are fixed at network definition time. This function must NOT be called for any input tensor that is not a shape tensor. @@ -610,6 +610,12 @@ constexpr char const* set_all_tensors_debug_state = R"trtdoc( :arg flag: True if turning on debug state of tensor. False if turning off. )trtdoc"; + +constexpr char const* get_runtime_config = R"trtdoc( + Get the runtime configuration. From the execution context. + + :returns: The runtime configuration. +)trtdoc"; } // namespace IExecutionContextDoc namespace IDebugListenerDoc @@ -697,6 +703,27 @@ constexpr char const* phase_finish = R"trtdoc( )trtdoc"; } // namespace IProgressMonitorDoc +namespace IRuntimeConfigDoc +{ +constexpr char const* descr = R"trtdoc( + A runtime configuration for an :class:`ICudaEngine` . +)trtdoc"; + +constexpr char const* set_execution_context_allocation_strategy = R"trtdoc( + Set the execution context allocation strategy. + + :arg strategy: The execution context allocation strategy. 
+)trtdoc"; + +constexpr char const* get_execution_context_allocation_strategy = R"trtdoc( + Get the execution context allocation strategy. + + :returns: The execution context allocation strategy. +)trtdoc"; + +} // namespace IRuntimeConfigDoc + + namespace ICudaEngineDoc { constexpr char const* descr = R"trtdoc( @@ -747,6 +774,19 @@ constexpr char const* create_execution_context_without_device_memory = R"trtdoc( :returns: An :class:`IExecutionContext` without device memory allocated. )trtdoc"; +constexpr char const* create_execution_context_with_runtime_config = R"trtdoc( + Create an :class:`IExecutionContext` with a runtime configuration. + + :arg runtime_config: The runtime configuration. + :returns: The newly created :class:`IExecutionContext` . +)trtdoc"; + +constexpr char const* create_runtime_config = R"trtdoc( + Create a runtime configuration. + + :returns: The newly created :class:`IRuntimeConfig` . +)trtdoc"; + constexpr char const* get_tensor_profile_values = R"trtdoc( Get minimum/optimum/maximum values for an input shape binding under an optimization profile. If the specified binding is not an input shape binding, an exception is raised. @@ -1722,6 +1762,7 @@ constexpr char const* ON_PROFILE_CHANGE = R"trtdoc(Reallocate for a profile when constexpr char const* USER_MANAGED = R"trtdoc(The user supplies custom allocation to the execution context.)trtdoc"; } // namespace ExecutionContextAllocationStrategyDoc + namespace BuilderDoc { constexpr char const* descr = R"trtdoc( diff --git a/python/docstrings/infer/pyGraphDoc.h b/python/docstrings/infer/pyGraphDoc.h index a2ac75bce..b228ab40c 100644 --- a/python/docstrings/infer/pyGraphDoc.h +++ b/python/docstrings/infer/pyGraphDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -2000,7 +2000,7 @@ constexpr const char* add_input = R"trtdoc( :arg name: The name of the tensor. Each input and output tensor must have a unique name. :arg dtype: The data type of the tensor. - :arg shape: The dimensions of the tensor. The total volume must be less than 2^31 elements. + :arg shape: The dimensions of the tensor. :returns: The newly added Tensor. )trtdoc"; diff --git a/python/docstrings/infer/pyPluginDoc.h b/python/docstrings/infer/pyPluginDoc.h index 40effa7a9..de2643ae0 100644 --- a/python/docstrings/infer/pyPluginDoc.h +++ b/python/docstrings/infer/pyPluginDoc.h @@ -381,12 +381,6 @@ constexpr const char* ipluginv3_descr = R"trtdoc( Every attribute must be explicitly initialized on Python-based plugins. These attributes will be read-only when accessed through a C++-based plugin. - :ivar num_outputs: :class:`int` The number of outputs from the plugin. This is used by the implementations of :class:`INetworkDefinition` and :class:`Builder`. In particular, it is called prior to any call to :func:`initialize`. - :ivar tensorrt_version: :class:`int` [READ ONLY] The API version with which this plugin was built. - :ivar plugin_name: :class:`str` The plugin name. Should match the plugin name returned by the corresponding plugin creator. - :ivar plugin_version: :class:`str` The plugin version. Should match the plugin version returned by the corresponding plugin creator. - :ivar plugin_namespace: :class:`str` The namespace that this plugin object belongs to. 
Ideally, all plugin objects from the same plugin library should have the same namespace. - :ivar serialization_size: :class:`int` [READ ONLY] The size of the serialization buffer required. )trtdoc"; constexpr const char* iplugincapability_descr = R"trtdoc( @@ -844,8 +838,7 @@ constexpr const char* descr = R"trtdoc( Contains plugin attribute field names and associated data. This information can be parsed to decode necessary plugin metadata - :ivar num_fields: :class:`int` Number of :class:`PluginField` entries. - :ivar fields: :class:`list` PluginField entries. + The collection behaves like a Python iterable. )trtdoc"; } // namespace PluginFieldCollectionDoc @@ -861,7 +854,6 @@ namespace IPluginCreatorDoc constexpr const char* descr = R"trtdoc( Plugin creator class for user implemented layers - :ivar tensorrt_version: :class:`int` Number of :class:`PluginField` entries. :ivar name: :class:`str` Plugin name. :ivar plugin_version: :class:`str` Plugin version. :ivar field_names: :class:`list` List of fields that needs to be passed to :func:`create_plugin` . @@ -911,7 +903,6 @@ namespace IPluginCreatorV3OneDoc constexpr const char* descr = R"trtdoc( Plugin creator class for user implemented layers - :ivar tensorrt_version: :class:`int` Number of :class:`PluginField` entries. :ivar name: :class:`str` Plugin name. :ivar plugin_version: :class:`str` Plugin version. :ivar field_names: :class:`list` List of fields that needs to be passed to :func:`create_plugin` . diff --git a/python/docstrings/parsers/pyOnnxDoc.h b/python/docstrings/parsers/pyOnnxDoc.h index 5493b942e..031bd0b39 100644 --- a/python/docstrings/parsers/pyOnnxDoc.h +++ b/python/docstrings/parsers/pyOnnxDoc.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -225,6 +225,11 @@ constexpr const char* NATIVE_INSTANCENORM = R"trtdoc( This flag is required when building version-compatible or hardware-compatible engines. The flag is ON by default. )trtdoc"; +constexpr const char* ENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA = R"trtdoc( + Enable UINT8 as a quantization data type and asymmetric quantization with non-zero zero-point values in Quantize and Dequantize nodes. + This flag is set to be OFF by default. + The resulting engine must be built targeting DLA version >= 3.16. + )trtdoc"; } // namespace OnnxParserFlagDoc namespace ParserErrorDoc diff --git a/python/include/impl/NvInferPythonPlugin.h b/python/include/impl/NvInferPythonPlugin.h index 752ec54ca..d703ba52c 100644 --- a/python/include/impl/NvInferPythonPlugin.h +++ b/python/include/impl/NvInferPythonPlugin.h @@ -51,7 +51,6 @@ enum class PluginArgDataType : int32_t //! 32-bit signed integer kINT32 = 2, }; - //! \class ISymExpr //! \brief Generic interface for a scalar symbolic expression implementable by a Python plugin / TensorRT Python backend class ISymExpr @@ -116,6 +115,7 @@ class ISymExprs virtual ~ISymExprs() noexcept = default; }; + //! \enum QuickPluginCreationRequest //! 
\brief Communicates preference when a quickly deployable plugin is to be added to the network enum class QuickPluginCreationRequest : int32_t diff --git a/python/packaging/bindings_wheel/tensorrt/__init__.py b/python/packaging/bindings_wheel/tensorrt/__init__.py index 31e0dc164..4d9a9735b 100644 --- a/python/packaging/bindings_wheel/tensorrt/__init__.py +++ b/python/packaging/bindings_wheel/tensorrt/__init__.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -29,6 +29,9 @@ else: _libs_wheel_imported = True +_trt_lib_suffix = "" +if "##TENSORRT_NVINFER_NAME##".strip() == "tensorrt_rtx": + _trt_lib_suffix = "_##TENSORRT_MINOR##" if not _libs_wheel_imported and sys.platform.startswith("win"): log_found_dlls = bool(int(os.environ.get("TRT_LOG_FOUND_DLLS", 0))) @@ -48,6 +51,9 @@ def find_lib(name): print(f"Found {name} in path: {libpath}") return libpath + if ##TENSORRT_PLUGIN_DISABLED## and name.startswith("nvinfer_plugin"): + return None + if name.startswith("nvinfer_builder_resource"): return None @@ -58,9 +64,9 @@ def find_lib(name): # Order matters here because of dependencies LIBRARIES = { "tensorrt": [ - "nvinfer_##TENSORRT_MAJOR##.dll", + f"##TENSORRT_NVINFER_NAME##_##TENSORRT_MAJOR##{_trt_lib_suffix}.dll", "nvinfer_plugin_##TENSORRT_MAJOR##.dll", - "nvonnxparser_##TENSORRT_MAJOR##.dll", + f"##TENSORRT_ONNXPARSER_NAME##_##TENSORRT_MAJOR##{_trt_lib_suffix}.dll", "nvinfer_builder_resource_##TENSORRT_MAJOR##.dll", ], "tensorrt_dispatch": [ @@ -79,6 +85,7 @@ def find_lib(name): ctypes.CDLL(lib_path) del _libs_wheel_imported +del _trt_lib_suffix from .##TENSORRT_MODULE## import * diff --git a/python/scripts/process_wheel_template.py b/python/scripts/process_wheel_template.py index ae24d790b..5cf4fc93a 100644 --- a/python/scripts/process_wheel_template.py +++ b/python/scripts/process_wheel_template.py @@ -37,7 +37,9 @@ def main(): parser.add_argument("--trt-py-version", help="The version string for the python bindings being built. 
Usually `major.minor.patch.build`.", required=True)
     parser.add_argument("--cuda-version", help="The Cuda version (major.minor).", required=True)
     parser.add_argument("--trt-version", help="The TensorRT version (major.minor.patch).", required=True)
-
+    parser.add_argument("--plugin-disabled", help="Whether the plugin is disabled.", type=int, choices=[0,1], default=0, required=False)
+    parser.add_argument("--trt-nvinfer-name", help="The name of the nvinfer library.", required=True)
+    parser.add_argument("--trt-onnxparser-name", help="The name of the onnxparser library.", required=True)
     args, _ = parser.parse_known_args()

     if not os.path.isdir(args.src_dir):
@@ -57,6 +59,10 @@ def main():
     contents = contents.replace("##TENSORRT_PYTHON_VERSION##", args.trt_py_version)
     contents = contents.replace("##CUDA_MAJOR##", args.cuda_version.split(".")[0])
     contents = contents.replace("##TENSORRT_MAJOR##", args.trt_version.split(".")[0])
+    contents = contents.replace("##TENSORRT_MINOR##", args.trt_version.split(".")[1])
+    contents = contents.replace("##TENSORRT_PLUGIN_DISABLED##", "True" if args.plugin_disabled == 1 else "False")
+    contents = contents.replace("##TENSORRT_NVINFER_NAME##", args.trt_nvinfer_name)
+    contents = contents.replace("##TENSORRT_ONNXPARSER_NAME##", args.trt_onnxparser_name)

     dest_path = os.path.join(args.dst_dir, args.filepath)
     os.makedirs(os.path.dirname(dest_path), exist_ok=True)
diff --git a/python/src/infer/pyCore.cpp b/python/src/infer/pyCore.cpp
index 16074edc4..d3c23a7be 100644
--- a/python/src/infer/pyCore.cpp
+++ b/python/src/infer/pyCore.cpp
@@ -57,29 +57,29 @@ static const auto opt_profile_get_shape
 };

 static const auto opt_profile_set_shape_input = [](IOptimizationProfile& self, std::string const& inputName,
-    std::vector<int32_t> const& min, std::vector<int32_t> const& opt,
-    std::vector<int32_t> const& max) {
-    PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kMIN, min.data(), min.size()),
+    std::vector<int64_t> const& min, std::vector<int64_t> const& opt,
+    std::vector<int64_t> const& max) {
+    PY_ASSERT_RUNTIME_ERROR(self.setShapeValuesV2(inputName.c_str(), OptProfileSelector::kMIN, min.data(), min.size()),
         "min input provided for shape tensor is inconsistent with other inputs.");
-    PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kOPT, opt.data(), opt.size()),
+    PY_ASSERT_RUNTIME_ERROR(self.setShapeValuesV2(inputName.c_str(), OptProfileSelector::kOPT, opt.data(), opt.size()),
         "opt input provided for shape tensor is inconsistent with other inputs.");
-    PY_ASSERT_RUNTIME_ERROR(self.setShapeValues(inputName.c_str(), OptProfileSelector::kMAX, max.data(), max.size()),
+    PY_ASSERT_RUNTIME_ERROR(self.setShapeValuesV2(inputName.c_str(), OptProfileSelector::kMAX, max.data(), max.size()),
         "max input provided for shape tensor is inconsistent with other inputs.");
 };

 static const auto opt_profile_get_shape_input
-    = [](IOptimizationProfile& self, std::string const& inputName) -> std::vector<std::vector<int32_t>> {
-    std::vector<std::vector<int32_t>> shapes{};
+    = [](IOptimizationProfile& self, std::string const& inputName) -> std::vector<std::vector<int64_t>> {
+    std::vector<std::vector<int64_t>> shapes{};
     int32_t const shapeSize = self.getNbShapeValues(inputName.c_str());
-    int32_t const* shapePtr = self.getShapeValues(inputName.c_str(), OptProfileSelector::kMIN);
+    int64_t const* shapePtr = self.getShapeValuesV2(inputName.c_str(), OptProfileSelector::kMIN);
     // In the Python bindings, it is impossible to set only one shape in an optimization profile.
if (shapePtr && shapeSize >= 0) { shapes.emplace_back(shapePtr, shapePtr + shapeSize); - shapePtr = self.getShapeValues(inputName.c_str(), OptProfileSelector::kOPT); + shapePtr = self.getShapeValuesV2(inputName.c_str(), OptProfileSelector::kOPT); PY_ASSERT_RUNTIME_ERROR(shapePtr != nullptr, "Invalid shape for OPT."); shapes.emplace_back(shapePtr, shapePtr + shapeSize); - shapePtr = self.getShapeValues(inputName.c_str(), OptProfileSelector::kMAX); + shapePtr = self.getShapeValuesV2(inputName.c_str(), OptProfileSelector::kMAX); PY_ASSERT_RUNTIME_ERROR(shapePtr != nullptr, "Invalid shape for MAX."); shapes.emplace_back(shapePtr, shapePtr + shapeSize); } @@ -189,7 +189,7 @@ std::vector get_tensor_profile_shape(ICudaEngine& self, std::string const& return shapes; }; -std::vector> get_tensor_profile_values( +std::vector> get_tensor_profile_values( ICudaEngine& self, int32_t profileIndex, std::string const& tensorName) { char const* const name = tensorName.c_str(); @@ -199,16 +199,16 @@ std::vector> get_tensor_profile_values( PY_ASSERT_RUNTIME_ERROR(shape.nbDims >= 0, "Missing shape for input shape tensor"); auto const shapeSize{utils::volume(shape)}; PY_ASSERT_RUNTIME_ERROR(shapeSize >= 0, "Negative volume for input shape tensor"); - std::vector> shapes{}; + std::vector> shapes{}; // In the Python bindings, it is impossible to set only one shape in an optimization profile. - int32_t const* shapePtr{self.getProfileTensorValues(name, profileIndex, OptProfileSelector::kMIN)}; + int64_t const* shapePtr{self.getProfileTensorValuesV2(name, profileIndex, OptProfileSelector::kMIN)}; if (shapePtr) { shapes.emplace_back(shapePtr, shapePtr + shapeSize); - shapePtr = self.getProfileTensorValues(name, profileIndex, OptProfileSelector::kOPT); + shapePtr = self.getProfileTensorValuesV2(name, profileIndex, OptProfileSelector::kOPT); shapes.emplace_back(shapePtr, shapePtr + shapeSize); - shapePtr = self.getProfileTensorValues(name, profileIndex, OptProfileSelector::kMAX); + shapePtr = self.getProfileTensorValuesV2(name, profileIndex, OptProfileSelector::kMAX); shapes.emplace_back(shapePtr, shapePtr + shapeSize); } return shapes; @@ -1326,7 +1326,7 @@ void bindCore(py::module& m) .def("get_debug_state", &IExecutionContext::getDebugState, "name"_a, IExecutionContextDoc::get_debug_state) .def("set_all_tensors_debug_state", &IExecutionContext::setAllTensorsDebugState, "flag"_a, IExecutionContextDoc::set_all_tensors_debug_state) - ; + .def("get_runtime_config", &IExecutionContext::getRuntimeConfig, IExecutionContextDoc::get_runtime_config); py::enum_(m, "ExecutionContextAllocationStrategy", py::arithmetic{}, ExecutionContextAllocationStrategyDoc::descr, py::module_local()) @@ -1336,6 +1336,7 @@ void bindCore(py::module& m) .value("USER_MANAGED", ExecutionContextAllocationStrategy::kUSER_MANAGED, ExecutionContextAllocationStrategyDoc::USER_MANAGED); + py::enum_( m, "SerializationFlag", py::arithmetic{}, SerializationFlagDoc::descr, py::module_local()) .value("EXCLUDE_WEIGHTS", SerializationFlag::kEXCLUDE_WEIGHTS, SerializationFlagDoc::EXCLUDE_WEIGHTS) @@ -1380,6 +1381,16 @@ void bindCore(py::module& m) .value("INPUT", TensorIOMode::kINPUT, TensorIOModeDoc::INPUT) .value("OUTPUT", TensorIOMode::kOUTPUT, TensorIOModeDoc::OUTPUT); + py::class_(m, "IRuntimeConfig", IRuntimeConfigDoc::descr, py::module_local()) + .def("set_execution_context_allocation_strategy", &IRuntimeConfig::setExecutionContextAllocationStrategy, + IRuntimeConfigDoc::set_execution_context_allocation_strategy, + py::arg("strategy") = 
ExecutionContextAllocationStrategy::kSTATIC, py::keep_alive<0, 1>{}, + py::call_guard{}) + .def("get_execution_context_allocation_strategy", &IRuntimeConfig::getExecutionContextAllocationStrategy, + IRuntimeConfigDoc::get_execution_context_allocation_strategy, py::keep_alive<0, 1>{}, + py::call_guard{}); + + py::class_(m, "ICudaEngine", ICudaEngineDoc::descr, py::module_local()) .def("__getitem__", lambdas::engine_getitem) .def_property_readonly("has_implicit_batch_dimension", @@ -1391,9 +1402,10 @@ void bindCore(py::module& m) ICudaEngineDoc::create_serialization_config, py::keep_alive<0, 1>{}) .def("serialize_with_config", &ICudaEngine::serializeWithConfig, ICudaEngineDoc::serialize_with_config, py::call_guard{}) - .def("create_execution_context", &ICudaEngine::createExecutionContext, ICudaEngineDoc::create_execution_context, - py::arg("strategy") = ExecutionContextAllocationStrategy::kSTATIC, py::keep_alive<0, 1>{}, - py::call_guard{}) + .def("create_execution_context", + py::overload_cast(&ICudaEngine::createExecutionContext), + ICudaEngineDoc::create_execution_context, py::arg("strategy") = ExecutionContextAllocationStrategy::kSTATIC, + py::keep_alive<0, 1>{}, py::call_guard{}) .def("create_execution_context_without_device_memory", utils::deprecateMember(&ICudaEngine::createExecutionContextWithoutDeviceMemory, "create_execution_context"), ICudaEngineDoc::create_execution_context_without_device_memory, py::keep_alive<0, 1>{}, @@ -1522,6 +1534,11 @@ void bindCore(py::module& m) "weight_streaming_scratch_memory_size", &ICudaEngine::getWeightStreamingScratchMemorySize) // End weight streaming APIs .def("is_debug_tensor", &ICudaEngine::isDebugTensor, "name"_a, ICudaEngineDoc::is_debug_tensor) + .def("create_execution_context", py::overload_cast(&ICudaEngine::createExecutionContext), + ICudaEngineDoc::create_execution_context, py::arg("runtime_config") = nullptr, py::keep_alive<0, 1>{}, + py::call_guard{}) + .def("create_runtime_config", &ICudaEngine::createRuntimeConfig, ICudaEngineDoc::create_runtime_config, + py::keep_alive<0, 1>{}, py::call_guard{}) .def("__del__", &utils::doNothingDel); diff --git a/python/src/infer/pyPlugin.cpp b/python/src/infer/pyPlugin.cpp index 7393af306..43058dd51 100644 --- a/python/src/infer/pyPlugin.cpp +++ b/python/src/infer/pyPlugin.cpp @@ -3452,6 +3452,7 @@ int32_t getAliasedInput(int32_t outputIndex) } // namespace pluginDoc + void bindPlugin(py::module& m) { py::class_>( @@ -3923,7 +3924,6 @@ void bindPlugin(py::module& m) .def("__len__", &ISymExprs::getNbSymExprs) .def("__getitem__", &ISymExprs::getSymExpr) .def("__setitem__", &ISymExprs::setSymExpr); - #if EXPORT_ALL_BINDINGS m.def("get_builder_plugin_registry", &getBuilderPluginRegistry, py::return_value_policy::reference, FreeFunctionsDoc::get_builder_plugin_registry); diff --git a/python/src/parsers/pyOnnx.cpp b/python/src/parsers/pyOnnx.cpp index d4d44a4e7..4cb802f91 100644 --- a/python/src/parsers/pyOnnx.cpp +++ b/python/src/parsers/pyOnnx.cpp @@ -152,7 +152,10 @@ void bindOnnx(py::module& m) .def("__del__", &utils::doNothingDel); py::enum_(m, "OnnxParserFlag", OnnxParserFlagDoc::descr, py::module_local()) - .value("NATIVE_INSTANCENORM", OnnxParserFlag::kNATIVE_INSTANCENORM, OnnxParserFlagDoc::NATIVE_INSTANCENORM); + .value("NATIVE_INSTANCENORM", OnnxParserFlag::kNATIVE_INSTANCENORM, OnnxParserFlagDoc::NATIVE_INSTANCENORM) + .value("ENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA", + OnnxParserFlag::kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA, + 
OnnxParserFlagDoc::ENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA); py::enum_(m, "ErrorCode", ErrorCodeDoc::descr, py::module_local()) .value("SUCCESS", ErrorCode::kSUCCESS) diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 5f97e4462..22d6b6313 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -15,6 +15,41 @@ # limitations under the License. # +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +# This target will build all of the samples. +add_custom_target(tensorrt_samples) + +# Build the common submodule, which creates the trt_samples_common target. +add_subdirectory(common) + +# Public (OSS) Samples +set(SAMPLE_FOLDERS + sampleCharRNN + sampleDynamicReshape + sampleEditableTimingCache + sampleINT8API + sampleIOFormats + sampleNamedDimensions + sampleNonZeroPlugin + sampleOnnxMNIST + sampleProgressMonitor + trtexec +) + +# This sample needs to link against nvinfer_plugin. +if(${TRT_BUILD_PLUGINS}) + list(APPEND SAMPLE_FOLDERS sampleOnnxMnistCoordConvAC) +endif() + +include(workaround.cmake) + +foreach(FOLDER IN LISTS SAMPLE_FOLDERS) + add_subdirectory(${FOLDER}) +endforeach() + +else() + add_custom_target(samples) set(OPENSOURCE_SAMPLES_LIST @@ -33,3 +68,5 @@ set(OPENSOURCE_SAMPLES_LIST foreach(SAMPLE_ITER ${OPENSOURCE_SAMPLES_LIST}) add_subdirectory(${SAMPLE_ITER}) endforeach(SAMPLE_ITER) + +endif() diff --git a/samples/common/CMakeLists.txt b/samples/common/CMakeLists.txt new file mode 100644 index 000000000..7765384de --- /dev/null +++ b/samples/common/CMakeLists.txt @@ -0,0 +1,80 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: LicenseRef-NvidiaProprietary +# +# NVIDIA CORPORATION, its affiliates and licensors retain all intellectual +# property and proprietary rights in and to this material, related +# documentation and any modifications thereto. Any use, reproduction, +# disclosure or distribution of this material and related documentation +# without an express license agreement from NVIDIA CORPORATION or +# its affiliates is strictly prohibited. 
+ +add_library(trt_samples_common STATIC) + +target_sources(trt_samples_common PRIVATE + argsParser.h + BatchStream.h + bfloat16.cpp + bfloat16.h + buffers.h + common.h + EntropyCalibrator.h + ErrorRecorder.h + getOptions.cpp + getOptions.h + getoptWin.h + half.h + logger.cpp + logger.h + logging.h + parserOnnxConfig.h + safeCommon.h + safeErrorRecorder.h + sampleConfig.h + sampleDevice.cpp + sampleDevice.h + sampleEngines.cpp + sampleEngines.h + sampleEntrypoints.h + sampleInference.cpp + sampleInference.h + sampleOptions.cpp + sampleOptions.h + sampleReporting.cpp + sampleReporting.h + sampleUtils.cpp + sampleUtils.h + streamReader.h +) + +if (MSVC) + enable_language(C) + target_sources(trt_samples_common PRIVATE + getOpt.c + ) +endif() + +if(${TRT_BUILD_SAMPLES_LINK_STATIC_TRT}) + target_link_libraries(trt_samples_common PUBLIC + tensorrt_static + nvonnxparser_static + ) +else() + target_link_libraries(trt_samples_common PUBLIC + tensorrt + nvonnxparser + ) +endif() + +target_link_libraries(trt_samples_common PUBLIC + trt_global_definitions + CUDA::cudart_static + trt_shared +) + +target_include_directories(trt_samples_common PUBLIC + ${CMAKE_CURRENT_LIST_DIR} +) + +if(${TRT_BUILD_ENABLE_DLA}) + target_link_libraries(trt_samples_common PUBLIC NVDLA::compiler) +endif() diff --git a/samples/common/common.h b/samples/common/common.h index 035f1000b..6198f15c9 100644 --- a/samples/common/common.h +++ b/samples/common/common.h @@ -18,9 +18,7 @@ #ifndef TENSORRT_COMMON_H #define TENSORRT_COMMON_H #include "NvInfer.h" -#if !TRT_WINML #include "NvInferPlugin.h" -#endif #include "logger.h" #include "sampleEntrypoints.h" #include "utils/timingCache.h" @@ -1046,7 +1044,7 @@ inline int32_t getMaxPersistentCacheSize() CHECK(cudaGetDevice(&deviceIndex)); int32_t maxPersistentL2CacheSize{}; -#if CUDART_VERSION >= 11030 && !TRT_WINML +#if CUDART_VERSION >= 11030 CHECK(cudaDeviceGetAttribute(&maxPersistentL2CacheSize, cudaDevAttrMaxPersistingL2CacheSize, deviceIndex)); #endif diff --git a/samples/common/sampleDevice.cpp b/samples/common/sampleDevice.cpp index 2145dcd05..1b5527835 100644 --- a/samples/common/sampleDevice.cpp +++ b/samples/common/sampleDevice.cpp @@ -105,7 +105,6 @@ void setCudaDevice(int32_t device, std::ostream& os) // clang-format on } -#if !TRT_WINML int32_t getCudaDriverVersion() { int32_t version{-1}; @@ -119,6 +118,5 @@ int32_t getCudaRuntimeVersion() CHECK(cudaRuntimeGetVersion(&version)); return version; } -#endif } // namespace sample diff --git a/samples/common/sampleDevice.h b/samples/common/sampleDevice.h index 62f43b607..6a5000bdc 100644 --- a/samples/common/sampleDevice.h +++ b/samples/common/sampleDevice.h @@ -579,14 +579,12 @@ class OutputAllocator : public nvinfer1::IOutputAllocator //! Set the GPU to run the inference on. void setCudaDevice(int32_t device, std::ostream& os); -#if !TRT_WINML //! Get the CUDA version of the current CUDA driver. int32_t getCudaDriverVersion(); //! Get the CUDA version of the current CUDA runtime. 
int32_t getCudaRuntimeVersion(); -#endif } // namespace sample diff --git a/samples/common/sampleEngines.cpp b/samples/common/sampleEngines.cpp index 1c647f65b..7abe5fbbf 100644 --- a/samples/common/sampleEngines.cpp +++ b/samples/common/sampleEngines.cpp @@ -118,12 +118,10 @@ nvinfer1::ICudaEngine* LazilyDeserializedEngine::get() mRuntime->setDLACore(mDLACore); } mRuntime->setErrorRecorder(&gRecorder); -#if !TRT_WINML for (auto const& pluginPath : mDynamicPlugins) { mRuntime->getPluginRegistry().loadLibrary(pluginPath.c_str()); } -#endif if (getFileReader().isOpen()) { @@ -212,20 +210,21 @@ Parser modelToNetwork(ModelOptions const& model, BuildOptions const& build, nvin using namespace nvonnxparser; parser.onnxParser.reset(createONNXParser(network)); ASSERT(parser.onnxParser != nullptr); -#if !TRT_WINML // kNATIVE_INSTANCENORM is ON by default in the parser and must be cleared to use the plugin implementation. if (build.pluginInstanceNorm) { parser.onnxParser->clearFlag(OnnxParserFlag::kNATIVE_INSTANCENORM); } -#endif + if (build.enableUInt8AsymmetricQuantizationDLA) + { + parser.onnxParser->setFlag(OnnxParserFlag::kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA); + } if (!parser.onnxParser->parseFromFile( model.baseModel.model.c_str(), static_cast(sample::gLogger.getReportableSeverity()))) { err << "Failed to parse onnx file" << std::endl; parser.onnxParser.reset(); } -#if !TRT_WINML if (vcPluginLibrariesUsed && parser.onnxParser.get()) { int64_t nbPluginLibs; @@ -245,7 +244,6 @@ Parser modelToNetwork(ModelOptions const& model, BuildOptions const& build, nvin << std::endl; } } -#endif break; } case ModelFormat::kANY: break; @@ -908,7 +906,6 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, { config.setFlag(BuilderFlag::kVERSION_COMPATIBLE); } -#if !TRT_WINML std::vector pluginPaths; for (auto const& pluginPath : sys.setPluginsToSerialize) { @@ -919,7 +916,6 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, { config.setPluginsToSerialize(pluginPaths.data(), pluginPaths.size()); } -#endif if (build.excludeLeanRuntime) { config.setFlag(BuilderFlag::kEXCLUDE_LEAN_RUNTIME); @@ -1206,11 +1202,9 @@ bool networkToSerializedEngine( } // CUDA stream used for profiling by the builder. -#if !TRT_WINML auto profileStream = samplesCommon::makeCudaStream(); SMP_RETVAL_IF_FALSE(profileStream != nullptr, "Cuda stream creation failed", false, err); config->setProfileStream(*profileStream); -#endif auto const tBegin = std::chrono::high_resolution_clock::now(); std::unique_ptr serializedEngine{builder.buildSerializedNetwork(*env.network, *config)}; @@ -1243,12 +1237,10 @@ bool modelToBuildEnv( auto networkFlags = (build.stronglyTyped) ? 1U << static_cast(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED) : 0U; -#if !TRT_WINML for (auto const& pluginPath : sys.dynamicPlugins) { env.builder->getPluginRegistry().loadLibrary(pluginPath.c_str()); } -#endif env.network.reset(env.builder->createNetworkV2(networkFlags)); std::vector vcPluginLibrariesUsed; @@ -1257,7 +1249,6 @@ bool modelToBuildEnv( = modelToNetwork(model, build, *env.network, err, build.versionCompatible ? 
&vcPluginLibrariesUsed : nullptr); SMP_RETVAL_IF_FALSE(env.parser.operator bool(), "Parsing model failed", false, err); -#if !TRT_WINML if (build.versionCompatible && !sys.ignoreParsedPluginLibs && !vcPluginLibrariesUsed.empty()) { sample::gLogInfo << "The following plugin libraries were identified by the parser as required for a " @@ -1286,7 +1277,6 @@ bool modelToBuildEnv( sample::gLogInfo << "Use --ignoreParsedPluginLibs to disable this behavior." << std::endl; } -#endif SMP_RETVAL_IF_FALSE( networkToSerializedEngine(build, sys, *env.builder, env, err), "Building engine failed", false, err); diff --git a/samples/common/sampleInference.cpp b/samples/common/sampleInference.cpp index 8a7cf70fd..11d84cff9 100644 --- a/samples/common/sampleInference.cpp +++ b/samples/common/sampleInference.cpp @@ -22,14 +22,12 @@ #include #include #include -#include #include #include #include #include #include #include -#include #if defined(__QNX__) #include @@ -42,12 +40,8 @@ #include "bfloat16.h" #include "common.h" #include "logger.h" -#include "sampleDevice.h" -#include "sampleEngines.h" #include "sampleInference.h" #include "sampleOptions.h" -#include "sampleReporting.h" -#include "sampleUtils.h" #include #if CUDA_VERSION >= 11060 @@ -256,18 +250,24 @@ void stretchInt32ToInt64(std::vector& shapeData) } // namespace +//! \return the `ExecutionContextAllocationStrategy` to use for the given allocation strategy, \p s. +auto getExecutionContextAllocationStrategy = [](MemoryAllocationStrategy s) { + return s == MemoryAllocationStrategy::kSTATIC + // Let TRT pre-allocate and manage the memory. + ? ExecutionContextAllocationStrategy::kSTATIC + // Allocate based on the current profile or runtime shapes. + : ExecutionContextAllocationStrategy::kUSER_MANAGED; +}; + + bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inference, SystemOptions const& system) { -#if TRT_WINML - int32_t const isIntegrated{}; -#else int32_t device{}; CHECK(cudaGetDevice(&device)); cudaDeviceProp properties; CHECK(cudaGetDeviceProperties(&properties, device)); int32_t const isIntegrated{properties.integrated}; -#endif // Use managed memory on integrated devices when transfers are skipped // and when it is explicitly requested on the commandline. bool useManagedMemory{(inference.skipTransfers && isIntegrated) || inference.useManaged}; @@ -281,10 +281,6 @@ bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inferenc // Release serialized blob to save memory space. iEnv.engine.releaseBlob(); -#if TRT_WINML - // Start JIT Compilation time after engine deserialization - auto jitCompileBegin = std::chrono::high_resolution_clock::now(); -#endif // Setup weight streaming if enabled if (engine->getStreamableWeightsSize() > 0) @@ -355,16 +351,8 @@ bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inferenc for (int32_t s = 0; s < inference.infStreams; ++s) { IExecutionContext* ec{nullptr}; - if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kSTATIC) - { - // Let TRT pre-allocate and manage the memory. - ec = engine->createExecutionContext(); - } - else - { - // Allocate based on the current profile or runtime shapes. - ec = engine->createExecutionContext(ExecutionContextAllocationStrategy::kUSER_MANAGED); - } + + ec = engine->createExecutionContext(getExecutionContextAllocationStrategy(inference.memoryAllocationStrategy)); if (ec == nullptr) { sample::gLogError << "Unable to create execution context for stream " << s << "." 
<< std::endl; @@ -372,12 +360,10 @@ bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inferenc } ec->setNvtxVerbosity(inference.nvtxVerbosity); -#if !TRT_WINML int32_t const persistentCacheLimit = samplesCommon::getMaxPersistentCacheSize() * inference.persistentCacheRatio; sample::gLogInfo << "Setting persistentCacheLimit to " << persistentCacheLimit << " bytes." << std::endl; ec->setPersistentCacheLimit(persistentCacheLimit); -#endif auto setProfile = ec->setOptimizationProfileAsync(inference.optProfileIndex, setOptProfileStream); CHECK(cudaStreamSynchronize(setOptProfileStream)); @@ -543,12 +529,6 @@ bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inferenc bool fillBindingsSuccess = FillStdBindings( engine, context, inference.inputs, iEnv.bindings, 1, endBindingIndex, inference.optProfileIndex)(); -#if TRT_WINML - // Stop JIT Compile Time when setup for inference is complete - auto jitCompileEnd = std::chrono::high_resolution_clock::now(); - sample::gLogInfo << "JIT Compilation in " << std::chrono::duration(jitCompileEnd - jitCompileBegin).count() - << " sec." << std::endl; -#endif return fillBindingsSuccess; } @@ -1139,6 +1119,7 @@ bool runInference( th.join(); } + CHECK(cudaProfilerStop()); auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; }; @@ -1219,12 +1200,10 @@ bool timeDeserialize(InferenceEnvironment& iEnv, SystemOptions const& sys) SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); -#if !TRT_WINML for (auto const& pluginPath : sys.dynamicPlugins) { rt->getPluginRegistry().loadLibrary(pluginPath.c_str()); } -#endif auto& reader = iEnv.engine.getFileReader(); auto& asyncReader = iEnv.engine.getAsyncFileReader(); ASSERT(reader.isOpen() || asyncReader.isOpen()); diff --git a/samples/common/sampleOptions.cpp b/samples/common/sampleOptions.cpp index 3aee0c540..d47a9eb67 100644 --- a/samples/common/sampleOptions.cpp +++ b/samples/common/sampleOptions.cpp @@ -270,6 +270,7 @@ WeightStreamingBudget stringToValue(std::string const& op return budget; } + template std::pair splitNameAndValue(const std::string& s) { @@ -1200,9 +1201,7 @@ void BuildOptions::parse(Arguments& arguments) fp16 = true; // BF16 only supported on Ampere+ -#if !TRT_WINML if (samplesCommon::getSMVersion() >= 0x0800) -#endif { bf16 = true; } @@ -1228,15 +1227,14 @@ void BuildOptions::parse(Arguments& arguments) getAndDelOption(arguments, "--versionCompatible", versionCompatible); } -#if !TRT_WINML // --pi and --pluginInstanceNorm are synonyms getAndDelOption(arguments, "--pi", pluginInstanceNorm); if (!pluginInstanceNorm) { getAndDelOption(arguments, "--pluginInstanceNorm", pluginInstanceNorm); } -#endif + getAndDelOption(arguments, "--uint8AsymmetricQuantizationDLA", enableUInt8AsymmetricQuantizationDLA); getAndDelOption(arguments, "--excludeLeanRuntime", excludeLeanRuntime); getAndDelOption(arguments, "--noCompilationCache", disableCompilationCache); getAndDelOption(arguments, "--monitorMemory", enableMonitorMemory); @@ -1275,9 +1273,7 @@ void BuildOptions::parse(Arguments& arguments) } // Print a message to tell users that --noTF32 can be added to improve accuracy with performance cost. 
-#if !TRT_WINML if (samplesCommon::getSMVersion() >= 0x0800) -#endif { if (!(stronglyTyped || fp16 || bf16 || int8 || fp8 || int4)) { @@ -1623,7 +1619,6 @@ void SystemOptions::parse(Arguments& arguments) { getAndDelOption(arguments, "--device", device); getAndDelOption(arguments, "--useDLACore", DLACore); -#if !TRT_WINML std::string pluginName; while (getAndDelOption(arguments, "--plugins", pluginName)) { @@ -1643,7 +1638,6 @@ void SystemOptions::parse(Arguments& arguments) dynamicPlugins.emplace_back(pluginName); } getAndDelOption(arguments, "--ignoreParsedPluginLibs", ignoreParsedPluginLibs); -#endif } constexpr int64_t WeightStreamingBudget::kDISABLE; @@ -1724,6 +1718,7 @@ void InferenceOptions::parse(Arguments& arguments) getAndDelOption(arguments, "--saveDebugTensors", debugTensorList); std::vector fileNames{splitToStringVec(debugTensorList, ',')}; splitInsertKeyValue(fileNames, debugTensorFileNames); + } void ReportingOptions::parse(Arguments& arguments) @@ -1876,6 +1871,10 @@ void AllOptions::parse(Arguments& arguments) throw std::invalid_argument("GPU fallback (--allowGPUFallback) not allowed for DLA standalone mode"); } } + if (system.DLACore < 0 && build.enableUInt8AsymmetricQuantizationDLA) + { + throw std::invalid_argument("--uint8AsymmetricQuantizationDLA is not supported without DLA cores."); + } } } @@ -1913,7 +1912,6 @@ void SafeBuilderOptions::parse(Arguments& arguments) getAndDelOption(arguments, "--int8", int8); getAndDelOption(arguments, "--calib", calibFile); getAndDelOption(arguments, "--std", standard); -#if !TRT_WINML std::string pluginName; while (getAndDelOption(arguments, "--plugins", pluginName)) { @@ -1924,7 +1922,6 @@ void SafeBuilderOptions::parse(Arguments& arguments) { plugins.emplace_back(pluginName); } -#endif bool noBuilderCache{false}; getAndDelOption(arguments, "--noBuilderCache", noBuilderCache); getAndDelOption(arguments, "--timingCacheFile", timingCacheFile); @@ -2148,6 +2145,7 @@ std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType) return os; } + std::ostream& operator<<(std::ostream& os, nvinfer1::RuntimePlatform platform) { switch (platform) @@ -2232,9 +2230,8 @@ std::ostream& operator<<(std::ostream& os, const BuildOptions& options) "Refit: " << boolToEnabled(options.refittable) << std::endl << "Strip weights: " << boolToEnabled(options.stripWeights) << std::endl << "Version Compatible: " << boolToEnabled(options.versionCompatible) << std::endl << -#if !TRT_WINML "ONNX Plugin InstanceNorm: " << boolToEnabled(options.pluginInstanceNorm) << std::endl << -#endif + "ONNX kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA flag: " << boolToEnabled(options.enableUInt8AsymmetricQuantizationDLA) << std::endl << "TensorRT runtime: " << options.useRuntime << std::endl << "Lean DLL Path: " << options.leanDLLPath << std::endl << "Tempfile Controls: "; printTempfileControls(os, options.tempfileControls) << std::endl << @@ -2298,7 +2295,6 @@ std::ostream& operator<<(std::ostream& os, const SystemOptions& options) "Device: " << options.device << std::endl << "DLACore: " << (options.DLACore != -1 ? 
std::to_string(options.DLACore) : "") << std::endl; -#if !TRT_WINML os << "Plugins:"; for (const auto& p : options.plugins) @@ -2325,7 +2321,6 @@ std::ostream& operator<<(std::ostream& os, const SystemOptions& options) os << "ignoreParsedPluginLibs: " << options.ignoreParsedPluginLibs << std::endl; os << std::endl; -#endif return os; // clang-format on } @@ -2458,13 +2453,11 @@ std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) printIOFormats(os, "Input(s)", options.inputFormats); printIOFormats(os, "Output(s)", options.outputFormats); -#if !TRT_WINML os << "Plugins:"; for (const auto& p : options.plugins) { os << " " << p; } -#endif os << "timingCacheMode: "; printTimingCache(os, options.timingCacheMode) << std::endl; os << "timingCacheFile: " << options.timingCacheFile << std::endl; @@ -2545,10 +2538,12 @@ void BuildOptions::help(std::ostream& os) " --weightless [Deprecated] this knob has been deprecated. Please use --stripWeights" "\n" " --versionCompatible, --vc Mark the engine as version compatible. This allows the engine to be used with newer versions" "\n" " of TensorRT on the same host OS, as well as TensorRT's dispatch and lean runtimes." "\n" -#if !TRT_WINML " --pluginInstanceNorm, --pi Set `kNATIVE_INSTANCENORM` to false in the ONNX parser. This will cause the ONNX parser to use" "\n" " a plugin InstanceNorm implementation over the native implementation when parsing." "\n" -#endif + " --uint8AsymmetricQuantizationDLA Set `kENABLE_UINT8_AND_ASYMMETRIC_QUANTIZATION_DLA` to true in the ONNX parser. This directs the" "\n" + " onnx parser to allow UINT8 as a quantization data type and import zero point values directly" "\n" + " without converting to float type or all-zero values. Should only be set with DLA software version" "\n" + " >= 3.16." "\n" R"( --useRuntime=runtime TensorRT runtime to execute engine. "lean" and "dispatch" require loading VC engine and do)" "\n" " not support building an engine." "\n" R"( runtime::= "full"|"lean"|"dispatch")" "\n" @@ -2691,16 +2686,12 @@ void SystemOptions::help(std::ostream& os) os << "=== System Options ===" << std::endl << " --device=N Select cuda device N (default = " << defaultDevice << ")" << std::endl << " --useDLACore=N Select DLA core N for layers that support DLA (default = none)" << std::endl << -#if TRT_WINML - std::endl; -#else " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << " --dynamicPlugins Plugin library (.so) to load dynamically and may be serialized with the engine if they are included in --setPluginsToSerialize (can be specified multiple times)" << std::endl << " --setPluginsToSerialize Plugin library (.so) to be serialized with the engine (can be specified multiple times)" << std::endl << " --ignoreParsedPluginLibs By default, when building a version-compatible engine, plugin libraries specified by the ONNX parser " << std::endl << " are implicitly serialized with the engine (unless --excludeLeanRuntime is specified) and loaded dynamically. " << std::endl << " Enable this flag to ignore these plugin libraries instead." << std::endl; -#endif // clang-format on } @@ -2772,6 +2763,7 @@ void InferenceOptions::help(std::ostream& os) " Requires the '%' character." << std::endl << " >=0B: The exact amount of streamable weights that reside on the GPU. Supports the " << std::endl << " following base-2 suffixes: " << getAvailableUnitSuffixes() << "." 
<< std::endl; + // clang-format on } @@ -2867,9 +2859,7 @@ void SafeBuilderOptions::printHelp(std::ostream& os) " --std Build standard serialized engine, (default = disabled)" << std::endl << " --calib= Read INT8 calibration cache file" << std::endl << " --serialized= Save the serialized network" << std::endl << -#if !TRT_WINML " --staticPlugins Plugin library (.so) to load statically (can be specified multiple times)" << std::endl << -#endif " --verbose or -v Use verbose logging (default = false)" << std::endl << " --help or -h Print this message" << std::endl << " --noBuilderCache Disable timing cache in builder (default is to enable timing cache)" << std::endl << diff --git a/samples/common/sampleOptions.h b/samples/common/sampleOptions.h index 8aba4bfbb..8d421e210 100644 --- a/samples/common/sampleOptions.h +++ b/samples/common/sampleOptions.h @@ -239,6 +239,7 @@ class BuildOptions : public Options bool stripWeights{false}; bool versionCompatible{false}; bool pluginInstanceNorm{false}; + bool enableUInt8AsymmetricQuantizationDLA{false}; bool excludeLeanRuntime{false}; bool disableCompilationCache{false}; bool enableMonitorMemory{false}; @@ -438,6 +439,7 @@ std::ostream& operator<<(std::ostream& os, nvinfer1::DataType dtype); std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType); + inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) { for (int32_t i = 0; i < dims.nbDims; ++i) diff --git a/samples/common/sampleUtils.h b/samples/common/sampleUtils.h index 9c3f45b7f..667e09235 100644 --- a/samples/common/sampleUtils.h +++ b/samples/common/sampleUtils.h @@ -79,11 +79,9 @@ std::vector splitToStringVec(std::string const& option, char separa bool broadcastIOFormats(std::vector const& formats, size_t nbBindings, bool isInput = true); -#if !TRT_WINML int32_t getCudaDriverVersion(); int32_t getCudaRuntimeVersion(); -#endif void sparsify(nvinfer1::INetworkDefinition& network, std::vector>& sparseWeights); void sparsify(nvinfer1::Weights const& weights, int32_t k, int32_t rs, std::vector& sparseWeights); diff --git a/samples/python/downloader.py b/samples/python/downloader.py index 3c1e1e046..b41794e57 100755 --- a/samples/python/downloader.py +++ b/samples/python/downloader.py @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -76,7 +76,7 @@ def _createDirIfNeeded(path): raise -def download(data_dir, yaml_path, overwrite=False): +def download(data_dir, yaml_path, retries, overwrite=False): """Download the data files specified in YAML file to a directory. Return false if the downloaded file or the local copy (if not overwrite) has a different checksum. 
@@ -84,13 +84,13 @@ def download(data_dir, yaml_path, overwrite=False): sample_data = _loadYAML(yaml_path) logger.info("Downloading data for %s", sample_data.sample) - def _downloadFile(path, url): + def _downloadFile(path, url, retries): logger.info("Downloading %s from %s", path, url) import requests from requests.adapters import HTTPAdapter, Retry session = requests.Session() - retries = Retry(total=10, backoff_factor=0.5) + retries = Retry(total=retries, backoff_factor=0.5) session.mount("http://", HTTPAdapter(max_retries=retries)) session.mount("https://", HTTPAdapter(max_retries=retries)) try: @@ -137,7 +137,7 @@ def _downloadFile(path, url): allGood = False continue _createDirIfNeeded(fpath) - assert _downloadFile(fpath, f.url) + assert _downloadFile(fpath, f.url, retries=retries) if not _checkMD5(fpath, f.checksum): logger.error("The downloaded file %s has a different checksum!", fpath) allGood = False @@ -174,7 +174,13 @@ def _parseArgs(): action="store_true", default=False, ) - + parser.add_argument( + "-r", + "--retries", + help="Number of retries for download", + type=int, + default=10, + ) args, _ = parser.parse_known_args() data = os.environ.get("TRT_DATA_DIR", None) if args.data is None else args.data if data is None: @@ -218,7 +224,7 @@ def main(): if args.verify: ret = verifyChecksum(data, args.file) else: - ret = download(data, args.file, args.overwrite) + ret = download(data, args.file, args.retries, args.overwrite) if not ret: # Error of downloading or checksum diff --git a/samples/python/plugin_utils.py b/samples/python/plugin_utils.py index 955668d7a..e286bb99a 100644 --- a/samples/python/plugin_utils.py +++ b/samples/python/plugin_utils.py @@ -73,9 +73,6 @@ def _cudaGetErrorEnum(error): def getComputeCapacity(devID): major = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor, devID)) minor = checkCudaErrors(cudart.cudaDeviceGetAttribute(cudart.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor, devID)) - # Redirect 12.1 to 12.0 since 12.1 can reuse 12.0 cubins and this can save lib size and compile time. - if major == 12 and minor == 1: - minor = 0 return (major, minor) diff --git a/samples/sampleCharRNN/CMakeLists.txt b/samples/sampleCharRNN/CMakeLists.txt index 2a987a638..842c456c7 100644 --- a/samples/sampleCharRNN/CMakeLists.txt +++ b/samples/sampleCharRNN/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,9 +14,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_char_rnn sampleCharRNN.cpp) +target_link_libraries(sample_char_rnn PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_char_rnn) + +install( + TARGETS sample_char_rnn + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleCharRNN.cpp ../common/sampleDevice.cpp ../common/sampleEngines.cpp ../common/sampleOptions.cpp ../common/sampleUtils.cpp ../common/bfloat16.cpp) # Required due to inclusion of sampleEnines.h set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleDynamicReshape/CMakeLists.txt b/samples/sampleDynamicReshape/CMakeLists.txt index 2e2d11e13..20996f8cd 100644 --- a/samples/sampleDynamicReshape/CMakeLists.txt +++ b/samples/sampleDynamicReshape/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,6 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_dynamic_reshape sampleDynamicReshape.cpp) +target_link_libraries(sample_dynamic_reshape PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_dynamic_reshape) + +install( + TARGETS sample_dynamic_reshape + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleDynamicReshape.cpp) set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleEditableTimingCache/CMakeLists.txt b/samples/sampleEditableTimingCache/CMakeLists.txt index fb20d202e..72218a355 100644 --- a/samples/sampleEditableTimingCache/CMakeLists.txt +++ b/samples/sampleEditableTimingCache/CMakeLists.txt @@ -14,6 +14,21 @@ # See the License for the specific language governing permissions and # limitations under the License. # +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_editable_timing_cache sampleEditableTimingCache.cpp) +target_link_libraries(sample_editable_timing_cache PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_editable_timing_cache) + +install( + TARGETS sample_editable_timing_cache + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleEditableTimingCache.cpp) set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleINT8API/CMakeLists.txt b/samples/sampleINT8API/CMakeLists.txt index 0882a5811..70e06dfdf 100644 --- a/samples/sampleINT8API/CMakeLists.txt +++ b/samples/sampleINT8API/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,8 +14,24 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_int8_api sampleINT8API.cpp) +target_link_libraries(sample_int8_api PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_int8_api) + +install( + TARGETS sample_int8_api + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleINT8API.cpp) set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleIOFormats/CMakeLists.txt b/samples/sampleIOFormats/CMakeLists.txt index 440adb5d8..7ba322c79 100644 --- a/samples/sampleIOFormats/CMakeLists.txt +++ b/samples/sampleIOFormats/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,9 +14,24 @@ # See the License for the specific language governing permissions and # limitations under the License. # +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_io_formats sampleIOFormats.cpp) +target_link_libraries(sample_io_formats PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_io_formats) + +install( + TARGETS sample_io_formats + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleIOFormats.cpp ../common/sampleDevice.cpp ../common/sampleEngines.cpp ../common/sampleOptions.cpp ../common/sampleUtils.cpp ../common/bfloat16.cpp) set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleNamedDimensions/CMakeLists.txt b/samples/sampleNamedDimensions/CMakeLists.txt index 35c55b4fd..88829681d 100644 --- a/samples/sampleNamedDimensions/CMakeLists.txt +++ b/samples/sampleNamedDimensions/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,8 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_named_dimensions sampleNamedDimensions.cpp) +target_link_libraries(sample_named_dimensions PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_named_dimensions) + +install( + TARGETS sample_named_dimensions + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleNamedDimensions.cpp) set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleNonZeroPlugin/CMakeLists.txt b/samples/sampleNonZeroPlugin/CMakeLists.txt index 775766992..988b66e2f 100644 --- a/samples/sampleNonZeroPlugin/CMakeLists.txt +++ b/samples/sampleNonZeroPlugin/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,6 +14,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_non_zero_plugin + sampleNonZeroPlugin.cpp + nonZeroKernel.cu +) +target_link_libraries(sample_non_zero_plugin PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_non_zero_plugin) + +install( + TARGETS sample_non_zero_plugin + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleNonZeroPlugin.cpp nonZeroKernel.cu) set(SAMPLE_PARSERS "onnx") @@ -21,3 +37,5 @@ set(SAMPLE_PARSERS "onnx") set(CUDA_LIBS_REQUIRED True) include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleOnnxMNIST/CMakeLists.txt b/samples/sampleOnnxMNIST/CMakeLists.txt index 404af0706..6c4869380 100644 --- a/samples/sampleOnnxMNIST/CMakeLists.txt +++ b/samples/sampleOnnxMNIST/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,8 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_onnx_mnist sampleOnnxMNIST.cpp) +target_link_libraries(sample_onnx_mnist PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_onnx_mnist) + +install( + TARGETS sample_onnx_mnist + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleOnnxMNIST.cpp) set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleOnnxMnistCoordConvAC/CMakeLists.txt b/samples/sampleOnnxMnistCoordConvAC/CMakeLists.txt index 108f2cff0..aa3a8f652 100644 --- a/samples/sampleOnnxMnistCoordConvAC/CMakeLists.txt +++ b/samples/sampleOnnxMnistCoordConvAC/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,9 +14,34 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_onnx_mnist_coord_conv_ac sampleOnnxMnistCoordConvAC.cpp) +target_link_libraries(sample_onnx_mnist_coord_conv_ac PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_onnx_mnist_coord_conv_ac) + +if(${TRT_BUILD_SAMPLES_LINK_STATIC_TRT}) + target_link_libraries(sample_onnx_mnist_coord_conv_ac PRIVATE + tensorrt_plugins_static + ) +else() + target_link_libraries(sample_onnx_mnist_coord_conv_ac PRIVATE + tensorrt_plugins + ) +endif() + +install( + TARGETS sample_onnx_mnist_coord_conv_ac + OPTIONAL +) + +else() + set(SAMPLE_SOURCES sampleOnnxMnistCoordConvAC.cpp) set(SAMPLE_PARSERS "onnx") set(PLUGINS_NEEDED ON) include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp b/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp index a6225aeb5..54b294245 100644 --- a/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp +++ b/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp @@ -35,6 +35,7 @@ #include "parserOnnxConfig.h" #include "NvInfer.h" +#include "NvInferPlugin.h" #include #include diff --git a/samples/sampleProgressMonitor/CMakeLists.txt b/samples/sampleProgressMonitor/CMakeLists.txt index c50a7a033..9ee826014 100644 --- a/samples/sampleProgressMonitor/CMakeLists.txt +++ b/samples/sampleProgressMonitor/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,9 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(sample_progress_monitor sampleProgressMonitor.cpp) +target_link_libraries(sample_progress_monitor PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples sample_progress_monitor) + +install( + TARGETS sample_progress_monitor + OPTIONAL +) + +else() set(SAMPLE_SOURCES sampleProgressMonitor.cpp) set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/trtexec/CMakeLists.txt b/samples/trtexec/CMakeLists.txt index 1c9f5c3d6..8e34a7bd0 100644 --- a/samples/trtexec/CMakeLists.txt +++ b/samples/trtexec/CMakeLists.txt @@ -1,5 +1,5 @@ # -# SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,6 +14,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW}) + +add_executable(trtexec trtexec.cpp) +target_link_libraries(trtexec PRIVATE trt_samples_common) +add_dependencies(tensorrt_samples trtexec) + +install( + TARGETS trtexec + OPTIONAL +) + +else() + set(SAMPLE_SOURCES ../common/sampleDevice.cpp ../common/sampleEngines.cpp @@ -27,3 +40,5 @@ set(SAMPLE_SOURCES set(SAMPLE_PARSERS "onnx") include(../CMakeSamplesTemplate.txt) + +endif() diff --git a/samples/trtexec/trtexec.cpp b/samples/trtexec/trtexec.cpp index 62b58bdfa..840b5ea7e 100644 --- a/samples/trtexec/trtexec.cpp +++ b/samples/trtexec/trtexec.cpp @@ -31,9 +31,7 @@ #include #include "NvInfer.h" -#if !TRT_WINML #include "NvInferPlugin.h" -#endif #include "buffers.h" #include "common.h" @@ -52,17 +50,28 @@ namespace { using LibraryPtr = std::unique_ptr; +std::string const TRT_NVINFER_NAME = "nvinfer"; +std::string const TRT_ONNXPARSER_NAME = "nvonnxparser"; +std::string const TRT_LIB_SUFFIX = ""; + #if !TRT_STATIC #if defined(_WIN32) -std::string const kNVINFER_PLUGIN_LIBNAME = std::string{"nvinfer_plugin_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; -std::string const kNVINFER_LIBNAME = std::string{"nvinfer_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; -std::string const kNVONNXPARSER_LIBNAME = std::string{"nvonnxparser_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; -std::string const kNVINFER_LEAN_LIBNAME = std::string{"nvinfer_lean_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; -std::string const kNVINFER_DISPATCH_LIBNAME = std::string{"nvinfer_dispatch_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; +std::string const kNVINFER_PLUGIN_LIBNAME + = std::string{"nvinfer_plugin_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; +std::string const kNVINFER_LIBNAME = std::string(TRT_NVINFER_NAME) + std::string{"_"} + + std::to_string(NV_TENSORRT_MAJOR) + TRT_LIB_SUFFIX + std::string{".dll"}; +std::string const kNVONNXPARSER_LIBNAME = std::string(TRT_ONNXPARSER_NAME) + std::string{"_"} + + std::to_string(NV_TENSORRT_MAJOR) + TRT_LIB_SUFFIX + std::string{".dll"}; +std::string const kNVINFER_LEAN_LIBNAME + = std::string{"nvinfer_lean_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; +std::string const kNVINFER_DISPATCH_LIBNAME + = std::string{"nvinfer_dispatch_"} + std::to_string(NV_TENSORRT_MAJOR) + std::string{".dll"}; #else std::string const kNVINFER_PLUGIN_LIBNAME = std::string{"libnvinfer_plugin.so."} + std::to_string(NV_TENSORRT_MAJOR); -std::string const kNVINFER_LIBNAME = std::string{"libnvinfer.so."} + std::to_string(NV_TENSORRT_MAJOR); -std::string const kNVONNXPARSER_LIBNAME = std::string{"libnvonnxparser.so."} + std::to_string(NV_TENSORRT_MAJOR); +std::string const kNVINFER_LIBNAME + = std::string{"lib"} + std::string(TRT_NVINFER_NAME) + std::string{".so."} + std::to_string(NV_TENSORRT_MAJOR); +std::string const kNVONNXPARSER_LIBNAME + = std::string{"lib"} + std::string(TRT_ONNXPARSER_NAME) + std::string{".so."} + std::to_string(NV_TENSORRT_MAJOR); std::string const kNVINFER_LEAN_LIBNAME = std::string{"libnvinfer_lean.so."} + std::to_string(NV_TENSORRT_MAJOR); std::string const kNVINFER_DISPATCH_LIBNAME = std::string{"libnvinfer_dispatch.so."} + std::to_string(NV_TENSORRT_MAJOR); @@ -272,19 +281,14 @@ int main(int argc, char** argv) { sample::setReportableSeverity(ILogger::Severity::kVERBOSE); } -#if TRT_WINML - std::string const jitInVersion = " JIT"; -#else std::string const jitInVersion; setCudaDevice(options.system.device, 
sample::gLogInfo); -#endif sample::gLogInfo << std::endl; sample::gLogInfo << "TensorRT version: " << NV_TENSORRT_MAJOR << "." << NV_TENSORRT_MINOR << "." << NV_TENSORRT_PATCH << jitInVersion << std::endl; // Record specified runtime gUseRuntime = options.build.useRuntime; -#if !TRT_WINML #if !TRT_STATIC LibraryPtr nvinferPluginLib{}; #endif @@ -311,7 +315,6 @@ int main(int argc, char** argv) { throw std::runtime_error("TRT-18412: Plugins require --useRuntime=full."); } -#endif // !TRT_WINML if (options.build.safe && !sample::hasSafeRuntime()) { sample::gLogError << "Safety is not supported because safety runtime library is unavailable." << std::endl; @@ -336,20 +339,9 @@ int main(int argc, char** argv) return sample::gLogger.reportPass(sampleTest); } -#if TRT_WINML - if (options.build.skipInference) - { - sample::gLogInfo << "Skipped inference phase since --skipInference is added." << std::endl; - return sample::gLogger.reportPass(sampleTest); - } - setCudaDevice(options.system.device, sample::gLogInfo); -#endif - -#if !TRT_WINML // dynamicPlugins may have been updated by getEngineBuildEnv above bEnv->engine.setDynamicPlugins(options.system.dynamicPlugins); -#endif // !TRT_WINML // When some options are enabled, engine deserialization is not supported on the platform that the engine was // built. bool const supportDeserialization = !options.build.safe && !options.build.buildDLAStandalone @@ -446,7 +438,6 @@ int main(int argc, char** argv) if (profilerEnabled && !options.inference.rerun) { iEnv->profiler.reset(new Profiler); -#if !TRT_WINML if (options.inference.graph && (getCudaDriverVersion() < 11010 || getCudaRuntimeVersion() < 11000)) { options.inference.graph = false; @@ -455,7 +446,6 @@ int main(int argc, char** argv) "and disabled CUDA graph." << std::endl; } -#endif } if (!setUpInference(*iEnv, options.inference, options.system)) @@ -501,7 +491,6 @@ int main(int argc, char** argv) iEnv->profiler.reset(profiler); iEnv->contexts.front()->setProfiler(profiler); iEnv->contexts.front()->setEnqueueEmitsProfile(false); -#if !TRT_WINML if (options.inference.graph && (getCudaDriverVersion() < 11010 || getCudaRuntimeVersion() < 11000)) { options.inference.graph = false; @@ -510,7 +499,6 @@ int main(int argc, char** argv) "and disabled CUDA graph." << std::endl; } -#endif if (!runInference(options.inference, *iEnv, options.system.device, trace)) { sample::gLogError << "Error occurred during inference" << std::endl; diff --git a/shared/CMakeLists.txt b/shared/CMakeLists.txt index 23886b8b0..656524b97 100644 --- a/shared/CMakeLists.txt +++ b/shared/CMakeLists.txt @@ -19,8 +19,8 @@ function(add_shared_source) target_sources(trt_shared PRIVATE ${ARGN}) endfunction() -target_link_libraries(trt_shared PUBLIC - tensorrt +target_link_libraries(trt_shared PRIVATE + $ trt_global_definitions CUDA::cudart_static ) diff --git a/tools/Polygraphy/CHANGELOG.md b/tools/Polygraphy/CHANGELOG.md index 652efbb4c..a3cdde6b0 100644 --- a/tools/Polygraphy/CHANGELOG.md +++ b/tools/Polygraphy/CHANGELOG.md @@ -2,7 +2,21 @@ Dates are in YYYY-MM-DD format. -## v0.49.20 (2025-03-4) + +## v0.49.22 (2025-05-09) +### Changed +- Updated TensorRT loaders to handle some missing attributes more gracefully. + + +## v0.49.21 (2025-05-08) +### Added +- Added new comparison functions to `CompareFunc` + +### Changed +- Updated `Comparator` to avoid making deep copies of outputs where possible. + + +## v0.49.20 (2025-03-04) ### Fixed - Fixed a bug where `colored` did not work for *older* versions of the `colored` package. 
diff --git a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/README.md b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/README.md index 550d4e5dc..45e9ff636 100644 --- a/tools/Polygraphy/examples/api/00_inference_with_tensorrt/README.md +++ b/tools/Polygraphy/examples/api/00_inference_with_tensorrt/README.md @@ -45,4 +45,4 @@ engine to a file and see how you can load it again and run inference. ## Further Reading For more details on the Polygraphy Python API, see the -[Polygraphy API reference](https://docs.nvidia.com/deeplearning/tensorrt/polygraphy/docs/index.html). +[Polygraphy API reference](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/polygraphy/index.html). diff --git a/tools/Polygraphy/examples/api/01_comparing_frameworks/README.md b/tools/Polygraphy/examples/api/01_comparing_frameworks/README.md index a2dcf8caa..e80961507 100644 --- a/tools/Polygraphy/examples/api/01_comparing_frameworks/README.md +++ b/tools/Polygraphy/examples/api/01_comparing_frameworks/README.md @@ -8,7 +8,11 @@ different backends. This makes it possible to check the accuracy of one backend respect to another. In this example, we'll look at how you can use the Polygraphy API to run inference -with synthetic input data using ONNX-Runtime and TensorRT, and then compare the results. +with synthetic input data using ONNX-Runtime and TensorRT, and then compare the results +using two different comparison methods: + +1. A simple comparison using absolute tolerance +2. A more comprehensive comparison using distance metrics (L2 distance, cosine similarity, and PSNR) ## Running The Example @@ -27,3 +31,15 @@ with synthetic input data using ONNX-Runtime and TensorRT, and then compare the ```bash polygraphy inspect data inference_results.json ``` + +## Comparison Methods + +The example demonstrates two approaches for comparing outputs: + +- **Simple Comparison**: Uses absolute tolerance to determine if outputs match within a specified threshold. +- **Distance Metrics**: Performs a more comprehensive comparison using multiple metrics including: + - L2 distance (Euclidean distance) + - Cosine similarity (measures the angle between vectors) + - PSNR (Peak Signal-to-Noise Ratio, useful for comparing image-like data) + +These comparison methods help validate that frameworks produce equivalent results within acceptable margins. 
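
For intuition, the quantities behind these checks can be sketched in plain NumPy. This is an illustrative approximation, not the exact implementation used by `CompareFunc`; in particular, taking the PSNR peak value from the reference output is an assumption here:

```python
import numpy as np

def l2_distance(a, b):
    # Euclidean distance between the two outputs.
    diff = a.astype(np.float64) - b.astype(np.float64)
    return float(np.sqrt(np.sum(diff ** 2)))

def cosine_similarity(a, b):
    # Cosine of the angle between the flattened outputs, clipped to [-1, 1].
    a = a.ravel().astype(np.float64)
    b = b.ravel().astype(np.float64)
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0.0:
        # Two zero vectors are treated as identical; a single zero vector as orthogonal.
        return 1.0 if not a.any() and not b.any() else 0.0
    return float(np.clip(np.dot(a, b) / denom, -1.0, 1.0))

def psnr(reference, test):
    # Peak Signal-to-Noise Ratio in dB; the peak value is taken from the reference output.
    mse = np.mean((reference.astype(np.float64) - test.astype(np.float64)) ** 2)
    if mse == 0.0:
        return float("inf")
    peak = float(np.max(np.abs(reference)))
    return float(10.0 * np.log10((peak ** 2) / mse))
```
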
diff --git a/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py b/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py index 5eee4f601..5cd85bd83 100644 --- a/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py +++ b/tools/Polygraphy/examples/api/01_comparing_frameworks/example.py @@ -57,6 +57,30 @@ def main(): ) ) + # Use distance metrics comparison for more comprehensive evaluation + assert bool( + Comparator.compare_accuracy( + run_results, + compare_func=CompareFunc.distance_metrics( + l2_tolerance=1e-5, # Maximum allowed L2 norm (Euclidean distance) + cosine_similarity_threshold=0.99, # Minimum cosine similarity (angular similarity) + ) + ) + ) + print("All outputs matched using distance metrics (L2 norm, Cosine Similarity)") + + # Use quality metrics for signal quality evaluation + assert bool( + Comparator.compare_accuracy( + run_results, + compare_func=CompareFunc.quality_metrics( + psnr_tolerance=50.0, # Minimum Peak Signal-to-Noise Ratio in dB + snr_tolerance=25.0 # Minimum Signal-to-Noise Ratio in dB + ) + ) + ) + print("All outputs matched using quality metrics (PSNR, SNR)") + # We can use `RunResults.save()` method to save the inference results to a JSON file. # This can be useful if you want to generate and compare results separately. run_results.save("inference_results.json") diff --git a/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/README.md b/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/README.md index c2ab92244..24969620c 100644 --- a/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/README.md +++ b/tools/Polygraphy/examples/api/04_int8_calibration_in_tensorrt/README.md @@ -5,7 +5,7 @@ Int8 calibration in TensorRT involves providing a representative set of input data to TensorRT as part of the engine building process. The -[calibration API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Int8/Calibrator.html) +[calibration API](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Int8/Calibrator.html) included in TensorRT requires the user to handle copying input data to the GPU and manage the calibration cache generated by TensorRT. diff --git a/tools/Polygraphy/examples/api/05_using_tensorrt_network_api/README.md b/tools/Polygraphy/examples/api/05_using_tensorrt_network_api/README.md index b14cb0446..7cb343eab 100644 --- a/tools/Polygraphy/examples/api/05_using_tensorrt_network_api/README.md +++ b/tools/Polygraphy/examples/api/05_using_tensorrt_network_api/README.md @@ -33,4 +33,4 @@ loader to seamlessly integrate a network defined using TensorRT APIs with Polygr ## Further Reading For more information on the TensorRT Network API, see the -[TensorRT API documentation](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/infer/Graph/pyGraph.html) +[TensorRT API documentation](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/infer/Graph/Network.html) diff --git a/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/README.md b/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/README.md index f5d2e5df6..9bf1a4f12 100644 --- a/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/README.md +++ b/tools/Polygraphy/examples/cli/run/08_adding_precision_constraints/README.md @@ -95,7 +95,7 @@ as a part of which you can set layer precisions. 
The below section assumes you have read through the example on [Defining a TensorRT Network or Config Manually](../../../../examples/cli/run/04_defining_a_tensorrt_network_or_config_manually) -and have a basic understanding of how to use the [TensorRT Python API](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html). +and have a basic understanding of how to use the [TensorRT Python API](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/index.html). First, run ONNX-Runtime on the model to generate reference inputs and golden outputs: @@ -135,4 +135,4 @@ polygraphy run constrained_network.py --precision-constraints prefer \ reduced precision optimizations using Polygraphy. * [Defining a TensorRT Network or Config Manually](../../../../examples/cli/run/04_defining_a_tensorrt_network_or_config_manually) for instructions on how to create network script templates. -* [TensorRT Python API Reference](https://docs.nvidia.com/deeplearning/tensorrt/api/python_api/index.html) +* [TensorRT Python API Reference](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/python-api/index.html) diff --git a/tools/Polygraphy/polygraphy/README.md b/tools/Polygraphy/polygraphy/README.md index cb6a01362..9ea074a40 100644 --- a/tools/Polygraphy/polygraphy/README.md +++ b/tools/Polygraphy/polygraphy/README.md @@ -226,7 +226,7 @@ You can find complete code examples that use the Polygraphy Python API [here](.. ## Python API Reference Documentation -For more details, see the [Polygraphy Python API reference documentation](https://docs.nvidia.com/deeplearning/tensorrt/polygraphy/docs/index.html). +For more details, see the [Polygraphy Python API reference documentation](https://docs.nvidia.com/deeplearning/tensorrt/latest/_static/polygraphy/index.html). ### Building Python API Documentation Locally diff --git a/tools/Polygraphy/polygraphy/__init__.py b/tools/Polygraphy/polygraphy/__init__.py index 706e68cda..5ebca09bb 100644 --- a/tools/Polygraphy/polygraphy/__init__.py +++ b/tools/Polygraphy/polygraphy/__init__.py @@ -1,3 +1,3 @@ import polygraphy.config -__version__ = "0.49.20" +__version__ = "0.49.22" diff --git a/tools/Polygraphy/polygraphy/backend/trt/util.py b/tools/Polygraphy/polygraphy/backend/trt/util.py index 65c0d362f..c0b0e4c94 100644 --- a/tools/Polygraphy/polygraphy/backend/trt/util.py +++ b/tools/Polygraphy/polygraphy/backend/trt/util.py @@ -496,7 +496,7 @@ def get_enabled_enum_vals(EnumType, is_enabled): add_line("Preview Features", f"{str_from_list(feature_vals)}") # Calibrator - if config.int8_calibrator: + if hasattr(config, "int8_calibrator") and config.int8_calibrator: add_line("Calibrator", f"{config.int8_calibrator}") # Quantization Flags @@ -595,7 +595,10 @@ def try_setup_polygraphy_calibrator(config, network, calib_profile=None): Tries to call setup methods specific to Polygraphy calibrators. Returns early if there is no calibrator or if it is not a Polygraphy calibrator. 
""" - calibrator = config.int8_calibrator + try: + calibrator = config.int8_calibrator + except AttributeError: + return if calibrator is None or not ( hasattr(calibrator, "is_polygraphy_calibrator") and calibrator.is_polygraphy_calibrator diff --git a/tools/Polygraphy/polygraphy/comparator/comparator.py b/tools/Polygraphy/polygraphy/comparator/comparator.py index 7a70a9a96..aea8501fc 100644 --- a/tools/Polygraphy/polygraphy/comparator/comparator.py +++ b/tools/Polygraphy/polygraphy/comparator/comparator.py @@ -124,7 +124,7 @@ def execute_runner(runner, loader_cache): # Then, actual iterations. index = 0 iteration_results = [] - + iterations_num = len(loader_cache) total_runtime = 0 for index, feed_dict in enumerate(loader_cache): G_LOGGER.info( @@ -139,10 +139,13 @@ def execute_runner(runner, loader_cache): runtime = active_runner.last_inference_time() total_runtime += runtime - # Without a deep copy here, outputs will always reference the output of the last run + + # Only make a deep copy if we have more than one iteration. + # For single iteration case, we can use the outputs directly since they won't be reused. + # This allows running with a large number of outputs (e.g. for accuracy debugging) without memory explosion. iteration_results.append( IterationResult( - outputs=copy.deepcopy(outputs), + outputs=copy.deepcopy(outputs) if iterations_num > 1 else outputs, runtime=runtime, runner_name=active_runner.name, ) diff --git a/tools/Polygraphy/polygraphy/comparator/compare.py b/tools/Polygraphy/polygraphy/comparator/compare.py index b4dc362aa..8fb2e7351 100644 --- a/tools/Polygraphy/polygraphy/comparator/compare.py +++ b/tools/Polygraphy/polygraphy/comparator/compare.py @@ -91,6 +91,120 @@ def __str__(self): return f"(atol={self.max_absdiff}, rtol={self.max_reldiff})" +@mod.export() +class DistanceMetricsResult: + """ + Represents the result of comparing a single output using distance metrics + between two runners. + """ + + def __init__( + self, + passed, + l2_norm, + cosine_similarity, + ): + """ + Records the distance metrics gathered during comparison. + + Args: + passed (bool): + Whether the output passed all enabled metric comparisons. + l2_norm (float): + The L2 norm (Euclidean distance) between the outputs. + cosine_similarity (float): + The cosine similarity between the outputs. + """ + self.passed = passed + self.l2_norm = l2_norm + self.cosine_similarity = cosine_similarity + + def __bool__(self): + """ + Whether the output passed all metric comparisons. + + Returns: + bool + """ + return self.passed + + +@mod.export() +class QualityMetricsResult: + """ + Represents the result of comparing a single output using quality metrics + between two runners. + """ + + def __init__( + self, + passed, + psnr=None, + snr=None, + ): + """ + Records the quality metrics gathered during comparison. + + Args: + passed (bool): + Whether the output passed all enabled quality metric comparisons. + psnr (float): + The Peak Signal-to-Noise Ratio between the outputs. + May be None if PSNR comparison was not enabled. + snr (float): + The Signal-to-Noise Ratio between the outputs. + May be None if SNR comparison was not enabled. + """ + self.passed = passed + self.psnr = psnr + self.snr = snr + + def __bool__(self): + """ + Whether the output passed all metric comparisons. + + Returns: + bool + """ + return self.passed + + +@mod.export() +class PerceptualMetricsResult: + """ + Represents the result of comparing a single output using perceptual metrics + between two runners. 
+ """ + + def __init__( + self, + passed, + lpips=None, + ): + """ + Records the perceptual metrics gathered during comparison. + + Args: + passed (bool): + Whether the output passed all enabled perceptual metric comparisons. + lpips (float): + The Learned Perceptual Image Patch Similarity score between the outputs. + Lower values indicate more perceptually similar outputs. + May be None if LPIPS computation failed. + """ + self.passed = passed + self.lpips = lpips + + def __bool__(self): + """ + Whether the output passed all metric comparisons. + + Returns: + bool + """ + return self.passed + + def default_find_output_func(output_name, index, iter_result, base_iter_result): found_name = util.find_str_in_iterable(output_name, iter_result.keys(), index) if found_name is None: @@ -437,6 +551,9 @@ def req_tol(mean_diff, median_diff, max_diff, quantile_diff): msg = f"Minimum Required Tolerance: {per_out_err_stat} error | [abs={req_tol(mean_absdiff, median_absdiff, max_absdiff, quantile_absdiff):.5g}] OR [rel={req_tol(mean_reldiff, median_reldiff, max_reldiff, quantile_reldiff):.5g}]" if per_out_err_stat == "elemwise": msg += " (requirements may be lower if both abs/rel tolerances are set)" + elif per_out_err_stat == "quantile": + msg += f" (quantile={per_out_quantile:.4g})" + G_LOGGER.info(msg) if save_error_metrics_plot or show_error_metrics_plot: @@ -757,3 +874,1004 @@ def match(out0_name, output0, out1_name, output1): ) return compare_output + + @staticmethod + def distance_metrics( + l2_tolerance=None, + cosine_similarity_threshold=None, + check_shapes=None, + fail_fast=None, + find_output_func=None, + ): + """ + Creates a function that compares two IterationResults using distance metrics (L2 norm and cosine similarity), + and can be used as the `compare_func` argument in ``Comparator.compare_accuracy``. + + Args: + l2_tolerance (Union[float, Dict[str, float]]): + The tolerance to use when checking L2 norm (Euclidean distance). + This can be provided on a per-output basis using a dictionary. In that case, + use an empty string ("") as the key to specify default tolerance for outputs not explicitly listed. + Defaults to 1e-5. + cosine_similarity_threshold (Union[float, Dict[str, float]]): + The minimum cosine similarity required for outputs to be considered matching. + Cosine similarity measures the cosine of the angle between two vectors, with values between -1 and 1. + A value of 1 means vectors are identical or parallel, 0 means they are orthogonal, and -1 means they point in opposite directions. + This can be provided on a per-output basis using a dictionary. In that case, + use an empty string ("") as the key to specify default threshold for outputs not explicitly listed. + Defaults to 0.997 (which corresponds to a cosine distance of 0.003). + check_shapes (bool): + Whether shapes must match exactly. If this is False, this function may + permute or reshape outputs before comparison. + Defaults to True. + fail_fast (bool): + Whether the function should exit immediately after the first failure. + Defaults to False. + find_output_func (Callable(str, int, IterationResult) -> List[str]): + A callback that returns a list of output names to compare against from the provided + IterationResult, given an output name and index from another IterationResult. + The comparison function will always iterate over the output names of the + first IterationResult, expecting names from the second. A return value of + `[]` or `None` indicates that the output should be skipped. 
+ + Returns: + Callable(IterationResult, IterationResult) -> OrderedDict[str, DistanceMetricsResult]: + A callable that returns a mapping of output names to `DistanceMetricsResult` s, indicating + whether the corresponding output matched based on the distance metrics. + """ + check_shapes = util.default(check_shapes, True) + default_l2_tolerance = 1e-5 + default_cosine_similarity_threshold = 0.997 + l2_tolerance = util.default(l2_tolerance, default_l2_tolerance) + cosine_similarity_threshold = util.default(cosine_similarity_threshold, default_cosine_similarity_threshold) + fail_fast = util.default(fail_fast, False) + + def compute_l2_norm(array1, array2): + """Compute L2 norm (Euclidean distance) between two arrays.""" + diff = util.array.subtract(comp_util.cast_up(array1), comp_util.cast_up(array2)) + squared_diff = util.array.power(diff, 2) + sum_squared_diff = util.array.sum(squared_diff) + return util.array.sqrt(sum_squared_diff) + + def compute_cosine_similarity(array1, array2): + """Compute cosine similarity between two arrays.""" + array1_flat = util.array.ravel(comp_util.cast_up(array1)) + array2_flat = util.array.ravel(comp_util.cast_up(array2)) + + # Calculate dot product + dot_product = util.array.sum(util.array.multiply(array1_flat, array2_flat)) + + # Calculate magnitudes + magnitude1 = util.array.sqrt(util.array.sum(util.array.power(array1_flat, 2))) + magnitude2 = util.array.sqrt(util.array.sum(util.array.power(array2_flat, 2))) + + # Avoid division by zero + if magnitude1 == 0 and magnitude2 == 0: + return 1.0 # If both vectors are zero, they are identical (similarity = 1.0) + elif magnitude1 == 0 or magnitude2 == 0: + return 0.0 # If only one vector is zero, they are orthogonal (similarity = 0.0) + + # Cosine similarity is dot_product / (magnitude1 * magnitude2) + cosine_similarity = dot_product / (magnitude1 * magnitude2) + + # Handle floating point issues that might make cosine_similarity slightly outside [-1, 1] + return max(-1.0, min(1.0, cosine_similarity)) + + def check_outputs_match( + out0, + out0_name, + out1, + out1_name, + per_out_l2_tol, + per_out_cosine_sim_threshold, + runner0_name, + runner1_name, + ): + """ + Checks whether two outputs matched using L2 norm and cosine similarity. + + Args: + out0 (Union[np.array, torch.Tensor]): The first output. + out0_name (str): The name of the first output. + out1 (Union[np.array, torch.Tensor]): The second output. + out1_name (str): The name of the second output. + per_out_l2_tol (float): The L2 norm tolerance to use for comparison. + per_out_cosine_sim_threshold (float): The minimum cosine similarity required for a match. + runner0_name (str): The name of the runner that generated the first output. + runner1_name (str): The name of the runner that generated the second output. + + Returns: + DistanceMetricsResult: Details on whether the outputs matched. 
+ """ + G_LOGGER.super_verbose( + f"{runner0_name:35} | Output: {out0_name} (dtype={util.array.dtype(out0)}, shape={util.array.shape(out0)}):\n{util.indent_block(out0)}" + ) + G_LOGGER.super_verbose( + f"{runner1_name:35} | Output: {out1_name} (dtype={util.array.dtype(out1)}, shape={util.array.shape(out1)}):\n{util.indent_block(out1)}" + ) + + # Compute metrics + l2_norm = compute_l2_norm(out0, out1) + cosine_sim = compute_cosine_similarity(out0, out1) + + # Check if outputs match based on the metrics + l2_passed = bool(l2_norm <= per_out_l2_tol) + cosine_passed = bool(cosine_sim >= per_out_cosine_sim_threshold) + + # Overall pass requires all enabled metrics to pass + passed = bool(l2_passed and cosine_passed) + + # Log information + hist_bin_range = ( + min(comp_util.compute_min(out0), comp_util.compute_min(out1)), + max(comp_util.compute_max(out0), comp_util.compute_max(out1)), + ) + comp_util.log_output_stats( + out0, not passed, f"{runner0_name}: {out0_name}", hist_range=hist_bin_range + ) + comp_util.log_output_stats( + out1, not passed, f"{runner1_name}: {out1_name}", hist_range=hist_bin_range + ) + + G_LOGGER.info(f"Distance Metrics: {out0_name}") + with G_LOGGER.indent(): + G_LOGGER.info(f"L2 Norm: {l2_norm:.5g} (tolerance: {per_out_l2_tol:.5g}) | {'PASSED' if l2_passed else 'FAILED'}") + G_LOGGER.info(f"Cosine Similarity: {cosine_sim:.5g} (threshold: {per_out_cosine_sim_threshold:.5g}) | {'PASSED' if cosine_passed else 'FAILED'}") + + # Create a proper DistanceMetricsResult object with our metrics + result = DistanceMetricsResult( + passed=passed, + l2_norm=l2_norm, + cosine_similarity=cosine_sim + ) + + if not passed: + if not l2_passed: + G_LOGGER.error( + f"FAILED | Output: '{out0_name}' | L2 Norm ({l2_norm:.5g}) exceeds tolerance ({per_out_l2_tol:.5g})" + ) + if not cosine_passed: + G_LOGGER.error( + f"FAILED | Output: '{out0_name}' | Cosine Similarity ({cosine_sim:.5g}) below threshold ({per_out_cosine_sim_threshold:.5g})" + ) + else: + metrics_passed = ["L2 Norm", "Cosine Similarity"] + G_LOGGER.finish( + f"PASSED | Output: '{out0_name}' | All metrics passed: {', '.join(metrics_passed)}" + ) + + return result + + def compare_output(iter_result0, iter_result1): + """ + Compare the outputs of two runners from a single iteration using distance metrics. + + This function will always iterate over the output names of the first IterationResult, + and attempt to find corresponding output names in the second. + If no corresponding output name is found, the output is skipped. + If all output names are skipped, then this function raises an error. + + Args: + iter_result0 (IterationResult): The result of the first runner. + iter_result1 (IterationResult): The result of the second runner. + + Returns: + OrderedDict[str, DistanceMetricsResult]: + The name of the outputs compared, derived from the first IterationResult, + and whether they matched. If an output name is not found, it is omitted from this dictionary. + + Raises: + PolygraphyException: If all output names are skipped, and thus no outputs are compared. + """ + def check_dict(dct, dict_name): + if isinstance(dct, dict): + util.check_sequence_contains( + dct.keys(), + set(iter_result0.keys()) | set(iter_result1.keys()) | {""}, + name=dict_name, + log_func=G_LOGGER.warning, + check_missing=False, + ) + + check_dict(l2_tolerance, "the l2_tolerance dictionary") + check_dict(cosine_similarity_threshold, "the cosine_similarity_threshold dictionary") + + if not check_shapes: + G_LOGGER.info( + "Strict shape checking disabled. 
Will attempt to match output shapes before comparisons" + ) + + def match(out0_name, output0, out1_name, output1): + per_out_l2_tol = util.value_or_from_dict(l2_tolerance, out0_name, default_l2_tolerance) + per_out_cosine_sim_threshold = util.value_or_from_dict(cosine_similarity_threshold, out0_name, default_cosine_similarity_threshold) + + # Build tolerance message showing all enabled metrics + tolerance_msg = [f"L2={per_out_l2_tol:.5g}", f"Cosine Similarity min={per_out_cosine_sim_threshold:.5g}"] + + G_LOGGER.info(f"Tolerance: [{', '.join(tolerance_msg)}]") + G_LOGGER.extra_verbose( + f"Note: Comparing {iter_result0.runner_name} vs. {iter_result1.runner_name}" + ) + + if check_shapes and util.array.shape(output0) != util.array.shape( + output1 + ): + G_LOGGER.error( + f"FAILED | Output: `{out0_name}` | Will not compare outputs of different shapes.\n" + f"Note: Output shapes are {util.array.shape(output0)} and {util.array.shape(output1)}." + ) + G_LOGGER.error( + "Note: Use --no-shape-check or set check_shapes=False to " + "attempt to compare values anyway.", + mode=LogMode.ONCE, + ) + return False + + output1 = util.try_match_shape(output1, util.array.shape(output0)) + output0 = util.array.view( + output0, + DataType.from_dtype(util.array.dtype(output0)), + util.array.shape(output1), + ) + + outputs_matched = check_outputs_match( + output0, + out0_name, + output1, + out1_name, + per_out_l2_tol=per_out_l2_tol, + per_out_cosine_sim_threshold=per_out_cosine_sim_threshold, + runner0_name=iter_result0.runner_name, + runner1_name=iter_result1.runner_name, + ) + + return outputs_matched + + nonlocal find_output_func + find_output_func = util.default( + find_output_func, + functools.partial( + default_find_output_func, base_iter_result=iter_result0 + ), + ) + return run_comparison( + match, fail_fast, iter_result0, iter_result1, find_output_func + ) + + return compare_output + + @staticmethod + def quality_metrics( + psnr_tolerance=None, + snr_tolerance=None, + check_shapes=None, + fail_fast=None, + find_output_func=None, + ): + """ + Creates a function that compares two IterationResults using quality metrics (PSNR and SNR), + and can be used as the `compare_func` argument in ``Comparator.compare_accuracy``. + + Args: + psnr_tolerance (Union[float, Dict[str, float]]): + The minimum PSNR (Peak Signal-to-Noise Ratio) value required for outputs to be considered matching. + Higher values of PSNR indicate better quality matches. Typical acceptable values are 30 dB or above. + This can be provided on a per-output basis using a dictionary. In that case, + use an empty string ("") as the key to specify default tolerance for outputs not explicitly listed. + If None, PSNR check will be skipped. Defaults to 30.0. + snr_tolerance (Union[float, Dict[str, float]]): + The minimum SNR (Signal-to-Noise Ratio) value required for outputs to be considered matching. + Higher values of SNR indicate better quality matches. + This can be provided on a per-output basis using a dictionary. In that case, + use an empty string ("") as the key to specify default tolerance for outputs not explicitly listed. + If None, SNR check will be skipped. Defaults to 20.0. + check_shapes (bool): + Whether shapes must match exactly. If this is False, this function may + permute or reshape outputs before comparison. + Defaults to True. + fail_fast (bool): + Whether the function should exit immediately after the first failure. + Defaults to False. 
+ find_output_func (Callable(str, int, IterationResult) -> List[str]): + A callback that returns a list of output names to compare against from the provided + IterationResult, given an output name and index from another IterationResult. + The comparison function will always iterate over the output names of the + first IterationResult, expecting names from the second. A return value of + `[]` or `None` indicates that the output should be skipped. + + Returns: + Callable(IterationResult, IterationResult) -> OrderedDict[str, QualityMetricsResult]: + A callable that returns a mapping of output names to `QualityMetricsResult` s, indicating + whether the corresponding output matched based on the quality metrics. + """ + check_shapes = util.default(check_shapes, True) + default_psnr_tolerance = 30.0 + default_snr_tolerance = 20.0 + psnr_tolerance = util.default(psnr_tolerance, default_psnr_tolerance) + snr_tolerance = util.default(snr_tolerance, default_snr_tolerance) + fail_fast = util.default(fail_fast, False) + + def compute_psnr(array1, array2): + """ + Compute Peak Signal-to-Noise Ratio between two arrays. + Higher values indicate better matches. + """ + array1_cast = comp_util.cast_up(array1) + array2_cast = comp_util.cast_up(array2) + + # Compute Mean Squared Error + mse = util.array.mean(util.array.power( + util.array.subtract(array1_cast, array2_cast), 2 + )) + + # Avoid division by zero + if mse == 0: + return float('inf') # Perfect match + + # Compute data range (max value in reference array) + max_val = comp_util.compute_max(array1_cast) + if max_val <= 0: + max_val = 1.0 # Default to 1.0 if max value is non-positive + + # PSNR formula: 20 * log10(MAX) - 10 * log10(MSE) + psnr = 20 * np.log10(max_val) - 10 * np.log10(mse) + return psnr + + def compute_snr(array1, array2): + """ + Compute Signal-to-Noise Ratio between two arrays. + Higher values indicate better matches. + """ + array1_cast = comp_util.cast_up(array1) + array2_cast = comp_util.cast_up(array2) + + # Signal power + signal_power = util.array.mean(util.array.power(array1_cast, 2)) + + # Noise is the difference between the arrays + noise = util.array.subtract(array1_cast, array2_cast) + noise_power = util.array.mean(util.array.power(noise, 2)) + + # Avoid division by zero + if noise_power == 0: + return float('inf') # Perfect match + if signal_power == 0: + return -float('inf') # No signal + + # SNR formula: 10 * log10(signal_power / noise_power) + snr = 10 * np.log10(signal_power / noise_power) + return snr + + def check_outputs_match( + out0, + out0_name, + out1, + out1_name, + per_out_psnr_tol, + per_out_snr_tol, + runner0_name, + runner1_name, + ): + """ + Checks whether two outputs matched using quality metrics (PSNR and SNR). + + Args: + out0 (Union[np.array, torch.Tensor]): The first output. + out0_name (str): The name of the first output. + out1 (Union[np.array, torch.Tensor]): The second output. + out1_name (str): The name of the second output. + per_out_psnr_tol (float): The minimum PSNR value required for a match. + per_out_snr_tol (float): The minimum SNR value required for a match. + runner0_name (str): The name of the runner that generated the first output. + runner1_name (str): The name of the runner that generated the second output. + + Returns: + QualityMetricsResult: Details on whether the outputs matched. 
+ """ + G_LOGGER.super_verbose( + f"{runner0_name:35} | Output: {out0_name} (dtype={util.array.dtype(out0)}, shape={util.array.shape(out0)}):\n{util.indent_block(out0)}" + ) + G_LOGGER.super_verbose( + f"{runner1_name:35} | Output: {out1_name} (dtype={util.array.dtype(out1)}, shape={util.array.shape(out1)}):\n{util.indent_block(out1)}" + ) + + # Compute metrics + psnr_value = None + if per_out_psnr_tol is not None: + psnr_value = compute_psnr(out0, out1) + + snr_value = None + if per_out_snr_tol is not None: + snr_value = compute_snr(out0, out1) + + # Check if outputs match based on the metrics + # Default to True for metrics that weren't computed + psnr_passed = True + if per_out_psnr_tol is not None and psnr_value is not None: + psnr_passed = bool(psnr_value >= per_out_psnr_tol) + + snr_passed = True + if per_out_snr_tol is not None and snr_value is not None: + snr_passed = bool(snr_value >= per_out_snr_tol) + + # Overall pass requires all enabled metrics to pass + passed = bool(psnr_passed and snr_passed) + + # Log information + hist_bin_range = ( + min(comp_util.compute_min(out0), comp_util.compute_min(out1)), + max(comp_util.compute_max(out0), comp_util.compute_max(out1)), + ) + comp_util.log_output_stats( + out0, not passed, f"{runner0_name}: {out0_name}", hist_range=hist_bin_range + ) + comp_util.log_output_stats( + out1, not passed, f"{runner1_name}: {out1_name}", hist_range=hist_bin_range + ) + + G_LOGGER.info(f"Quality Metrics: {out0_name}") + with G_LOGGER.indent(): + if per_out_psnr_tol is not None and psnr_value is not None: + G_LOGGER.info(f"PSNR: {psnr_value:.5g} dB (min required: {per_out_psnr_tol:.5g} dB) | {'PASSED' if psnr_passed else 'FAILED'}") + + if per_out_snr_tol is not None and snr_value is not None: + G_LOGGER.info(f"SNR: {snr_value:.5g} dB (min required: {per_out_snr_tol:.5g} dB) | {'PASSED' if snr_passed else 'FAILED'}") + + # Create a proper QualityMetricsResult object with our metrics + result = QualityMetricsResult( + passed=passed, + psnr=psnr_value, + snr=snr_value + ) + + if not passed: + if per_out_psnr_tol is not None and psnr_value is not None and not psnr_passed: + G_LOGGER.error( + f"FAILED | Output: '{out0_name}' | PSNR ({psnr_value:.5g} dB) below required minimum ({per_out_psnr_tol:.5g} dB)" + ) + if per_out_snr_tol is not None and snr_value is not None and not snr_passed: + G_LOGGER.error( + f"FAILED | Output: '{out0_name}' | SNR ({snr_value:.5g} dB) below required minimum ({per_out_snr_tol:.5g} dB)" + ) + else: + metrics_passed = [] + if per_out_psnr_tol is not None and psnr_value is not None: + metrics_passed.append("PSNR") + if per_out_snr_tol is not None and snr_value is not None: + metrics_passed.append("SNR") + + if metrics_passed: + G_LOGGER.finish( + f"PASSED | Output: '{out0_name}' | All quality metrics passed: {', '.join(metrics_passed)}" + ) + else: + G_LOGGER.warning( + f"PASSED | Output: '{out0_name}' | No quality metrics were successfully computed" + ) + + return result + + def compare_output(iter_result0, iter_result1): + """ + Compare the outputs of two runners from a single iteration using quality metrics. + + This function will always iterate over the output names of the first IterationResult, + and attempt to find corresponding output names in the second. + If no corresponding output name is found, the output is skipped. + If all output names are skipped, then this function raises an error. + + Args: + iter_result0 (IterationResult): The result of the first runner. 
+ iter_result1 (IterationResult): The result of the second runner. + + Returns: + OrderedDict[str, QualityMetricsResult]: + The name of the outputs compared, derived from the first IterationResult, + and whether they matched. If an output name is not found, it is omitted from this dictionary. + + Raises: + PolygraphyException: If all output names are skipped, and thus no outputs are compared. + """ + def check_dict(dct, dict_name): + if isinstance(dct, dict): + util.check_sequence_contains( + dct.keys(), + set(iter_result0.keys()) | set(iter_result1.keys()) | {""}, + name=dict_name, + log_func=G_LOGGER.warning, + check_missing=False, + ) + + check_dict(psnr_tolerance, "the psnr_tolerance dictionary") + check_dict(snr_tolerance, "the snr_tolerance dictionary") + + if not check_shapes: + G_LOGGER.info( + "Strict shape checking disabled. Will attempt to match output shapes before comparisons" + ) + + def match(out0_name, output0, out1_name, output1): + per_out_psnr_tol = None + if psnr_tolerance is not None: + per_out_psnr_tol = util.value_or_from_dict(psnr_tolerance, out0_name, default_psnr_tolerance) + + per_out_snr_tol = None + if snr_tolerance is not None: + per_out_snr_tol = util.value_or_from_dict(snr_tolerance, out0_name, default_snr_tolerance) + + # Build tolerance message showing all enabled metrics + tolerance_msg = [] + if per_out_psnr_tol is not None: + tolerance_msg.append(f"PSNR min={per_out_psnr_tol:.5g} dB") + if per_out_snr_tol is not None: + tolerance_msg.append(f"SNR min={per_out_snr_tol:.5g} dB") + + if tolerance_msg: + G_LOGGER.info(f"Quality Metrics Tolerance: [{', '.join(tolerance_msg)}]") + else: + G_LOGGER.warning("No quality metrics enabled for comparison") + + G_LOGGER.extra_verbose( + f"Note: Comparing {iter_result0.runner_name} vs. {iter_result1.runner_name}" + ) + + if check_shapes and util.array.shape(output0) != util.array.shape( + output1 + ): + G_LOGGER.error( + f"FAILED | Output: `{out0_name}` | Will not compare outputs of different shapes.\n" + f"Note: Output shapes are {util.array.shape(output0)} and {util.array.shape(output1)}." + ) + G_LOGGER.error( + "Note: Use --no-shape-check or set check_shapes=False to " + "attempt to compare values anyway.", + mode=LogMode.ONCE, + ) + return False + + output1 = util.try_match_shape(output1, util.array.shape(output0)) + output0 = util.array.view( + output0, + DataType.from_dtype(util.array.dtype(output0)), + util.array.shape(output1), + ) + + outputs_matched = check_outputs_match( + output0, + out0_name, + output1, + out1_name, + per_out_psnr_tol=per_out_psnr_tol, + per_out_snr_tol=per_out_snr_tol, + runner0_name=iter_result0.runner_name, + runner1_name=iter_result1.runner_name, + ) + + return outputs_matched + + nonlocal find_output_func + find_output_func = util.default( + find_output_func, + functools.partial( + default_find_output_func, base_iter_result=iter_result0 + ), + ) + return run_comparison( + match, fail_fast, iter_result0, iter_result1, find_output_func + ) + + return compare_output + + @staticmethod + def perceptual_metrics( + lpips_threshold=None, + check_shapes=None, + fail_fast=None, + find_output_func=None, + ): + """ + Creates a function that compares two IterationResults using perceptual metrics (LPIPS), + and can be used as the `compare_func` argument in ``Comparator.compare_accuracy``. + + This function specifically targets image-like data and uses perceptual similarity metrics + that correlate better with human perception than traditional distance metrics. 
+ + Args: + lpips_threshold (Union[float, Dict[str, float]]): + The maximum LPIPS (Learned Perceptual Image Patch Similarity) score allowed for outputs to be considered matching. + Lower values indicate more perceptually similar outputs. Typical values are below 0.1. + This can be provided on a per-output basis using a dictionary. In that case, + use an empty string ("") as the key to specify default threshold for outputs not explicitly listed. + If None, a default value of 0.1 will be used. + check_shapes (bool): + Whether shapes must match exactly. If this is False, this function may + permute or reshape outputs before comparison. + Defaults to True. + fail_fast (bool): + Whether the function should exit immediately after the first failure. + Defaults to False. + find_output_func (Callable(str, int, IterationResult) -> List[str]): + A callback that returns a list of output names to compare against from the provided + IterationResult, given an output name and index from another IterationResult. + The comparison function will always iterate over the output names of the + first IterationResult, expecting names from the second. A return value of + `[]` or `None` indicates that the output should be skipped. + + Returns: + Callable(IterationResult, IterationResult) -> OrderedDict[str, PerceptualMetricsResult]: + A callable that returns a mapping of output names to `PerceptualMetricsResult` s, indicating + whether the corresponding output matched based on the perceptual metrics. + """ + check_shapes = util.default(check_shapes, True) + default_lpips_threshold = 0.1 + lpips_threshold = util.default(lpips_threshold, default_lpips_threshold) + fail_fast = util.default(fail_fast, False) + + # Try to import torch and lpips if available + torch = None + lpips_model = None + try: + torch = mod.lazy_import("torch") + lpips = mod.lazy_import("lpips") + + # Initialize LPIPS model with explicit device specification + device = torch.device('cpu') + + # Try with different initialization approaches + try: + # First try with default initialization + lpips_model = lpips.LPIPS(net='alex', version='0.1').to(device) + except Exception as e1: + G_LOGGER.warning(f"First LPIPS initialization approach failed: {e1}. Trying alternative method...") + + try: + # Try with a different network if AlexNet fails + lpips_model = lpips.LPIPS(net='vgg', version='0.1').to(device) + except Exception as e2: + G_LOGGER.warning(f"Second LPIPS initialization approach failed: {e2}. Trying basic initialization...") + + try: + # As a last resort, try with the most basic initialization + model = lpips.LPIPS(net_type='alex') + model.eval() + lpips_model = model.to(device) + except Exception as e3: + G_LOGGER.warning( + f"Failed to initialize LPIPS model with all methods. LPIPS check will be skipped. " + f"Errors: {e1}; {e2}; {e3}" + ) + except ImportError: + G_LOGGER.warning( + "LPIPS comparison requested but torch or lpips module not found. " + "Install with: pip install torch==1.9.0 lpips==0.1.4. " + "LPIPS check will be skipped." + ) + + def compute_lpips(array1, array2): + """ + Compute LPIPS (Learned Perceptual Image Patch Similarity) between two arrays. + Lower values indicate more perceptually similar outputs. + + Requires PyTorch and the LPIPS package. 
+ """ + if torch is None or lpips_model is None: + return None + + try: + # Cast arrays to numpy to ensure compatibility + array1_np = util.array.to_numpy(comp_util.cast_up(array1)) + array2_np = util.array.to_numpy(comp_util.cast_up(array2)) + + # Check dimensions - LPIPS expects image data + shape1 = array1_np.shape + shape2 = array2_np.shape + + # We need at least 3D arrays for LPIPS (typically B,C,H,W or H,W,C) + if len(shape1) < 3 or len(shape2) < 3: + G_LOGGER.warning(f"LPIPS requires at least 3D arrays, got shapes {shape1} and {shape2}. LPIPS check will be skipped.") + return None + + # Log input shapes for debugging + G_LOGGER.verbose(f"Original tensor shapes: {shape1} and {shape2}") + + # Get device information - use CPU for consistency + device = torch.device('cpu') + + # Convert to PyTorch tensors with shape B,C,H,W + # LPIPS expects values in range [-1, 1] for both color and grayscale images + def prepare_for_lpips(arr): + # Determine input format and convert to B,C,H,W format + if len(arr.shape) == 3: # H,W,C or C,H,W + if arr.shape[2] <= 3: # H,W,C format + # Convert H,W,C to B,C,H,W (add batch dimension) + arr = arr.transpose(2, 0, 1)[None, ...] + else: # C,H,W format + # Add batch dimension + arr = arr[None, ...] + elif len(arr.shape) == 4: # B,C,H,W or B,H,W,C + if arr.shape[3] <= 3: # B,H,W,C format + arr = arr.transpose(0, 3, 1, 2) + # else: already in B,C,H,W format + + # Convert to float and normalize to [-1, 1] range if needed + arr = arr.astype(np.float32) + if arr.max() > 1.0: + arr = arr / 255.0 + if arr.max() <= 1.0 and arr.min() >= 0.0: + arr = arr * 2.0 - 1.0 # [0,1] -> [-1,1] + + # Force 3-channel RGB format required by LPIPS + if arr.shape[1] == 1: # Grayscale (single channel) + # Repeat the channel 3 times to create RGB + arr = np.repeat(arr, 3, axis=1) + elif arr.shape[1] == 2: # Two channels + # Create a third channel (could duplicate channel 2 or create a new one) + third_channel = arr[:, 1:2] # Use second channel as the third + arr = np.concatenate([arr, third_channel], axis=1) + elif arr.shape[1] > 3: # More than 3 channels + arr = arr[:, :3] # Use only first 3 channels + + # Convert to tensor + tensor = torch.from_numpy(arr) + return tensor.float().to(device) + + # Convert both inputs to torch tensors in correct format + img1 = prepare_for_lpips(array1_np) + img2 = prepare_for_lpips(array2_np) + + G_LOGGER.verbose(f"Prepared tensor shapes: {img1.shape} and {img2.shape}") + + # Ensure tensors have the same size in all dimensions + if img1.shape != img2.shape: + G_LOGGER.warning(f"Tensor shapes don't match: {img1.shape} vs {img2.shape}. 
Adjusting...") + + # For channels, ensure both have 3 channels + if img1.shape[1] != 3: + if img1.shape[1] == 1: + img1 = img1.repeat(1, 3, 1, 1) + elif img1.shape[1] == 2: + img1 = torch.cat([img1, img1[:, 1:2]], dim=1) + else: # > 3 channels + img1 = img1[:, :3] + + if img2.shape[1] != 3: + if img2.shape[1] == 1: + img2 = img2.repeat(1, 3, 1, 1) + elif img2.shape[1] == 2: + img2 = torch.cat([img2, img2[:, 1:2]], dim=1) + else: # > 3 channels + img2 = img2[:, :3] + + # For spatial dimensions, resize to match + if img1.shape[2:] != img2.shape[2:]: + # Use the larger of the two spatial dimensions + target_size = (max(img1.shape[2], img2.shape[2]), max(img1.shape[3], img2.shape[3])) + + # Only import interpolate if needed + try: + from torch.nn.functional import interpolate + + if img1.shape[2:] != target_size: + img1 = interpolate(img1, size=target_size, mode='bilinear', align_corners=False) + + if img2.shape[2:] != target_size: + img2 = interpolate(img2, size=target_size, mode='bilinear', align_corners=False) + + except ImportError: + G_LOGGER.warning("Failed to resize tensors: torch.nn.functional.interpolate not available") + if img1.shape[2:] != img2.shape[2:]: + G_LOGGER.warning("Cannot compute LPIPS with tensors of different spatial dimensions") + return None + + G_LOGGER.verbose(f"Final tensor shapes: {img1.shape} and {img2.shape}") + + # Make sure the model is in eval mode + lpips_model.eval() + + # Compute LPIPS distance (using no_grad to avoid storing gradients) + with torch.no_grad(): + try: + # Try the direct method + lpips_dist = lpips_model(img1, img2) + if isinstance(lpips_dist, torch.Tensor): + lpips_dist = lpips_dist.item() + except Exception as e: + G_LOGGER.warning(f"Standard LPIPS computation failed: {e}. Trying fallback method...") + try: + # Try an alternative approach + lpips_dist = lpips_model.forward(img1, img2) + if isinstance(lpips_dist, torch.Tensor): + lpips_dist = lpips_dist.mean().item() + except Exception as e2: + G_LOGGER.warning(f"Fallback LPIPS computation failed: {e2}. LPIPS check will be skipped.") + return None + + return lpips_dist + + except Exception as e: + G_LOGGER.warning(f"Error computing LPIPS: {e}. LPIPS check will be skipped.") + return None + + def check_perceptual_metrics( + out0, + out0_name, + out1, + out1_name, + per_out_lpips_threshold, + runner0_name, + runner1_name, + ): + """ + Checks whether two outputs match using perceptual metrics. + + Args: + out0 (Union[np.array, torch.Tensor]): The first output. + out0_name (str): The name of the first output. + out1 (Union[np.array, torch.Tensor]): The second output. + out1_name (str): The name of the second output. + per_out_lpips_threshold (float): The maximum LPIPS score allowed for a match. + runner0_name (str): The name of the runner that generated the first output. + runner1_name (str): The name of the runner that generated the second output. + + Returns: + PerceptualMetricsResult: Details on whether the outputs matched. 
+ """ + # Log input information + G_LOGGER.super_verbose( + f"{runner0_name:35} | Output: {out0_name} (dtype={util.array.dtype(out0)}, shape={util.array.shape(out0)}):\n{util.indent_block(out0)}" + ) + G_LOGGER.super_verbose( + f"{runner1_name:35} | Output: {out1_name} (dtype={util.array.dtype(out1)}, shape={util.array.shape(out1)}):\n{util.indent_block(out1)}" + ) + + # Compute LPIPS + lpips_value = compute_lpips(out0, out1) + + # Check if outputs match based on the metrics + lpips_passed = True + if lpips_value is not None: + lpips_passed = bool(lpips_value <= per_out_lpips_threshold) + + # Overall pass only depends on LPIPS for now + passed = lpips_passed + + # Log information about the outputs + hist_bin_range = ( + min(comp_util.compute_min(out0), comp_util.compute_min(out1)), + max(comp_util.compute_max(out0), comp_util.compute_max(out1)), + ) + comp_util.log_output_stats( + out0, not passed, f"{runner0_name}: {out0_name}", hist_range=hist_bin_range + ) + comp_util.log_output_stats( + out1, not passed, f"{runner1_name}: {out1_name}", hist_range=hist_bin_range + ) + + # Log perceptual metrics + G_LOGGER.info(f"Perceptual Metrics: {out0_name}") + with G_LOGGER.indent(): + if lpips_value is not None: + G_LOGGER.info(f"LPIPS: {lpips_value:.5g} (max allowed: {per_out_lpips_threshold:.5g}) | {'PASSED' if lpips_passed else 'FAILED'}") + else: + G_LOGGER.warning("LPIPS computation was skipped or failed") + + # Create a PerceptualMetricsResult object + result = PerceptualMetricsResult( + passed=passed, + lpips=lpips_value + ) + + # Log pass/fail status + if not passed: + if lpips_value is not None and not lpips_passed: + G_LOGGER.error( + f"FAILED | Output: '{out0_name}' | LPIPS ({lpips_value:.5g}) exceeds maximum threshold ({per_out_lpips_threshold:.5g})" + ) + else: + metrics_passed = [] + if lpips_value is not None: + metrics_passed.append("LPIPS") + + if metrics_passed: + G_LOGGER.finish( + f"PASSED | Output: '{out0_name}' | All perceptual metrics passed: {', '.join(metrics_passed)}" + ) + else: + G_LOGGER.warning( + f"PASSED | Output: '{out0_name}' | No perceptual metrics were successfully computed" + ) + + return result + + def compare_output(iter_result0, iter_result1): + """ + Compare the outputs of two runners from a single iteration using perceptual metrics. + + This function will always iterate over the output names of the first IterationResult, + and attempt to find corresponding output names in the second. + If no corresponding output name is found, the output is skipped. + If all output names are skipped, then this function raises an error. + + Args: + iter_result0 (IterationResult): The result of the first runner. + iter_result1 (IterationResult): The result of the second runner. + + Returns: + OrderedDict[str, PerceptualMetricsResult]: + The name of the outputs compared, derived from the first IterationResult, + and whether they matched. If an output name is not found, it is omitted from this dictionary. + + Raises: + PolygraphyException: If all output names are skipped, and thus no outputs are compared. + """ + def check_dict(dct, dict_name): + if isinstance(dct, dict): + util.check_sequence_contains( + dct.keys(), + set(iter_result0.keys()) | set(iter_result1.keys()) | {""}, + name=dict_name, + log_func=G_LOGGER.warning, + check_missing=False, + ) + + check_dict(lpips_threshold, "the lpips_threshold dictionary") + + if not check_shapes: + G_LOGGER.info( + "Strict shape checking disabled. 
Will attempt to match output shapes before comparisons" + ) + + def match(out0_name, output0, out1_name, output1): + per_out_lpips_threshold = util.value_or_from_dict(lpips_threshold, out0_name, default_lpips_threshold) + + # Log threshold information + G_LOGGER.info(f"Perceptual Tolerance: [LPIPS max={per_out_lpips_threshold:.5g}]") + G_LOGGER.extra_verbose( + f"Note: Comparing {iter_result0.runner_name} vs. {iter_result1.runner_name}" + ) + + if check_shapes and util.array.shape(output0) != util.array.shape( + output1 + ): + G_LOGGER.error( + f"FAILED | Output: `{out0_name}` | Will not compare outputs of different shapes.\n" + f"Note: Output shapes are {util.array.shape(output0)} and {util.array.shape(output1)}." + ) + G_LOGGER.error( + "Note: Use --no-shape-check or set check_shapes=False to " + "attempt to compare values anyway.", + mode=LogMode.ONCE, + ) + return False + + output1 = util.try_match_shape(output1, util.array.shape(output0)) + output0 = util.array.view( + output0, + DataType.from_dtype(util.array.dtype(output0)), + util.array.shape(output1), + ) + + outputs_matched = check_perceptual_metrics( + output0, + out0_name, + output1, + out1_name, + per_out_lpips_threshold=per_out_lpips_threshold, + runner0_name=iter_result0.runner_name, + runner1_name=iter_result1.runner_name, + ) + + return outputs_matched + + nonlocal find_output_func + find_output_func = util.default( + find_output_func, + functools.partial( + default_find_output_func, base_iter_result=iter_result0 + ), + ) + return run_comparison( + match, fail_fast, iter_result0, iter_result1, find_output_func + ) + + return compare_output diff --git a/tools/Polygraphy/polygraphy/comparator/data_loader.py b/tools/Polygraphy/polygraphy/comparator/data_loader.py index c7d1655f5..1ca8374e6 100644 --- a/tools/Polygraphy/polygraphy/comparator/data_loader.py +++ b/tools/Polygraphy/polygraphy/comparator/data_loader.py @@ -423,6 +423,10 @@ def __init__(self, data_loader, save_inputs_path=None): self.cache = [] # List[OrderedDict[str, numpy.ndarray]] self.save_inputs_path = save_inputs_path + @func.constantmethod + def __len__(self): + return len(self.cache) + @func.constantmethod def __getitem__(self, iteration): """ diff --git a/tools/Polygraphy/polygraphy/util/array.py b/tools/Polygraphy/polygraphy/util/array.py index 57231ff88..52a1b0a6f 100644 --- a/tools/Polygraphy/polygraphy/util/array.py +++ b/tools/Polygraphy/polygraphy/util/array.py @@ -1327,3 +1327,93 @@ def where(): "numpy": lambda cond, lhs, rhs: np.where(cond, lhs, rhs), "torch": lambda cond, lhs, rhs: torch.where(cond, lhs, rhs), } + + +@mod.export() +@dispatch(num_arrays=2) +def power(): + """ + Computes the element-wise power of an array to the given exponent. + + Args: + obj (Union[torch.Tensor, numpy.ndarray]): + The base array or tensor. + exponent (Union[int, float, torch.Tensor, numpy.ndarray]): + The exponent value or array. + + Returns: + Union[torch.Tensor, numpy.ndarray]: The power result. + + Raises: + PolygraphyException: if the input is of an unrecognized type. + """ + return { + "numpy": lambda obj, exponent: np.power(obj, exponent), + "torch": lambda obj, exponent: torch.pow(obj, exponent), + } + + +@mod.export() +@dispatch() +def sum(): + """ + Computes the sum of all elements in the array. + + Args: + obj (Union[torch.Tensor, numpy.ndarray]): The array or tensor. + + Returns: + Union[Number, torch.Tensor, numpy.ndarray]: The sum of all elements. + + Raises: + PolygraphyException: if the input is of an unrecognized type. 
+ """ + return { + "numpy": lambda obj: np.sum(obj), + "torch": lambda obj: torch.sum(obj), + } + + +@mod.export() +@dispatch() +def sqrt(): + """ + Computes the element-wise square root of an array. + + Args: + obj (Union[torch.Tensor, numpy.ndarray]): The array or tensor. + + Returns: + Union[torch.Tensor, numpy.ndarray]: The square root results. + + Raises: + PolygraphyException: if the input is of an unrecognized type. + """ + return { + "numpy": lambda obj: np.sqrt(obj), + "torch": lambda obj: torch.sqrt(obj), + } + + +@mod.export() +@dispatch(num_arrays=2) +def multiply(): + """ + Computes the element-wise multiplication of two arrays. + + Args: + lhs (Union[torch.Tensor, numpy.ndarray]): + The first array or tensor. + rhs (Union[torch.Tensor, numpy.ndarray]): + The second array or tensor. + + Returns: + Union[torch.Tensor, numpy.ndarray]: The element-wise product. + + Raises: + PolygraphyException: if the input is of an unrecognized type. + """ + return { + "numpy": lambda lhs, rhs: np.multiply(lhs, rhs), + "torch": lambda lhs, rhs: torch.mul(lhs, rhs), + } diff --git a/tools/Polygraphy/tests/cuda/test_cuda.py b/tools/Polygraphy/tests/cuda/test_cuda.py index 7552f2f6e..40736e5b4 100644 --- a/tools/Polygraphy/tests/cuda/test_cuda.py +++ b/tools/Polygraphy/tests/cuda/test_cuda.py @@ -156,7 +156,7 @@ def test_copy_from_overhead(self): copy_from_time = time_func(lambda: dev_buf.copy_from(host_buf)) print(f"memcpy time: {memcpy_time}, copy_from time: {copy_from_time}") - assert copy_from_time <= (memcpy_time * 1.08) + assert copy_from_time <= (memcpy_time * 1.12) @pytest.mark.flaky @pytest.mark.serial @@ -175,7 +175,7 @@ def test_copy_to_overhead(self): copy_to_time = time_func(lambda: dev_buf.copy_to(host_buf)) print(f"memcpy time: {memcpy_time}, copy_to time: {copy_to_time}") - assert copy_to_time <= (memcpy_time * 1.08) + assert copy_to_time <= (memcpy_time * 1.12) def test_raw(self): with DeviceArray.raw((25,)) as buf: diff --git a/tools/onnx-graphsurgeon/CHANGELOG.md b/tools/onnx-graphsurgeon/CHANGELOG.md index fd4c3d87f..00285bfef 100644 --- a/tools/onnx-graphsurgeon/CHANGELOG.md +++ b/tools/onnx-graphsurgeon/CHANGELOG.md @@ -2,6 +2,12 @@ Dates are in YYYY-MM-DD format. 
+## v0.5.8 (2025-04-08) + +### Fixed + +- Unpin ONNX version <= 1.16.1 + ## v0.5.7 (2025-03-24) ### Fixed diff --git a/tools/onnx-graphsurgeon/onnx_graphsurgeon/__init__.py b/tools/onnx-graphsurgeon/onnx_graphsurgeon/__init__.py index f0ae9e6a1..676e28255 100644 --- a/tools/onnx-graphsurgeon/onnx_graphsurgeon/__init__.py +++ b/tools/onnx-graphsurgeon/onnx_graphsurgeon/__init__.py @@ -7,4 +7,4 @@ from onnx_graphsurgeon.ir.tensor import Constant, Tensor, Variable from onnx_graphsurgeon.util.exception import OnnxGraphSurgeonException -__version__ = "0.5.7" +__version__ = "0.5.8" From b872b2b2319d736e6c30bad9bc2acef2dedcb717 Mon Sep 17 00:00:00 2001 From: Asfiya Baig Date: Tue, 20 May 2025 10:12:35 -0700 Subject: [PATCH 2/6] Update changelog date Signed-off-by: Asfiya Baig --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93062dc53..3dbd66a92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # TensorRT OSS Release Changelog -## 10.11.0 GA - 2025-5-16 +## 10.11.0 GA - 2025-5-20 Key Features and Updates: From d2477734f66395275fd727e120a8d1b3f686d2cf Mon Sep 17 00:00:00 2001 From: Asfiya Baig Date: Tue, 20 May 2025 10:15:27 -0700 Subject: [PATCH 3/6] Update ONNX parser Signed-off-by: Asfiya Baig --- parsers/onnx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parsers/onnx b/parsers/onnx index 3b9c961a4..745bde22c 160000 --- a/parsers/onnx +++ b/parsers/onnx @@ -1 +1 @@ -Subproject commit 3b9c961a4318cea6fa1fa5f064562064eb27a9bd +Subproject commit 745bde22c2fe883968cf18cc9ebdfb2e2985166d From e9769da70536ffaa3ac2029978c1ca90b39558bc Mon Sep 17 00:00:00 2001 From: Asfiya Baig Date: Tue, 20 May 2025 13:13:08 -0700 Subject: [PATCH 4/6] add shouldCompileKernel Signed-off-by: Asfiya Baig --- cmake/modules/ShouldCompileKernel.cmake | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 cmake/modules/ShouldCompileKernel.cmake diff --git a/cmake/modules/ShouldCompileKernel.cmake b/cmake/modules/ShouldCompileKernel.cmake new file mode 100644 index 000000000..e01928f97 --- /dev/null +++ b/cmake/modules/ShouldCompileKernel.cmake @@ -0,0 +1,41 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Certain cubins are binary compatible between different SM versions, so they are reused. +# This function checks if a SM-named file should be compiled based on current SM enablement. +# Specifically, the SM80 files are compiled if either 80, 86, or 89 are enabled. +function(should_compile_kernel SM OUT_VAR) + # If the target SM is any of 80/86/89, we need to check if any of those are enabled in CMAKE_CUDA_ARCHITECTURES. 
+ if((${SM} EQUAL 80) OR (${SM} EQUAL 86) OR (${SM} EQUAL 89)) + list(FIND CMAKE_CUDA_ARCHITECTURES 80 SM80_INDEX) + list(FIND CMAKE_CUDA_ARCHITECTURES 86 SM86_INDEX) + list(FIND CMAKE_CUDA_ARCHITECTURES 89 SM89_INDEX) + if((NOT ${SM80_INDEX} EQUAL -1) OR + (NOT ${SM86_INDEX} EQUAL -1) OR + (NOT ${SM89_INDEX} EQUAL -1) + ) + set(${OUT_VAR} TRUE PARENT_SCOPE) + else() + set(${OUT_VAR} FALSE PARENT_SCOPE) + endif() + else() + list(FIND CMAKE_CUDA_ARCHITECTURES ${SM} SM_INDEX) + if (NOT ${SM_INDEX} EQUAL -1) + set(${OUT_VAR} TRUE PARENT_SCOPE) + else() + set(${OUT_VAR} FALSE PARENT_SCOPE) + endif() + endif() +endfunction() From b375794340f1cefd57b59aa6ae8b8cb424f151f9 Mon Sep 17 00:00:00 2001 From: Asfiya Baig Date: Wed, 21 May 2025 15:44:09 -0700 Subject: [PATCH 5/6] changelog updates Signed-off-by: Asfiya Baig --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3dbd66a92..efa327cf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,11 +1,10 @@ # TensorRT OSS Release Changelog -## 10.11.0 GA - 2025-5-20 +## 10.11.0 GA - 2025-5-21 Key Features and Updates: - Plugin changes - - Migrated `IPluginV2`-descendent version 1 of `cropAndResizePluginDynamic`, to version 2, which implements `IPluginV3`. - Migrated `IPluginV2`-descendent version 1 of `DisentangledAttention_TRT`, to version 2, which implements `IPluginV3`. - Migrated `IPluginV2`-descendent version 1 of `MultiscaleDeformableAttnPlugin_TRT`, to version 2, which implements `IPluginV3`. - Note: The newer versions preserve the attributes and I/O of the corresponding older plugin version. The older plugin versions are deprecated and will be removed in a future release. From ed510de6055cfdf72045ed52400212ff26e05581 Mon Sep 17 00:00:00 2001 From: Asfiya Baig Date: Wed, 21 May 2025 15:54:10 -0700 Subject: [PATCH 6/6] Update changelog plugin Signed-off-by: Asfiya Baig --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index efa327cf9..2b5a41da5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ Key Features and Updates: - Plugin changes + - Migrated `IPluginV2`-descendent version 1 of `modulatedDeformConvPlugin`, to version 2, which implements `IPluginV3`. - Migrated `IPluginV2`-descendent version 1 of `DisentangledAttention_TRT`, to version 2, which implements `IPluginV3`. - Migrated `IPluginV2`-descendent version 1 of `MultiscaleDeformableAttnPlugin_TRT`, to version 2, which implements `IPluginV3`. - Note: The newer versions preserve the attributes and I/O of the corresponding older plugin version. The older plugin versions are deprecated and will be removed in a future release.
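
Usage note (illustrative, not part of the patch series): the `distance_metrics`, `quality_metrics`, and `perceptual_metrics` factories added to tools/Polygraphy/polygraphy/comparator/compare.py above are alternatives to the existing elementwise comparison and plug into the same `compare_func` slot of `Comparator.compare_accuracy`. The sketch below assumes these static methods are exposed on `CompareFunc` (the class that hosts the existing comparison factories) and uses a placeholder model path `model.onnx`; the runner and loader classes come from the pre-existing Polygraphy API, not from this patch.

    from polygraphy.backend.onnxrt import OnnxrtRunner, SessionFromOnnx
    from polygraphy.backend.trt import EngineFromNetwork, NetworkFromOnnxPath, TrtRunner
    from polygraphy.comparator import Comparator, CompareFunc

    # Run the same model under ONNX-Runtime and TensorRT.
    runners = [
        OnnxrtRunner(SessionFromOnnx("model.onnx")),
        TrtRunner(EngineFromNetwork(NetworkFromOnnxPath("model.onnx"))),
    ]
    run_results = Comparator.run(runners)

    # Compare with the new whole-tensor distance metrics instead of per-element tolerances.
    Comparator.compare_accuracy(
        run_results,
        compare_func=CompareFunc.distance_metrics(
            l2_tolerance=1e-5,                  # maximum allowed L2 norm of the difference
            cosine_similarity_threshold=0.997,  # minimum required cosine similarity
        ),
    )

Both thresholds may also be given per output as dictionaries, with "" as the key for the default, mirroring the docstrings in the patch. `CompareFunc.quality_metrics(psnr_tolerance=..., snr_tolerance=...)` and `CompareFunc.perceptual_metrics(lpips_threshold=...)` would be used the same way; the perceptual comparison skips the LPIPS check, as handled in the patch, when torch or lpips is not installed.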