Commit c56f4c7 (2 parents: 4575799 + 6cad70d)

Merge pull request #2 from NVIDIA/release/10.4

pull 10.4

155 files changed: +11543 additions, -9888 deletions


.clang-format

Lines changed: 1 addition & 1 deletion

@@ -74,7 +74,7 @@ SpacesInContainerLiterals: true
 SpacesInParentheses: false
 SpacesInSquareBrackets: false
 Standard: Cpp11
-StatementMacros: [API_ENTRY_TRY]
+StatementMacros: [API_ENTRY_TRY,TRT_TRY]
 TabWidth: 4
 UseTab: Never
 ...

CHANGELOG.md

Lines changed: 49 additions & 1 deletion

@@ -1,6 +1,54 @@
 # TensorRT OSS Release Changelog
 
-## 10.2.0 GA - 2024-07-10
+## 10.4.0 GA - 2024-09-11
+Key Features and Updates:
+
+- Demo changes
+  - Added [Stable Cascade](demo/Diffusion) pipeline.
+  - Enabled INT8 and FP8 quantization for Stable Diffusion v1.5, v2.0 and v2.1 pipelines.
+  - Enabled FP8 quantization for Stable Diffusion XL pipeline.
+- Sample changes
+  - Added a new Python sample, `aliased_io_plugin`, which demonstrates how in-place updates to plugin inputs can be achieved through I/O aliasing.
+- Plugin changes
+  - Migrated IPluginV2-descendent versions (a) of the following plugins to newer versions (b) which implement IPluginV3 (a->b):
+    - scatterElementsPlugin (1->2)
+    - skipLayerNormPlugin (1->5, 2->6, 3->7, 4->8)
+    - embLayerNormPlugin (2->4, 3->5)
+    - bertQKVToContextPlugin (1->4, 2->5, 3->6)
+  - Note
+    - The newer versions preserve the attributes and I/O of the corresponding older plugin versions.
+    - The older plugin versions are deprecated and will be removed in a future release.
+
+- Quickstart guide
+  - Updated the deploy_to_triton guide and removed legacy APIs.
+  - Removed legacy TF-TRT code as the project is no longer supported.
+  - Removed quantization_tutorial as pytorch_quantization has been deprecated. Check out https://github.com/NVIDIA/TensorRT-Model-Optimizer for the latest quantization support. Check [Stable Diffusion XL (Base/Turbo) and Stable Diffusion 1.5 Quantization with Model Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/diffusers/quantization) for integration with TensorRT.
+- Parser changes
+  - Added support for tensor `axes` for `Pad` operations.
+  - Added support for `BlackmanWindow`, `HammingWindow`, and `HannWindow` operations.
+  - Improved error handling in `IParserRefitter`.
+  - Fixed kernel shape inference in multi-input convolutions.
+
+- Updated tooling
+  - polygraphy-extension-trtexec v0.0.9
+
+## 10.3.0 GA - 2024-08-02
+
+Key Features and Updates:
+
+- Demo changes
+  - Added [Stable Video Diffusion](demo/Diffusion) (`SVD`) pipeline.
+- Plugin changes
+  - Deprecated Version 1 of the [ScatterElements plugin](plugin/scatterElementsPlugin). It is superseded by Version 2, which implements the `IPluginV3` interface.
+- Quickstart guide
+  - Updated the [SemanticSegmentation](quickstart/SemanticSegmentation) guide with the latest APIs.
+- Parser changes
+  - Added support for tensor `axes` inputs for the `Slice` node.
+  - Updated the `ScatterElements` importer to use Version 2 of the [ScatterElements plugin](plugin/scatterElementsPlugin), which implements the `IPluginV3` interface.
+- Updated tooling
+  - Polygraphy v0.49.13
+
+## 10.2.0 GA - 2024-07-09
 
 Key Features and Updates:
 
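The 10.4 changelog entry above introduces the `aliased_io_plugin` sample, which shows in-place updates to plugin inputs through I/O aliasing. As a conceptual sketch in plain Python (deliberately not the TensorRT API; the buffer names are illustrative), aliasing means the output buffer is the same storage as the input buffer, so the "kernel" mutates data in place instead of writing a separate output copy:

```python
import array

# The plugin's input tensor (stand-in: a flat float buffer).
buf = array.array("f", [0.0, 0.0, 0.0, 0.0])

# Aliased output: the same storage as the input, so no copy is made.
out = buf

# "Kernel" body: an in-place increment of every element.
for i in range(len(out)):
    out[i] += 1.0

# The update is visible through the input handle, since input and
# output share one allocation.
print(list(buf))  # → [1.0, 1.0, 1.0, 1.0]
```

The point of the sample is exactly this sharing: without aliasing, a plugin would need distinct input and output allocations plus a copy to achieve the same stateful update.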
CMakeLists.txt

Lines changed: 1 addition & 1 deletion

@@ -80,7 +80,7 @@ option(BUILD_PARSERS "Build TensorRT parsers" ON)
 option(BUILD_SAMPLES "Build TensorRT samples" ON)
 
 # C++14
-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
 set(CMAKE_CXX_EXTENSIONS OFF)
 

LICENSE

Lines changed: 20 additions & 1 deletion

@@ -337,10 +337,11 @@
 limitations under the License.
 
 > demo/Diffusion/utilities.py
+> demo/Diffusion/stable_video_diffusion_pipeline.py
 
 HuggingFace diffusers library.
 
-Copyright 2022 The HuggingFace Team.
+Copyright 2024 The HuggingFace Team.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -380,3 +381,21 @@
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
+
+> demo/Diffusion/utilities.py
+
+ModelScope library.
+
+Copyright (c) Alibaba, Inc. and its affiliates.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.

README.md

Lines changed: 28 additions & 28 deletions

@@ -26,13 +26,13 @@ You can skip the **Build** section to enjoy TensorRT with Python.
 To build the TensorRT-OSS components, you will first need the following software packages.
 
 **TensorRT GA build**
-* TensorRT v10.2.0.19
+* TensorRT v10.4.0.26
 * Available from direct download links listed below
 
 **System Packages**
 * [CUDA](https://developer.nvidia.com/cuda-toolkit)
   * Recommended versions:
-  * cuda-12.5.0 + cuDNN-8.9
+  * cuda-12.6.0 + cuDNN-8.9
   * cuda-11.8.0 + cuDNN-8.9
 * [GNU make](https://ftp.gnu.org/gnu/make/) >= v4.1
 * [cmake](https://github.com/Kitware/CMake/releases) >= v3.13
@@ -73,25 +73,25 @@ To build the TensorRT-OSS components, you will first need the following software
 If using the TensorRT OSS build container, TensorRT libraries are preinstalled under `/usr/lib/x86_64-linux-gnu` and you may skip this step.
 
 Else download and extract the TensorRT GA build from [NVIDIA Developer Zone](https://developer.nvidia.com) with the direct links below:
-- [TensorRT 10.2.0.19 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/tars/TensorRT-10.2.0.19.Linux.x86_64-gnu.cuda-11.8.tar.gz)
-- [TensorRT 10.2.0.19 for CUDA 12.5, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/tars/TensorRT-10.2.0.19.Linux.x86_64-gnu.cuda-12.5.tar.gz)
-- [TensorRT 10.2.0.19 for CUDA 11.8, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/zip/TensorRT-10.2.0.19.Windows.win10.cuda-11.8.zip)
-- [TensorRT 10.2.0.19 for CUDA 12.5, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/zip/TensorRT-10.2.0.19.Windows.win10.cuda-12.5.zip)
+- [TensorRT 10.4.0.26 for CUDA 11.8, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-11.8.tar.gz)
+- [TensorRT 10.4.0.26 for CUDA 12.6, Linux x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/tars/TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz)
+- [TensorRT 10.4.0.26 for CUDA 11.8, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-11.8.zip)
+- [TensorRT 10.4.0.26 for CUDA 12.6, Windows x86_64](https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.4.0/zip/TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip)
 
 
-**Example: Ubuntu 20.04 on x86-64 with cuda-12.5**
+**Example: Ubuntu 20.04 on x86-64 with cuda-12.6**
 
 ```bash
 cd ~/Downloads
-tar -xvzf TensorRT-10.2.0.19.Linux.x86_64-gnu.cuda-12.5.tar.gz
-export TRT_LIBPATH=`pwd`/TensorRT-10.2.0.19
+tar -xvzf TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz
+export TRT_LIBPATH=`pwd`/TensorRT-10.4.0.26
 ```
 
-**Example: Windows on x86-64 with cuda-12.5**
+**Example: Windows on x86-64 with cuda-12.6**
 
 ```powershell
-Expand-Archive -Path TensorRT-10.2.0.19.Windows.win10.cuda-12.5.zip
-$env:TRT_LIBPATH="$pwd\TensorRT-10.2.0.19\lib"
+Expand-Archive -Path TensorRT-10.4.0.26.Windows.win10.cuda-12.6.zip
+$env:TRT_LIBPATH="$pwd\TensorRT-10.4.0.26\lib"
 ```
 
 ## Setting Up The Build Environment
@@ -101,27 +101,27 @@ For Linux platforms, we recommend that you generate a docker container for build
 1. #### Generate the TensorRT-OSS build container.
 The TensorRT-OSS build container can be generated using the supplied Dockerfiles and build scripts. The build containers are configured for building TensorRT OSS out-of-the-box.
 
-**Example: Ubuntu 20.04 on x86-64 with cuda-12.5 (default)**
+**Example: Ubuntu 20.04 on x86-64 with cuda-12.6 (default)**
 ```bash
-./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.5
+./docker/build.sh --file docker/ubuntu-20.04.Dockerfile --tag tensorrt-ubuntu20.04-cuda12.6
 ```
-**Example: Rockylinux8 on x86-64 with cuda-12.5**
+**Example: Rockylinux8 on x86-64 with cuda-12.6**
 ```bash
-./docker/build.sh --file docker/rockylinux8.Dockerfile --tag tensorrt-rockylinux8-cuda12.5
+./docker/build.sh --file docker/rockylinux8.Dockerfile --tag tensorrt-rockylinux8-cuda12.6
 ```
-**Example: Ubuntu 22.04 cross-compile for Jetson (aarch64) with cuda-12.5 (JetPack SDK)**
+**Example: Ubuntu 22.04 cross-compile for Jetson (aarch64) with cuda-12.6 (JetPack SDK)**
 ```bash
-./docker/build.sh --file docker/ubuntu-cross-aarch64.Dockerfile --tag tensorrt-jetpack-cuda12.5
+./docker/build.sh --file docker/ubuntu-cross-aarch64.Dockerfile --tag tensorrt-jetpack-cuda12.6
 ```
-**Example: Ubuntu 22.04 on aarch64 with cuda-12.5**
+**Example: Ubuntu 22.04 on aarch64 with cuda-12.6**
 ```bash
-./docker/build.sh --file docker/ubuntu-22.04-aarch64.Dockerfile --tag tensorrt-aarch64-ubuntu22.04-cuda12.5
+./docker/build.sh --file docker/ubuntu-22.04-aarch64.Dockerfile --tag tensorrt-aarch64-ubuntu22.04-cuda12.6
 ```
 
 2. #### Launch the TensorRT-OSS build container.
 **Example: Ubuntu 20.04 build container**
 ```bash
-./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.5 --gpus all
+./docker/launch.sh --tag tensorrt-ubuntu20.04-cuda12.6 --gpus all
 ```
 > NOTE:
 <br> 1. Use the `--tag` corresponding to build container generated in Step 1.
@@ -132,38 +132,38 @@ For Linux platforms, we recommend that you generate a docker container for build
 ## Building TensorRT-OSS
 * Generate Makefiles and build.
 
-**Example: Linux (x86-64) build with default cuda-12.5**
+**Example: Linux (x86-64) build with default cuda-12.6**
 ```bash
 cd $TRT_OSSPATH
 mkdir -p build && cd build
 cmake .. -DTRT_LIB_DIR=$TRT_LIBPATH -DTRT_OUT_DIR=`pwd`/out
 make -j$(nproc)
 ```
-**Example: Linux (aarch64) build with default cuda-12.5**
+**Example: Linux (aarch64) build with default cuda-12.6**
 ```bash
 cd $TRT_OSSPATH
 mkdir -p build && cd build
 cmake .. -DTRT_LIB_DIR=$TRT_LIBPATH -DTRT_OUT_DIR=`pwd`/out -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_aarch64-native.toolchain
 make -j$(nproc)
 ```
-**Example: Native build on Jetson (aarch64) with cuda-12.5**
+**Example: Native build on Jetson (aarch64) with cuda-12.6**
 ```bash
 cd $TRT_OSSPATH
 mkdir -p build && cd build
-cmake .. -DTRT_LIB_DIR=$TRT_LIBPATH -DTRT_OUT_DIR=`pwd`/out -DTRT_PLATFORM_ID=aarch64 -DCUDA_VERSION=12.5
+cmake .. -DTRT_LIB_DIR=$TRT_LIBPATH -DTRT_OUT_DIR=`pwd`/out -DTRT_PLATFORM_ID=aarch64 -DCUDA_VERSION=12.6
 CC=/usr/bin/gcc make -j$(nproc)
 ```
 > NOTE: C compiler must be explicitly specified via CC= for native aarch64 builds of protobuf.
 
-**Example: Ubuntu 22.04 Cross-Compile for Jetson (aarch64) with cuda-12.5 (JetPack)**
+**Example: Ubuntu 22.04 Cross-Compile for Jetson (aarch64) with cuda-12.6 (JetPack)**
 ```bash
 cd $TRT_OSSPATH
 mkdir -p build && cd build
-cmake .. -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_aarch64.toolchain -DCUDA_VERSION=12.5 -DCUDNN_LIB=/pdk_files/cudnn/usr/lib/aarch64-linux-gnu/libcudnn.so -DCUBLAS_LIB=/usr/local/cuda-12.5/targets/aarch64-linux/lib/stubs/libcublas.so -DCUBLASLT_LIB=/usr/local/cuda-12.5/targets/aarch64-linux/lib/stubs/libcublasLt.so -DTRT_LIB_DIR=/pdk_files/tensorrt/lib
+cmake .. -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_aarch64.toolchain -DCUDA_VERSION=12.6 -DCUDNN_LIB=/pdk_files/cudnn/usr/lib/aarch64-linux-gnu/libcudnn.so -DCUBLAS_LIB=/usr/local/cuda-12.6/targets/aarch64-linux/lib/stubs/libcublas.so -DCUBLASLT_LIB=/usr/local/cuda-12.6/targets/aarch64-linux/lib/stubs/libcublasLt.so -DTRT_LIB_DIR=/pdk_files/tensorrt/lib
 make -j$(nproc)
 ```
 
-**Example: Native builds on Windows (x86) with cuda-12.5**
+**Example: Native builds on Windows (x86) with cuda-12.6**
 ```powershell
 cd $TRT_OSSPATH
 mkdir -p build
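The README hunks above consistently pair the downloaded tarball name with the directory it unpacks to (`TRT_LIBPATH`). A small sketch of that naming convention, assuming NVIDIA's `TensorRT-<version>.Linux.<arch>-gnu.cuda-<cuda>.tar.gz` pattern holds; the helper name and the Downloads prefix are illustrative, not part of the commit:

```python
def trt_libpath(tarball: str, downloads: str = "/root/Downloads") -> str:
    """Derive the TRT_LIBPATH directory from a GA tarball name.

    E.g. "TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz"
    unpacks to a "TensorRT-10.4.0.26" directory.
    """
    # Strip the leading "TensorRT-" and everything from ".Linux" onward,
    # leaving just the four-part version string.
    version = tarball.removeprefix("TensorRT-").split(".Linux")[0]
    return f"{downloads}/TensorRT-{version}"

print(trt_libpath("TensorRT-10.4.0.26.Linux.x86_64-gnu.cuda-12.6.tar.gz"))
# → /root/Downloads/TensorRT-10.4.0.26
```

This mirrors why the README's `tar -xvzf ...` line is immediately followed by ``export TRT_LIBPATH=`pwd`/TensorRT-10.4.0.26``: the extracted directory name is fully determined by the version embedded in the tarball name.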
VERSION

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-10.2.0.19
+10.4.0.26
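The VERSION file bump above is the canonical version string for the release. A hedged sketch of a compatibility check between this repo version and an installed TensorRT Python package; the assumption that the wheel reports only the first three fields (e.g. "10.4.0" versus the repo's "10.4.0.26") reflects common TensorRT packaging and is not established by this commit:

```python
def wheel_matches_repo(repo_version: str, wheel_version: str) -> bool:
    """Compare the repo VERSION file against an installed wheel version.

    The repo carries a fourth build field ("10.4.0.26"); wheels typically
    report three ("10.4.0"), so only the first three fields are compared.
    """
    return repo_version.split(".")[:3] == wheel_version.split(".")[:3]

print(wheel_matches_repo("10.4.0.26", "10.4.0"))  # → True
print(wheel_matches_repo("10.4.0.26", "10.2.0"))  # → False
```

A check like this catches the most common OSS-build mistake: building against headers and libraries from a different GA release than the one listed in VERSION.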