@@ -1677,35 +1677,41 @@ def acc_ops_logical_xor(
     )
 
 
-@tensorrt_converter(acc_ops.isinf)
-def acc_ops_isinf(
-    network: TRTNetwork,
-    target: Target,
-    args: Tuple[Argument, ...],
-    kwargs: Dict[str, Argument],
-    name: str,
-) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    input_t = kwargs["input"]
-    if not isinstance(input_t, TRTTensor):
-        raise RuntimeError(
-            f"isinf received input {input_t} that is not part "
-            "of the TensorRT region!"
-        )
-    inf_t = torch.ones(tuple(input_t.shape))
-    inf_t = inf_t * float("inf")
-    inf_t = get_trt_tensor(network, inf_t, f"{name}_inf_t")
-
-    ninf_t = torch.ones(tuple(input_t.shape))
-    ninf_t = ninf_t * float("-inf")
-    ninf_t = get_trt_tensor(network, ninf_t, f"{name}_ninf_t")
-
-    kwargs_new = {"input": input_t, "other": inf_t}
-    inf_output = acc_ops_eq(network, target, None, kwargs_new, name + "_compare_inf")
-    kwargs_new = {"input": input_t, "other": ninf_t}
-    ninf_output = acc_ops_eq(network, target, None, kwargs_new, name + "_compare_ninf")
-    kwargs_new = {"input": inf_output, "other": ninf_output}
-    output = acc_ops_logical_or(network, target, None, kwargs_new, name + "_compare")
-    return output
+# T113156424: has accuracy problems in hf_T5.
+# [TRT] [W] Weights [name=isinf_1_inf_t]: Converted FP32 value in weights (either FP32 infinity or FP32 value outside FP16 range) to corresponding FP16 infinity. If this is not the desired behavior, please modify the weights or retrain with regularization to reduce the magnitude of the weights.
+# @tensorrt_converter(acc_ops.isinf)
+# def acc_ops_isinf(
+#     network: TRTNetwork,
+#     target: Target,
+#     args: Tuple[Argument, ...],
+#     kwargs: Dict[str, Argument],
+#     name: str,
+# ) -> Union[TRTTensor, Sequence[TRTTensor]]:
+#     input_t = kwargs["input"]
+#     if not isinstance(input_t, TRTTensor):
+#         raise RuntimeError(
+#             f"isinf received input {input_t} that is not part "
+#             "of the TensorRT region!"
+#         )
+#     tdtype = torch_dtype_from_trt(input_t.dtype)
+
+#     inf_t = torch.ones(tuple(input_t.shape))
+#     inf_t = inf_t * float("inf")
+#     inf_t = inf_t.to(tdtype)
+#     inf_t = get_trt_tensor(network, inf_t, f"{name}_inf_t")
+
+#     ninf_t = torch.ones(tuple(input_t.shape))
+#     ninf_t = ninf_t * float("-inf")
+#     ninf_t = ninf_t.to(tdtype)
+#     ninf_t = get_trt_tensor(network, ninf_t, f"{name}_ninf_t")
+
+#     kwargs_new = {"input": input_t, "other": inf_t}
+#     inf_output = acc_ops_eq(network, target, None, kwargs_new, name + "_compare_inf")
+#     kwargs_new = {"input": input_t, "other": ninf_t}
+#     ninf_output = acc_ops_eq(network, target, None, kwargs_new, name + "_compare_ninf")
+#     kwargs_new = {"input": inf_output, "other": ninf_output}
+#     output = acc_ops_logical_or(network, target, None, kwargs_new, name + "_compare")
+#     return output
 
 
 @tensorrt_converter(acc_ops.any)
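
For context: the disabled converter lowered `isinf` to two elementwise equality comparisons against +inf and -inf constants, joined by a logical OR. The TRT warning quoted above fires because those FP32 infinity constants get narrowed to FP16 weights when the engine runs in half precision. A minimal pure-PyTorch sketch of the same decomposition (`isinf_via_eq` is an illustrative name, not part of the codebase):

```python
import torch

def isinf_via_eq(x: torch.Tensor) -> torch.Tensor:
    # Mirror of the lowered graph: compare against +inf and -inf
    # constants of the same shape, then OR the two boolean masks.
    inf_t = torch.full_like(x, float("inf"))
    ninf_t = torch.full_like(x, float("-inf"))
    return (x == inf_t) | (x == ninf_t)

x = torch.tensor([1.0, float("inf"), float("-inf"), float("nan")])
assert torch.equal(isinf_via_eq(x), torch.isinf(x))
```

NaN compares unequal to both constants, so the OR correctly leaves NaN entries False; the failure mode is purely the FP16 narrowing of the materialized infinity constants.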
@@ -1785,68 +1791,70 @@ def acc_ops_fmod(
     return sub_value
 
 
-@tensorrt_converter(acc_ops.embedding, no_implicit_batch_dim=True)
-def acc_ops_embedding(
-    network: TRTNetwork,
-    target: Target,
-    args: Tuple[Argument, ...],
-    kwargs: Dict[str, Argument],
-    name: str,
-) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    if network.has_implicit_batch_dimension:
-        raise RuntimeError(
-            "The `embedding` function should be called with explicit batch dimension."
-        )
-
-    indices_tensor = kwargs["input"]
-    embedding_tensor = kwargs["weight"]
-    if isinstance(indices_tensor, torch.Tensor) and indices_tensor.dtype == torch.int64:
-        indices_tensor = indices_tensor.to(torch.int32)
-        warnings.warn(
-            "Embedding op has indices_tensor dtype=int64. Reduce it to int32 to run on TRT. Accuracy may not be correct!"
-        )
-    if (
-        isinstance(embedding_tensor, torch.Tensor)
-        and embedding_tensor.dtype == torch.int64
-    ):
-        embedding_tensor = embedding_tensor.to(torch.int32)
-        warnings.warn(
-            "Embedding op has embedding_tensor dtype=int64. Reduce it to int32 to run on TRT. Accuracy may not be correct!"
-        )
-    indices_tensor = get_trt_tensor(network, indices_tensor, f"{name}_indices_tensor")
-    embedding_tensor = get_trt_tensor(
-        network, embedding_tensor, f"{name}_embedding_tensor"
-    )
-
-    # unsupported parameters
-    # ignore padding_idx since it is meaningful for training only
-    max_norm = kwargs["max_norm"]
-    norm_type = kwargs["norm_type"]
-    scale_grad_by_freq = kwargs["scale_grad_by_freq"]
-    sparse = kwargs["sparse"]
-
-    if max_norm is not None:
-        raise RuntimeError(
-            f"Currently we don't support specifying max_norm, got {max_norm}."
-        )
-
-    if norm_type != 2.0:
-        raise RuntimeError(
-            f"Currently we don't support specifying max_norm, got {norm_type} for norm_type."
-        )
-
-    if scale_grad_by_freq:
-        raise RuntimeError(
-            "Currently we don't support scale gradient by word frequency."
-        )
-
-    if sparse:
-        raise RuntimeError("Currently we don't support sparse gradient.")
-
-    # Implement embedding lookup with gather layer
-    gather_layer = network.add_gather(embedding_tensor, indices_tensor, axis=0)
-    set_layer_name(gather_layer, target, name + "_gather")
-    return gather_layer.get_output(0)
+# T113156424: this embedding implementation is very limited and sees no usage in HF models because the indices are int64.
+# Casting them to int32 creates accuracy issues, so we leave this to a future implementation.
+# @tensorrt_converter(acc_ops.embedding, no_implicit_batch_dim=True)
+# def acc_ops_embedding(
+#     network: TRTNetwork,
+#     target: Target,
+#     args: Tuple[Argument, ...],
+#     kwargs: Dict[str, Argument],
+#     name: str,
+# ) -> Union[TRTTensor, Sequence[TRTTensor]]:
+#     if network.has_implicit_batch_dimension:
+#         raise RuntimeError(
+#             "The `embedding` function should be called with explicit batch dimension."
+#         )
+
+#     indices_tensor = kwargs["input"]
+#     embedding_tensor = kwargs["weight"]
+#     if isinstance(indices_tensor, torch.Tensor) and indices_tensor.dtype == torch.int64:
+#         indices_tensor = indices_tensor.to(torch.int32)
+#         warnings.warn(
+#             "Embedding op has indices_tensor dtype=int64. Reduce it to int32 to run on TRT. Accuracy may not be correct!"
+#         )
+#     if (
+#         isinstance(embedding_tensor, torch.Tensor)
+#         and embedding_tensor.dtype == torch.int64
+#     ):
+#         embedding_tensor = embedding_tensor.to(torch.int32)
+#         warnings.warn(
+#             "Embedding op has embedding_tensor dtype=int64. Reduce it to int32 to run on TRT. Accuracy may not be correct!"
+#         )
+#     indices_tensor = get_trt_tensor(network, indices_tensor, f"{name}_indices_tensor")
+#     embedding_tensor = get_trt_tensor(
+#         network, embedding_tensor, f"{name}_embedding_tensor"
+#     )
+
+#     # unsupported parameters
+#     # ignore padding_idx since it is meaningful for training only
+#     max_norm = kwargs["max_norm"]
+#     norm_type = kwargs["norm_type"]
+#     scale_grad_by_freq = kwargs["scale_grad_by_freq"]
+#     sparse = kwargs["sparse"]
+
+#     if max_norm is not None:
+#         raise RuntimeError(
+#             f"Currently we don't support specifying max_norm, got {max_norm}."
+#         )
+
+#     if norm_type != 2.0:
+#         raise RuntimeError(
+#             f"Currently we don't support specifying max_norm, got {norm_type} for norm_type."
+#         )
+
+#     if scale_grad_by_freq:
+#         raise RuntimeError(
+#             "Currently we don't support scale gradient by word frequency."
+#         )
+
+#     if sparse:
+#         raise RuntimeError("Currently we don't support sparse gradient.")
+
+#     # Implement embedding lookup with gather layer
+#     gather_layer = network.add_gather(embedding_tensor, indices_tensor, axis=0)
+#     set_layer_name(gather_layer, target, name + "_gather")
+#     return gather_layer.get_output(0)
 
 
 @tensorrt_converter(acc_ops.max_pool1d)
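
For context: the commented-out converter implemented the lookup as a gather along axis 0 of the weight table, which for the supported default arguments matches `torch.nn.functional.embedding`. A small PyTorch sketch of that equivalence and of the int64-to-int32 cast the comment warns about (variable names are illustrative):

```python
import torch
import torch.nn.functional as F

weight = torch.randn(10, 4)           # embedding table: 10 rows of dim 4
indices = torch.tensor([1, 3, 3, 7])  # PyTorch indices default to int64

# A gather along axis 0 of the table is the embedding lookup.
gathered = torch.index_select(weight, 0, indices)
assert torch.equal(gathered, F.embedding(indices, weight))

# The cast the comment warns about: lossless only while every index
# fits in int32; larger index values would silently select wrong rows.
indices32 = indices.to(torch.int32)
assert torch.equal(torch.index_select(weight, 0, indices32), gathered)
```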
@@ -2342,12 +2350,8 @@ def acc_ops_reshape(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     input_val = kwargs["input"]
-
-    if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(
-            f"Reshape received input {input_val} that is not part "
-            "of the TensorRT region!"
-        )
+    # get_trt_tensor handles both TRTTensor inputs and constants
+    input_val = get_trt_tensor(network, input_val, f"{name}_input_val")
 
     shape = kwargs["acc_out_ty"].shape  # type: ignore[misc]
     if network.has_implicit_batch_dimension:
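
The replacement path relies on `get_trt_tensor` to promote anything that is not already a `TRTTensor` (e.g. a constant produced by the tracer) into the network, instead of rejecting it. A rough sketch of what such a helper does, assuming the standard TensorRT Python API; `to_trt_tensor` is a hypothetical stand-in, and the real `get_trt_tensor` also handles torch tensors, integer dtypes, and scalars:

```python
import numpy as np
import tensorrt as trt

def to_trt_tensor(network, value, name):
    # Already an ITensor produced by an upstream layer: pass it through.
    if isinstance(value, trt.ITensor):
        return value
    # Otherwise freeze the value into the graph as a named constant layer.
    arr = np.ascontiguousarray(np.asarray(value, dtype=np.float32))
    const = network.add_constant(arr.shape, trt.Weights(arr))
    const.name = name
    return const.get_output(0)
```

With that pass-through in place, `acc_ops_reshape` no longer needs its own `isinstance` guard.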