
Commit f91fc25

Author: Wei Wei (committed)
[fx2trt] setitem pass improvement, disable isinf and embedding (#86)
Summary: Pull Request resolved: https://github.com/pytorch/fx2trt/pull/86

1. Dead-code elimination should be used cautiously: we need to keep the other in-place ops for a future optimization pass, so the setitem pass now erases only the replaced node.
2. Add support for the `type` op.
3. Disable the isinf converter, since its implementation generates inaccurate results in the hf_T5 model; the reason is not yet clear.
4. Disable the embedding converter for a few reasons: 1) embeddings in hf models all use int64 indices, which skips this op anyway; 2) if the indices are a constant node, it throws an error and the whole subgraph falls back to non-TRT. So it is better to leave the converter commented out and defer it to a future implementation.

HF models in torchbench are summarized below; 3 models do not work well with TRT (speedup close to 1x) since major subgraphs fall back to non-TRT.

Reviewed By: 842974287

Differential Revision: D36731186

fbshipit-source-id: 8f56fb875419b9a3f03432f0ef5c00fdfc6b6741
1 parent cffd9c2 commit f91fc25
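
As context for point 4 above, here is a minimal PyTorch snippet (not part of this commit) showing that embedding indices default to int64, which is why the HF models would bypass a TRT embedding converter limited to int32:

import torch

emb = torch.nn.Embedding(num_embeddings=10, embedding_dim=4)
indices = torch.tensor([[1, 2, 3]])  # integer tensors default to torch.int64
print(indices.dtype)                 # torch.int64
print(emb(indices).shape)            # torch.Size([1, 3, 4])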

File tree

7 files changed, +171 -100 lines changed

fx/converters/acc_ops_converters.py

+101-97
@@ -1677,35 +1677,41 @@ def acc_ops_logical_xor(
     )
 
 
-@tensorrt_converter(acc_ops.isinf)
-def acc_ops_isinf(
-    network: TRTNetwork,
-    target: Target,
-    args: Tuple[Argument, ...],
-    kwargs: Dict[str, Argument],
-    name: str,
-) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    input_t = kwargs["input"]
-    if not isinstance(input_t, TRTTensor):
-        raise RuntimeError(
-            f"isinf received input {input_t} that is not part "
-            "of the TensorRT region!"
-        )
-    inf_t = torch.ones(tuple(input_t.shape))
-    inf_t = inf_t * float("inf")
-    inf_t = get_trt_tensor(network, inf_t, f"{name}_inf_t")
-
-    ninf_t = torch.ones(tuple(input_t.shape))
-    ninf_t = ninf_t * float("-inf")
-    ninf_t = get_trt_tensor(network, ninf_t, f"{name}_ninf_t")
-
-    kwargs_new = {"input": input_t, "other": inf_t}
-    inf_output = acc_ops_eq(network, target, None, kwargs_new, name + "_compare_inf")
-    kwargs_new = {"input": input_t, "other": ninf_t}
-    ninf_output = acc_ops_eq(network, target, None, kwargs_new, name + "_compare_ninf")
-    kwargs_new = {"input": inf_output, "other": ninf_output}
-    output = acc_ops_logical_or(network, target, None, kwargs_new, name + "_compare")
-    return output
+# T113156424 Have some accuracy problems in hf_T5.
+# [TRT] [W] Weights [name=isinf_1_inf_t]: Converted FP32 value in weights (either FP32 infinity or FP32 value outside FP16 range) to corresponding FP16 infinity. If this is not the desired behavior, please modify the weights or retrain with regularization to reduce the magnitude of the weights.
+# @tensorrt_converter(acc_ops.isinf)
+# def acc_ops_isinf(
+#     network: TRTNetwork,
+#     target: Target,
+#     args: Tuple[Argument, ...],
+#     kwargs: Dict[str, Argument],
+#     name: str,
+# ) -> Union[TRTTensor, Sequence[TRTTensor]]:
+#     input_t = kwargs["input"]
+#     if not isinstance(input_t, TRTTensor):
+#         raise RuntimeError(
+#             f"isinf received input {input_t} that is not part "
+#             "of the TensorRT region!"
+#         )
+#     tdtype = torch_dtype_from_trt(input_t.dtype)
+
+#     inf_t = torch.ones(tuple(input_t.shape))
+#     inf_t = inf_t * float("inf")
+#     inf_t = inf_t.to(tdtype)
+#     inf_t = get_trt_tensor(network, inf_t, f"{name}_inf_t")
+
+#     ninf_t = torch.ones(tuple(input_t.shape))
+#     ninf_t = ninf_t * float("-inf")
+#     ninf_t = ninf_t.to(tdtype)
+#     ninf_t = get_trt_tensor(network, ninf_t, f"{name}_ninf_t")
+
+#     kwargs_new = {"input": input_t, "other": inf_t}
+#     inf_output = acc_ops_eq(network, target, None, kwargs_new, name + "_compare_inf")
+#     kwargs_new = {"input": input_t, "other": ninf_t}
+#     ninf_output = acc_ops_eq(network, target, None, kwargs_new, name + "_compare_ninf")
+#     kwargs_new = {"input": inf_output, "other": ninf_output}
+#     output = acc_ops_logical_or(network, target, None, kwargs_new, name + "_compare")
+#     return output
 
 
 @tensorrt_converter(acc_ops.any)
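
For reference, the decomposition used by the now-disabled converter (eq against +inf and -inf, combined with logical_or) can be sketched in plain PyTorch; this snippet is illustrative only and not part of the commit:

import torch

x = torch.tensor([1.0, float("inf"), float("-inf"), 3.0])
# eq against +inf, eq against -inf, then logical_or, mirroring the commented-out converter
mask = torch.logical_or(x == float("inf"), x == float("-inf"))
print(mask)            # tensor([False,  True,  True, False])
print(torch.isinf(x))  # same result
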
@@ -1785,68 +1791,70 @@ def acc_ops_fmod(
     return sub_value
 
 
-@tensorrt_converter(acc_ops.embedding, no_implicit_batch_dim=True)
-def acc_ops_embedding(
-    network: TRTNetwork,
-    target: Target,
-    args: Tuple[Argument, ...],
-    kwargs: Dict[str, Argument],
-    name: str,
-) -> Union[TRTTensor, Sequence[TRTTensor]]:
-    if network.has_implicit_batch_dimension:
-        raise RuntimeError(
-            "The `embedding` function should be called with explicit batch dimension."
-        )
-
-    indices_tensor = kwargs["input"]
-    embedding_tensor = kwargs["weight"]
-    if isinstance(indices_tensor, torch.Tensor) and indices_tensor.dtype == torch.int64:
-        indices_tensor = indices_tensor.to(torch.int32)
-        warnings.warn(
-            "Embedding op has indices_tensor dtype=int64. Reduce it to int32 to run on TRT. Accuracy may not be correct!"
-        )
-    if (
-        isinstance(embedding_tensor, torch.Tensor)
-        and embedding_tensor.dtype == torch.int64
-    ):
-        embedding_tensor = embedding_tensor.to(torch.int32)
-        warnings.warn(
-            "Embedding op has embedding_tensor dtype=int64. Reduce it to int32 to run on TRT. Accuracy may not be correct!"
-        )
-    indices_tensor = get_trt_tensor(network, indices_tensor, f"{name}_indices_tensor")
-    embedding_tensor = get_trt_tensor(
-        network, embedding_tensor, f"{name}_embedding_tensor"
-    )
-
-    # unsupported parameters
-    # ignore padding_idx since it is meaningful for training only
-    max_norm = kwargs["max_norm"]
-    norm_type = kwargs["norm_type"]
-    scale_grad_by_freq = kwargs["scale_grad_by_freq"]
-    sparse = kwargs["sparse"]
-
-    if max_norm is not None:
-        raise RuntimeError(
-            f"Currently we don't support specifying max_norm, got {max_norm}."
-        )
-
-    if norm_type != 2.0:
-        raise RuntimeError(
-            f"Currently we don't support specifying max_norm, got {norm_type} for norm_type."
-        )
-
-    if scale_grad_by_freq:
-        raise RuntimeError(
-            "Currently we don't support scale gradient by word frequency."
-        )
-
-    if sparse:
-        raise RuntimeError("Currently we don't support sparse gradient.")
-
-    # Implement embedding lookup with gather layer
-    gather_layer = network.add_gather(embedding_tensor, indices_tensor, axis=0)
-    set_layer_name(gather_layer, target, name + "_gather")
-    return gather_layer.get_output(0)
+# T113156424 The embedding implementation is very limited and shows no usage in hf models because the indices are int64.
+# If we cast to int32, it will create accuracy issues. We'd better leave it to a future implementation.
+# @tensorrt_converter(acc_ops.embedding, no_implicit_batch_dim=True)
+# def acc_ops_embedding(
+#     network: TRTNetwork,
+#     target: Target,
+#     args: Tuple[Argument, ...],
+#     kwargs: Dict[str, Argument],
+#     name: str,
+# ) -> Union[TRTTensor, Sequence[TRTTensor]]:
+#     if network.has_implicit_batch_dimension:
+#         raise RuntimeError(
+#             "The `embedding` function should be called with explicit batch dimension."
+#         )
+
+#     indices_tensor = kwargs["input"]
+#     embedding_tensor = kwargs["weight"]
+#     if isinstance(indices_tensor, torch.Tensor) and indices_tensor.dtype == torch.int64:
+#         indices_tensor = indices_tensor.to(torch.int32)
+#         warnings.warn(
+#             "Embedding op has indices_tensor dtype=int64. Reduce it to int32 to run on TRT. Accuracy may not be correct!"
+#         )
+#     if (
+#         isinstance(embedding_tensor, torch.Tensor)
+#         and embedding_tensor.dtype == torch.int64
+#     ):
+#         embedding_tensor = embedding_tensor.to(torch.int32)
+#         warnings.warn(
+#             "Embedding op has embedding_tensor dtype=int64. Reduce it to int32 to run on TRT. Accuracy may not be correct!"
+#         )
+#     indices_tensor = get_trt_tensor(network, indices_tensor, f"{name}_indices_tensor")
+#     embedding_tensor = get_trt_tensor(
+#         network, embedding_tensor, f"{name}_embedding_tensor"
+#     )
+
+#     # unsupported parameters
+#     # ignore padding_idx since it is meaningful for training only
+#     max_norm = kwargs["max_norm"]
+#     norm_type = kwargs["norm_type"]
+#     scale_grad_by_freq = kwargs["scale_grad_by_freq"]
+#     sparse = kwargs["sparse"]
+
+#     if max_norm is not None:
+#         raise RuntimeError(
+#             f"Currently we don't support specifying max_norm, got {max_norm}."
+#         )
+
+#     if norm_type != 2.0:
+#         raise RuntimeError(
+#             f"Currently we don't support specifying max_norm, got {norm_type} for norm_type."
+#         )
+
+#     if scale_grad_by_freq:
+#         raise RuntimeError(
+#             "Currently we don't support scale gradient by word frequency."
+#         )
+
+#     if sparse:
+#         raise RuntimeError("Currently we don't support sparse gradient.")
+
+#     # Implement embedding lookup with gather layer
+#     gather_layer = network.add_gather(embedding_tensor, indices_tensor, axis=0)
+#     set_layer_name(gather_layer, target, name + "_gather")
+#     return gather_layer.get_output(0)
 
 
 @tensorrt_converter(acc_ops.max_pool1d)
@@ -2342,12 +2350,8 @@ def acc_ops_reshape(
     name: str,
 ) -> Union[TRTTensor, Sequence[TRTTensor]]:
     input_val = kwargs["input"]
-
-    if not isinstance(input_val, TRTTensor):
-        raise RuntimeError(
-            f"Reshape received input {input_val} that is not part "
-            "of the TensorRT region!"
-        )
+    # for case where input_val is TRTensor
+    input_val = get_trt_tensor(network, input_val, f"{name}_input_val")
 
     shape = kwargs["acc_out_ty"].shape  # type: ignore[misc]
     if network.has_implicit_batch_dimension:
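
The reshape change follows a common converter pattern: promote constant inputs to network tensors via get_trt_tensor instead of rejecting anything that is not already a TRTTensor. A generic, hedged sketch of that pattern follows; the BackendTensor class and to_backend_tensor helper are hypothetical stand-ins, not the fx2trt API:

import torch

class BackendTensor:
    # hypothetical stand-in for a TRT ITensor registered on the network
    def __init__(self, name, value):
        self.name, self.value = name, value

def to_backend_tensor(network_constants, value, name):
    # pass through values already on the backend; register constants instead of raising
    if isinstance(value, BackendTensor):
        return value
    const = BackendTensor(name, torch.as_tensor(value))
    network_constants[name] = const
    return const

constants = {}
t = to_backend_tensor(constants, torch.randn(2, 3), "reshape_input_val")
print(type(t).__name__, t.value.shape)  # BackendTensor torch.Size([2, 3])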

fx/passes/lower_basic_pass.py

+1-1
@@ -451,8 +451,8 @@ def transform_setitem(gm: torch.fx.GraphModule, input: Input):
             continue
         node.replace_all_uses_with(concat_node_0)
         map_replace[input_node] = concat_node_0
+        gm.graph.erase_node(node)
 
-    gm.graph.eliminate_dead_code()
     gm.graph.lint()
     gm.recompile()
     return gm
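
Below is a minimal, self-contained torch.fx sketch (not the setitem pass itself) of the replace_all_uses_with + erase_node pattern used above. Erasing only the rerouted node keeps other unused in-place ops in the graph, whereas a blanket gm.graph.eliminate_dead_code() could drop them, which is what point 1 of the summary cautions against:

import operator
import torch
import torch.fx as fx

class M(torch.nn.Module):
    def forward(self, x):
        y = x + 1
        return y * 2

gm = fx.symbolic_trace(M())
graph = gm.graph
for node in list(graph.nodes):
    if node.op == "call_function" and node.target is operator.add:
        # reroute users of the add node to its first input, then erase just that node
        node.replace_all_uses_with(node.args[0])
        graph.erase_node(node)
graph.lint()
gm.recompile()
print(gm.code)  # forward now returns x * 2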

test/converters/acc_op/test_embedding.py

+4-2
@@ -1,10 +1,12 @@
+import unittest
+
 import fx2trt_oss.tracer.acc_tracer.acc_ops as acc_ops
 import torch
 from parameterized import param, parameterized
-from torch.testing._internal.common_fx2trt import AccTestCase, InputTensorSpec
+from torch.testing._internal.common_fx2trt import AccTestCase
 from torch.testing._internal.common_utils import run_tests
 
-
+@unittest.skip("Current implementation is limited. All implementations in hf use int64. T113156424")
 class TestEmbeddingConverter(AccTestCase):
     @parameterized.expand(
         [

test/converters/acc_op/test_isinf.py

+3
@@ -1,9 +1,12 @@
+import unittest
+
 import fx2trt_oss.tracer.acc_tracer.acc_ops as acc_ops
 import torch
 from torch.testing._internal.common_fx2trt import AccTestCase
 from torch.testing._internal.common_utils import run_tests
 
 
+@unittest.skip("Implementation is commented out due to accuracy issue T113156424")
 class TestInfConverter(AccTestCase):
     def test_isinf(self):
         class Test(torch.nn.Module):

test/converters/acc_op/test_type_as.py

+37
@@ -86,6 +86,43 @@ def forward(self, input, other):
             precision=LowerPrecision.FP16,
         )
 
+    def test_type_tensor(self):
+        class Type_as(torch.nn.Module):
+            def forward(self, input):
+                return input.type(dtype=torch.float16)
+
+        input = torch.randn(2, 2)
+
+        inputs = [
+            input,
+        ]
+        self.run_test(
+            Type_as(),
+            inputs,
+            expected_ops={acc_ops.to_dtype},
+            precision=LowerPrecision.FP16,
+        )
+
+    def test_type_tensor_ext(self):
+        class Type_as(torch.nn.Module):
+            def forward(self, input, other):
+                t = input.type()
+                return other.type(t)
+
+        input = torch.randn(2, 2).to(dtype=torch.float16)
+        other = torch.randn(2, 2)
+
+        inputs = [
+            input,
+            other,
+        ]
+        self.run_test(
+            Type_as(),
+            inputs,
+            expected_ops={acc_ops.to_dtype, acc_ops.dtype},
+            precision=LowerPrecision.FP16,
+        )
+
 
 if __name__ == "__main__":
     run_tests()

tracer/acc_tracer/acc_ops.py

+24
@@ -223,6 +223,30 @@ def avg_pool2d(
 def sign(*, input):
     return torch.sign(input)
 
+@register_custom_acc_mapper_fn(
+    op_and_target=("call_method", "type"),
+    arg_replacement_tuples=[
+        ("input", "input"),
+        ("dtype", "dtype", this_arg_is_optional),
+    ],
+)
+def custom_type_mapper(node: torch.fx.Node, _: nn.Module) -> torch.fx.Node:
+    input_obj = node.kwargs["input"]
+    dtype_obj = node.kwargs.get("dtype")
+    with node.graph.inserting_before(node):
+        if dtype_obj == None:
+            dtype_node = node.graph.call_function(dtype, kwargs={"input": input_obj})
+            dtype_node.meta["type"] = torch.dtype
+            return dtype_node
+        else:
+            new_kwargs = {
+                "input": input_obj,
+                "acc_out_ty": acc_utils.build_raw_tensor_meta(dtype=dtype_obj),
+            }
+            new_node = node.graph.call_function(to_dtype, kwargs=new_kwargs)
+            new_node.meta = node.meta
+            return new_node
+
 
 @register_custom_acc_mapper_fn(
     op_and_target=("call_method", "type_as"),
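
As a plain-PyTorch illustration (independent of the tracer) of the two branches the mapper above handles, Tensor.type() with no dtype returns the type string, while Tensor.type(dtype) performs a cast:

import torch

x = torch.randn(2, 2)
print(x.type())            # 'torch.FloatTensor' -- the no-dtype branch maps to acc_ops.dtype
y = x.type(torch.float16)  # the dtype branch maps to acc_ops.to_dtype
print(y.dtype)             # torch.float16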

tracer/acc_tracer/acc_tracer.py

+1
@@ -272,6 +272,7 @@ def create_node(
             name_target[-1] == "_"
             and name_target[0] != "_"
             and not (name_target in allow_list)
+            and kind != "placeholder"
         ):
             raise RuntimeError(
                 f"Tried to trace mutable operation {name_target}. FX only supports functional code"
