
Commit 937b451

fix fp8
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags:
1 parent 2867e2f commit 937b451

3 files changed: +6 −6 lines


tritonbench/kernels/triton_fused_attention.py

Lines changed: 4 additions & 4 deletions
@@ -458,10 +458,10 @@ def _attn_fwd_inner_ws(
             num_warps=w,
         )
     )
-    for BM in [128]  # 64, 128]
-    for BN in [128]  # 64, 128]
-    for s in [3]  # 3, 4, 7]
-    for w in [8]  # 4, 8]
+    for BM in [64, 128]
+    for BN in [64, 128]
+    for s in [3, 4, 7]
+    for w in [4, 8]
 ]
 # TMA, WS, and CompPipe
 configsTmaWS = [
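This change restores the full autotune sweep that had been pinned to a single configuration. As a minimal sketch (not the exact tritonbench code), assuming the surrounding list builds triton.Config objects and using illustrative BLOCK_M/BLOCK_N key names (the kernel's actual constexpr names may differ), the restored comprehension expands like this:

    import triton

    # Illustrative sketch of the restored sweep; the meta-parameter key names
    # are assumptions, not necessarily those used by triton_fused_attention.py.
    configs_ws = [
        triton.Config(
            {"BLOCK_M": BM, "BLOCK_N": BN},
            num_stages=s,
            num_warps=w,
        )
        for BM in [64, 128]
        for BN in [64, 128]
        for s in [3, 4, 7]
        for w in [4, 8]
    ]
    # 2 * 2 * 3 * 2 = 24 candidate configs instead of the single pinned config.
    assert len(configs_ws) == 24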

tritonbench/operators/fp8_attention/operator.py

Lines changed: 1 addition & 1 deletion
@@ -110,7 +110,7 @@ def triton_flash_v2(
         triton_q, triton_k, triton_v = self.triton_preprocess(q, k, v)
         # full fp8 will be enabled if type of q,k,v is fp8
         return lambda: triton_attention(
-            triton_q, triton_k, triton_v, False, self.sm_scale, "base"
+            triton_q, triton_k, triton_v, False, self.sm_scale, "base", "base"
         )

     def get_x_val(self, _example_inputs) -> Tuple[int, int, int, int]:
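The fp8 operator fix simply passes a second "base" string to triton_attention. A plausible reading, sketched below with a hypothetical signature (the real parameter names live in triton_fused_attention.py and are not shown in this diff), is that the wrapper takes two positional variant selectors after sm_scale, so the old six-argument call failed on the fp8 path until the call site was updated:

    # Toy illustration only; this is NOT the tritonbench API. The two trailing
    # string parameters stand in for whatever variant selectors the real
    # triton_attention wrapper expects after sm_scale.
    def triton_attention(q, k, v, causal, sm_scale, fwd_variant, comp_variant):
        return f"attention({fwd_variant}/{comp_variant}, causal={causal}, scale={sm_scale})"

    # Old call site (six arguments) would raise something like:
    #   TypeError: triton_attention() missing 1 required positional argument
    # Fixed call site, matching the diff:
    print(triton_attention("q", "k", "v", False, 0.125, "base", "base"))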
