Commit 68a6247

fix llama and baichuan typo (#1883)
Parent: f007978

3 files changed (+6, -5 lines)

.github/pylint.conf (+2, -1)
@@ -217,7 +217,8 @@ disable=raw-checker-failed,
         fixme,
         use-a-generator,
         nested-min-max,
-        method-hidden
+        method-hidden,
+        unsubscriptable-object
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option
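
For reference, the unsubscriptable-object message (E1136) can also be suppressed per line instead of being disabled globally; a minimal illustration (not code from this repository):

    """Illustration only: per-line suppression of E1136 (unsubscriptable-object),
    as an alternative to the global disable added to pylint.conf above."""

    def first_element(values):
        # If pylint cannot infer that `values` is subscriptable, it may report
        # E1136 here; the trailing comment silences only this line.
        return values[0]  # pylint: disable=unsubscriptable-object

    print(first_element([1, 2, 3]))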

mindnlp/transformers/models/baichuan/modeling_baichuan.py (+3, -3)
@@ -1550,11 +1550,11 @@ def forward(
         if attention_mask is not None:
             if len(attention_mask.shape) == 2:
                 expanded_mask = attention_mask.to(alibi_mask.dtype)
-                expanded_mask = ops.tril(ops.gt(expanded_mask[:, :, None] * expanded_mask[:, None, :], 0)
-                                         ) * ops.eq(expanded_mask[:, :, None] - expanded_mask[:, None, :], 0)
+                expanded_mask = ops.tril((ops.gt(expanded_mask[:, :, None] * expanded_mask[:, None, :], 0)
+                                         ) * ops.eq(expanded_mask[:, :, None] - expanded_mask[:, None, :], 0).int()).bool()
             else:
                 expanded_mask = attention_mask
-            bsz = inputs_embeds.size(0)
+            bsz = inputs_embeds.shape[0]
             src_len, tgt_len = alibi_mask.shape[-2:]
             expanded_mask = expanded_mask.unsqueeze(1).broadcast_to((bsz, 1, src_len, tgt_len)).to(alibi_mask.dtype)
             inverted_mask = 1.0 - expanded_mask
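
The reworked expression groups the whole product inside ops.tril, casts the equality mask to int for the multiplication, and converts the result back to bool; the bsz line drops the torch-style .size(0) in favor of .shape[0]. A minimal NumPy sketch (not the MindSpore code path) of what the mask arithmetic computes, using a hypothetical 2-D mask with illustrative values:

    import numpy as np

    # Hypothetical 2-D mask: one batch row, values 1,1,2,2 (0 would mark padding).
    m = np.array([[1, 1, 2, 2]], dtype=np.float32)

    # Lower-triangular (causal) positions where both entries are non-zero,
    # restricted to pairs with equal mask values.
    expanded = np.tril((m[:, :, None] * m[:, None, :]) > 0) * (m[:, :, None] - m[:, None, :] == 0)
    print(expanded.astype(int))
    # [[[1 0 0 0]
    #   [1 1 0 0]
    #   [0 0 1 0]
    #   [0 0 1 1]]]

The set of retained positions is the same as before the fix, since zeroing the upper triangle commutes with the element-wise product; the regrouping and int/bool casts only change the dtypes the operations see.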

mindnlp/transformers/models/llama/modeling_llama.py (+1, -1)
@@ -300,7 +300,7 @@ def forward(self, x):
             )
             up_proj = ops.cat([F.linear(x, up_proj_slices[i]) for i in range(self.config.pretraining_tp)], dim=-1)
 
-            intermediate_states = (self.act_fn(gate_proj) * up_proj).split(slice, dim=2)
+            intermediate_states = ops.split((self.act_fn(gate_proj) * up_proj), slice, dim=2)
             down_proj = [
                 F.linear(intermediate_states[i], down_proj_slices[i]) for i in range(self.config.pretraining_tp)
             ]
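
The fix swaps the tensor-method split for the functional ops.split, chunking the gated activation into pretraining_tp pieces of size `slice` along the last dimension. A minimal sketch of the split-by-chunk-size semantics with illustrative shapes, assuming MindSpore >= 2.x (the modeling code goes through mindnlp's ops wrapper, which accepts dim=):

    import numpy as np
    import mindspore
    from mindspore import ops

    # (batch, seq_len, intermediate_size); chunk plays the role of `slice`
    # = intermediate_size // pretraining_tp in the modeling code.
    x = mindspore.Tensor(np.ones((2, 4, 8), dtype=np.float32))
    chunk = 4
    pieces = ops.split(x, chunk, axis=2)  # tuple of 2 tensors, each (2, 4, 4)
    print(len(pieces), pieces[0].shape)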
