Skip to content

Commit 67cbb1c

Browse files
committed
aarch64: Fix bogus cnot optimisation [PR114603]
aarch64-sve.md had a pattern that combined:

    cmpeq pb.T, pa/z, zc.T, #0
    mov zd.T, pb/z, #1

into:

    cnot zd.T, pa/m, zc.T

But this is only valid if pa.T is a ptrue. In other cases, the original would set inactive elements of zd.T to 0, whereas the combined form would copy elements from zc.T.

gcc/
    PR target/114603
    * config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace with...
    (@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be a ptrue.
    (*cnot<mode>): Require operand 1 to be a ptrue.
    * config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand): Use aarch64_ptrue_cnot<mode> for _x operations that are predicated with a ptrue. Represent other _x operations as fully-defined _m operations.

gcc/testsuite/
    PR target/114603
    * gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.
1 parent e4d0743 commit 67cbb1c

File tree

3 files changed

+50
-20
lines changed

3 files changed

+50
-20
lines changed

gcc/config/aarch64/aarch64-sve-builtins-base.cc

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -517,15 +517,22 @@ class svcnot_impl : public function_base
517517
expand (function_expander &e) const override
518518
{
519519
machine_mode mode = e.vector_mode (0);
520-
if (e.pred == PRED_x)
521-
{
522-
/* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
523-
a ptrue hint. */
524-
e.add_ptrue_hint (0, e.gp_mode (0));
525-
return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
526-
}
527-
528-
return e.use_cond_insn (code_for_cond_cnot (mode), 0);
520+
machine_mode pred_mode = e.gp_mode (0);
521+
/* The underlying _x pattern is effectively:
522+
523+
dst = src == 0 ? 1 : 0
524+
525+
rather than an UNSPEC_PRED_X. Using this form allows autovec
526+
constructs to be matched by combine, but it means that the
527+
predicate on the src == 0 comparison must be all-true.
528+
529+
For simplicity, represent other _x operations as fully-defined _m
530+
operations rather than using a separate bespoke pattern. */
531+
if (e.pred == PRED_x
532+
&& gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode))
533+
return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode));
534+
return e.use_cond_insn (code_for_cond_cnot (mode),
535+
e.pred == PRED_x ? 1 : 0);
529536
}
530537
};
531538

gcc/config/aarch64/aarch64-sve.md

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3363,24 +3363,24 @@
33633363
;; - CNOT
33643364
;; -------------------------------------------------------------------------
33653365

3366-
;; Predicated logical inverse.
3367-
(define_expand "@aarch64_pred_cnot<mode>"
3366+
;; Logical inverse, predicated with a ptrue.
3367+
(define_expand "@aarch64_ptrue_cnot<mode>"
33683368
[(set (match_operand:SVE_FULL_I 0 "register_operand")
33693369
(unspec:SVE_FULL_I
33703370
[(unspec:<VPRED>
33713371
[(match_operand:<VPRED> 1 "register_operand")
3372-
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
3372+
(const_int SVE_KNOWN_PTRUE)
33733373
(eq:<VPRED>
3374-
(match_operand:SVE_FULL_I 3 "register_operand")
3375-
(match_dup 4))]
3374+
(match_operand:SVE_FULL_I 2 "register_operand")
3375+
(match_dup 3))]
33763376
UNSPEC_PRED_Z)
3377-
(match_dup 5)
3378-
(match_dup 4)]
3377+
(match_dup 4)
3378+
(match_dup 3)]
33793379
UNSPEC_SEL))]
33803380
"TARGET_SVE"
33813381
{
3382-
operands[4] = CONST0_RTX (<MODE>mode);
3383-
operands[5] = CONST1_RTX (<MODE>mode);
3382+
operands[3] = CONST0_RTX (<MODE>mode);
3383+
operands[4] = CONST1_RTX (<MODE>mode);
33843384
}
33853385
)
33863386

@@ -3389,7 +3389,7 @@
33893389
(unspec:SVE_I
33903390
[(unspec:<VPRED>
33913391
[(match_operand:<VPRED> 1 "register_operand")
3392-
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
3392+
(const_int SVE_KNOWN_PTRUE)
33933393
(eq:<VPRED>
33943394
(match_operand:SVE_I 2 "register_operand")
33953395
(match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
@@ -11001,4 +11001,4 @@
1100111001
GET_MODE (operands[2]));
1100211002
return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
1100311003
}
11004-
)
11004+
)
gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/* { dg-options "-O2" } */
2+
/* { dg-final { check-function-bodies "**" "" } } */
3+
4+
#include <arm_sve.h>
5+
6+
#ifdef __cplusplus
7+
extern "C" {
8+
#endif
9+
10+
/*
11+
** foo:
12+
** cmpeq (p[0-7])\.s, p0/z, z0\.s, #0
13+
** mov z0\.s, \1/z, #1
14+
** ret
15+
*/
16+
svint32_t foo(svbool_t pg, svint32_t y)
17+
{
18+
return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0));
19+
}
20+
21+
#ifdef __cplusplus
22+
}
23+
#endif

0 commit comments

Comments
 (0)