Skip to content

Commit 301dc60

Browse files
committed
expand: Add new clrsb fallback expansion [PR101950]
As suggested in the PR, the following patch adds two new clrsb expansion possibilities if the target doesn't have clrsb_optab for the requested mode or any wider mode, but does have clz_optab for the requested mode.

One expansion is
  clrsb (op0)
expands as
  clz (op0 ^ (((stype)op0) >> (prec-1))) - 1
which is usable if CLZ_DEFINED_VALUE_AT_ZERO is 2 with value of prec, because the clz argument can be 0 and clrsb should give prec-1 in that case.

The other expansion is
  clz (((op0 << 1) ^ (((stype)op0) >> (prec-1))) | 1)
where the clz argument is never 0, but it is one operation longer.

E.g. on x86_64-linux with -O2 -mno-lzcnt, this results for
  int foo (int x) { return __builtin_clrsb (x); }
in
-	subq	$8, %rsp
-	movslq	%edi, %rdi
-	call	__clrsbdi2
-	addq	$8, %rsp
-	subl	$32, %eax
+	leal	(%rdi,%rdi), %eax
+	sarl	$31, %edi
+	xorl	%edi, %eax
+	orl	$1, %eax
+	bsrl	%eax, %eax
+	xorl	$31, %eax
and with -O2 -mlzcnt:
+	movl	%edi, %eax
+	sarl	$31, %eax
+	xorl	%edi, %eax
+	lzcntl	%eax, %eax
+	subl	$1, %eax

On armv7hl-linux-gnueabi with -O2:
-	push	{r4, lr}
-	bl	__clrsbsi2
-	pop	{r4, pc}
+	@ link register save eliminated.
+	eor	r0, r0, r0, asr #31
+	clz	r0, r0
+	sub	r0, r0, #1
+	bx	lr

As it (at least usually) will make code larger, it is disabled for -Os or cold instructions.

2021-08-19  Jakub Jelinek  <[email protected]>

	PR middle-end/101950
	* optabs.c (expand_clrsb_using_clz): New function.
	(expand_unop): Use it as another clrsb expansion fallback.

	* gcc.target/i386/pr101950-1.c: New test.
	* gcc.target/i386/pr101950-2.c: New test.
1 parent c04d766 commit 301dc60

File tree

3 files changed

+118
-0
lines changed

3 files changed

+118
-0
lines changed

gcc/optabs.c

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2600,6 +2600,82 @@ widen_leading (scalar_int_mode mode, rtx op0, rtx target, optab unoptab)
26002600
return 0;
26012601
}
26022602

2603+
/* Attempt to emit (clrsb:mode op0) as
2604+
(plus:mode (clz:mode (xor:mode op0 (ashr:mode op0 (const_int prec-1))))
2605+
(const_int -1))
2606+
if CLZ_DEFINED_VALUE_AT_ZERO (mode, val) is 2 and val is prec,
2607+
or as
2608+
(clz:mode (ior:mode (xor:mode (ashl:mode op0 (const_int 1))
2609+
(ashr:mode op0 (const_int prec-1)))
2610+
(const_int 1)))
2611+
otherwise. */
2612+
2613+
/* MODE is the integer mode of the operation, OP0 the clrsb operand and
   TARGET a suggested place for the result.  Returns the rtx holding the
   result, or NULL_RTX if the expansion is not attempted or any step
   cannot be expanded directly (OPTAB_DIRECT); in that case no insns
   are emitted.  */
static rtx
2614+
expand_clrsb_using_clz (scalar_int_mode mode, rtx op0, rtx target)
2615+
{
2616+
/* This expansion usually makes the code larger, so skip it when
   optimizing the insn for size, and also when MODE has no clz insn.  */
if (optimize_insn_for_size_p ()
2617+
|| optab_handler (clz_optab, mode) == CODE_FOR_nothing)
2618+
return NULL_RTX;
2619+
2620+
/* Collect the insns in a pending sequence so that a failure part-way
   through can drop everything generated so far.  */
start_sequence ();
2621+
/* VAL is first handed to CLZ_DEFINED_VALUE_AT_ZERO (which is expected
   to store the clz-at-zero value into it) and is then reused as a flag:
   1 selects the shorter clz (op0 ^ (op0 >> (prec-1))) - 1 form, whose clz
   argument may be zero; 0 selects the form that ORs in 1 so the clz
   argument is never zero.  */
HOST_WIDE_INT val = 0;
2622+
if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) != 2
2623+
|| val != GET_MODE_PRECISION (mode))
2624+
val = 0;
2625+
else
2626+
val = 1;
2627+
2628+
/* TEMP2 is the left operand of the xor: op0 itself for the short form,
   op0 << 1 for the longer form.  */
rtx temp2 = op0;
2629+
if (!val)
2630+
{
2631+
temp2 = expand_binop (mode, ashl_optab, op0, const1_rtx,
2632+
NULL_RTX, 0, OPTAB_DIRECT);
2633+
if (!temp2)
2634+
{
2635+
/* Shared failure exit, also reached by goto from the later steps:
   close the pending sequence without emitting it.  */
fail:
2636+
end_sequence ();
2637+
return NULL_RTX;
2638+
}
2639+
}
2640+
2641+
/* TEMP = op0 >> (prec - 1) using an arithmetic shift (ashr_optab).  */
rtx temp = expand_binop (mode, ashr_optab, op0,
2642+
GEN_INT (GET_MODE_PRECISION (mode) - 1),
2643+
NULL_RTX, 0, OPTAB_DIRECT);
2644+
if (!temp)
2645+
goto fail;
2646+
2647+
/* TEMP = TEMP2 ^ TEMP.  */
temp = expand_binop (mode, xor_optab, temp2, temp, NULL_RTX, 0,
2648+
OPTAB_DIRECT);
2649+
if (!temp)
2650+
goto fail;
2651+
2652+
/* Longer form only: OR in 1 so that the clz operand is nonzero.  */
if (!val)
2653+
{
2654+
temp = expand_binop (mode, ior_optab, temp, const1_rtx,
2655+
NULL_RTX, 0, OPTAB_DIRECT);
2656+
if (!temp)
2657+
goto fail;
2658+
}
2659+
/* In the longer form the clz is the final operation, so TARGET is
   suggested for it; in the short form TARGET is kept for the
   following addition of -1.  */
temp = expand_unop_direct (mode, clz_optab, temp, val ? NULL_RTX : target,
2660+
true);
2661+
if (!temp)
2662+
goto fail;
2663+
/* Short form only: subtract 1 from the clz result.  */
if (val)
2664+
{
2665+
temp = expand_binop (mode, add_optab, temp, constm1_rtx,
2666+
target, 0, OPTAB_DIRECT);
2667+
if (!temp)
2668+
goto fail;
2669+
}
2670+
2671+
/* Success: take the collected insns out of the pending sequence.  */
rtx_insn *seq = get_insns ();
2672+
end_sequence ();
2673+
2674+
/* add_equal_note is given CLRSB and OP0, i.e. the sequence is described
   as computing (clrsb:mode op0) into TEMP (presumably via a REG_EQUAL
   note -- confirm against add_equal_note).  Then emit the sequence into
   the enclosing insn stream.  */
add_equal_note (seq, temp, CLRSB, op0, NULL_RTX, mode);
2675+
emit_insn (seq);
2676+
return temp;
2677+
}
2678+
26032679
/* Try calculating clz of a double-word quantity as two clz's of word-sized
26042680
quantities, choosing which based on whether the high word is nonzero. */
26052681
static rtx
@@ -3171,6 +3247,9 @@ expand_unop (machine_mode mode, optab unoptab, rtx op0, rtx target,
31713247
temp = widen_leading (int_mode, op0, target, unoptab);
31723248
if (temp)
31733249
return temp;
3250+
temp = expand_clrsb_using_clz (int_mode, op0, target);
3251+
if (temp)
3252+
return temp;
31743253
}
31753254
goto try_libcall;
31763255
}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
/* PR middle-end/101950 */
2+
/* { dg-do compile } */
3+
/* { dg-options "-O2 -mno-lzcnt" } */
4+
/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
5+
/* { dg-final { scan-assembler-times "\tbsr\[ql]\t" 2 } } */
6+
/* { dg-final { scan-assembler-times "\txor\[ql]\t" 4 } } */
7+
/* { dg-final { scan-assembler-times "\tor\[ql]\t" 2 } } */
8+
/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
9+
10+
/* Returns __builtin_clrsbl (x); this is the long-operand clrsb use
   compiled by this test with -O2 -mno-lzcnt.  */
int
11+
foo (long x)
12+
{
13+
return __builtin_clrsbl (x);
14+
}
15+
16+
/* Returns __builtin_clrsb (x); this is the int-operand clrsb use
   compiled by this test with -O2 -mno-lzcnt.  */
int
17+
bar (int x)
18+
{
19+
return __builtin_clrsb (x);
20+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/* PR middle-end/101950 */
2+
/* { dg-do compile } */
3+
/* { dg-options "-O2 -mlzcnt" } */
4+
/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
5+
/* { dg-final { scan-assembler-times "\tlzcnt\[ql]\t" 2 } } */
6+
/* { dg-final { scan-assembler-times "\txor\[ql]\t" 2 } } */
7+
/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
8+
9+
/* Returns __builtin_clrsbl (x); this is the long-operand clrsb use
   compiled by this test with -O2 -mlzcnt.  */
int
10+
foo (long x)
11+
{
12+
return __builtin_clrsbl (x);
13+
}
14+
15+
/* Returns __builtin_clrsb (x); this is the int-operand clrsb use
   compiled by this test with -O2 -mlzcnt.  */
int
16+
bar (int x)
17+
{
18+
return __builtin_clrsb (x);
19+
}

0 commit comments

Comments
 (0)