Skip to content

Commit 4eb8f93

Browse files
committed
modulo-sched: Carefully process loop counter initialization [PR97421]
Do not allow direct adjustment of the pre-header initialization instruction for the count register if it is read by some instruction below it in that basic block. gcc/ChangeLog: PR rtl-optimization/97421 * modulo-sched.c (generate_prolog_epilog): Remove forward declaration, adjust last argument name and type. (const_iteration_count): Add bool pointer parameter to return whether count register is read in pre-header after its initialization. (sms_schedule): Fix count register initialization adjustment procedure according to what const_iteration_count said. gcc/testsuite/ChangeLog: PR rtl-optimization/97421 * gcc.c-torture/execute/pr97421-1.c: New test. * gcc.c-torture/execute/pr97421-2.c: New test. * gcc.c-torture/execute/pr97421-3.c: New test.
1 parent 7ae210d commit 4eb8f93

File tree

4 files changed

+103
-31
lines changed

4 files changed

+103
-31
lines changed

gcc/modulo-sched.c

Lines changed: 40 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,6 @@ static int sms_order_nodes (ddg_ptr, int, int *, int *);
210210
static void set_node_sched_params (ddg_ptr);
211211
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
212212
static void permute_partial_schedule (partial_schedule_ptr, rtx_insn *);
213-
static void generate_prolog_epilog (partial_schedule_ptr, class loop *,
214-
rtx, rtx);
215213
static int calculate_stage_count (partial_schedule_ptr, int);
216214
static void calculate_must_precede_follow (ddg_node_ptr, int, int,
217215
int, int, sbitmap, sbitmap, sbitmap);
@@ -391,30 +389,40 @@ doloop_register_get (rtx_insn *head, rtx_insn *tail)
391389
this constant. Otherwise return 0. */
392390
static rtx_insn *
393391
const_iteration_count (rtx count_reg, basic_block pre_header,
394-
int64_t * count)
392+
int64_t *count, bool* adjust_inplace)
395393
{
396394
rtx_insn *insn;
397395
rtx_insn *head, *tail;
398396

397+
*adjust_inplace = false;
398+
bool read_after = false;
399+
399400
if (! pre_header)
400401
return NULL;
401402

402403
get_ebb_head_tail (pre_header, pre_header, &head, &tail);
403404

404405
for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
405-
if (NONDEBUG_INSN_P (insn) && single_set (insn) &&
406-
rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
406+
if (single_set (insn) && rtx_equal_p (count_reg,
407+
SET_DEST (single_set (insn))))
407408
{
408409
rtx pat = single_set (insn);
409410

410411
if (CONST_INT_P (SET_SRC (pat)))
411412
{
412413
*count = INTVAL (SET_SRC (pat));
414+
*adjust_inplace = !read_after;
413415
return insn;
414416
}
415417

416418
return NULL;
417419
}
420+
else if (NONDEBUG_INSN_P (insn) && reg_mentioned_p (count_reg, insn))
421+
{
422+
read_after = true;
423+
if (reg_set_p (count_reg, insn))
424+
break;
425+
}
418426

419427
return NULL;
420428
}
@@ -1126,7 +1134,7 @@ duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
11261134
/* Generate the instructions (including reg_moves) for prolog & epilog. */
11271135
static void
11281136
generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
1129-
rtx count_reg, rtx count_init)
1137+
rtx count_reg, bool adjust_init)
11301138
{
11311139
int i;
11321140
int last_stage = PS_STAGE_COUNT (ps) - 1;
@@ -1135,12 +1143,12 @@ generate_prolog_epilog (partial_schedule_ptr ps, class loop *loop,
11351143
/* Generate the prolog, inserting its insns on the loop-entry edge. */
11361144
start_sequence ();
11371145

1138-
if (!count_init)
1146+
if (adjust_init)
11391147
{
11401148
/* Generate instructions at the beginning of the prolog to
1141-
adjust the loop count by STAGE_COUNT. If loop count is constant
1142-
(count_init), this constant is adjusted by STAGE_COUNT in
1143-
generate_prolog_epilog function. */
1149+
adjust the loop count by STAGE_COUNT. If loop count is constant
1150+
and is not used anywhere in the prologue, this constant is adjusted by
1151+
STAGE_COUNT outside of generate_prolog_epilog function. */
11441152
rtx sub_reg = NULL_RTX;
11451153

11461154
sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, count_reg,
@@ -1528,7 +1536,8 @@ sms_schedule (void)
15281536
rtx_insn *count_init;
15291537
int mii, rec_mii, stage_count, min_cycle;
15301538
int64_t loop_count = 0;
1531-
bool opt_sc_p;
1539+
bool opt_sc_p, adjust_inplace = false;
1540+
basic_block pre_header;
15321541

15331542
if (! (g = g_arr[loop->num]))
15341543
continue;
@@ -1569,19 +1578,13 @@ sms_schedule (void)
15691578
}
15701579

15711580

1572-
/* In case of th loop have doloop register it gets special
1573-
handling. */
1574-
count_init = NULL;
1575-
if ((count_reg = doloop_register_get (head, tail)))
1576-
{
1577-
basic_block pre_header;
1578-
1579-
pre_header = loop_preheader_edge (loop)->src;
1580-
count_init = const_iteration_count (count_reg, pre_header,
1581-
&loop_count);
1582-
}
1581+
count_reg = doloop_register_get (head, tail);
15831582
gcc_assert (count_reg);
15841583

1584+
pre_header = loop_preheader_edge (loop)->src;
1585+
count_init = const_iteration_count (count_reg, pre_header, &loop_count,
1586+
&adjust_inplace);
1587+
15851588
if (dump_file && count_init)
15861589
{
15871590
fprintf (dump_file, "SMS const-doloop ");
@@ -1701,9 +1704,20 @@ sms_schedule (void)
17011704
print_partial_schedule (ps, dump_file);
17021705
}
17031706

1704-
/* case the BCT count is not known , Do loop-versioning */
1705-
if (count_reg && ! count_init)
1707+
if (count_init)
1708+
{
1709+
if (adjust_inplace)
1710+
{
1711+
/* When possible, set new iteration count of loop kernel in
1712+
place. Otherwise, generate_prolog_epilog creates an insn
1713+
to adjust. */
1714+
SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1715+
- stage_count + 1);
1716+
}
1717+
}
1718+
else
17061719
{
1720+
/* In case the BCT count is not known, do loop-versioning.  */
17071721
rtx comp_rtx = gen_rtx_GT (VOIDmode, count_reg,
17081722
gen_int_mode (stage_count,
17091723
GET_MODE (count_reg)));
@@ -1713,12 +1727,7 @@ sms_schedule (void)
17131727
loop_version (loop, comp_rtx, &condition_bb,
17141728
prob, prob.invert (),
17151729
prob, prob.invert (), true);
1716-
}
1717-
1718-
/* Set new iteration count of loop kernel. */
1719-
if (count_reg && count_init)
1720-
SET_SRC (single_set (count_init)) = GEN_INT (loop_count
1721-
- stage_count + 1);
1730+
}
17221731

17231732
/* Now apply the scheduled kernel to the RTL of the loop. */
17241733
permute_partial_schedule (ps, g->closing_branch->first_note);
@@ -1735,7 +1744,7 @@ sms_schedule (void)
17351744
if (dump_file)
17361745
print_node_sched_params (dump_file, g->num_nodes, ps);
17371746
/* Generate prolog and epilog. */
1738-
generate_prolog_epilog (ps, loop, count_reg, count_init);
1747+
generate_prolog_epilog (ps, loop, count_reg, !adjust_inplace);
17391748
break;
17401749
}
17411750

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/* PR rtl-optimization/97421 */
2+
/* { dg-additional-options "-fmodulo-sched" } */
3+
4+
int a, b, d, e;
5+
int *volatile c = &a;
6+
7+
__attribute__((noinline))
8+
void f(void)
9+
{
10+
for (int g = 2; g >= 0; g--) {
11+
d = 0;
12+
for (b = 0; b <= 2; b++)
13+
;
14+
e = *c;
15+
}
16+
}
17+
18+
int main(void)
19+
{
20+
f();
21+
if (b != 3)
22+
__builtin_abort();
23+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
/* PR rtl-optimization/97421 */
2+
/* { dg-additional-options "-fmodulo-sched -fno-dce -fno-strict-aliasing" } */
3+
4+
static int a, b, c;
5+
int *d = &c;
6+
int **e = &d;
7+
int ***f = &e;
8+
int main()
9+
{
10+
int h;
11+
for (a = 2; a; a--)
12+
for (h = 0; h <= 2; h++)
13+
for (b = 0; b <= 2; b++)
14+
***f = 6;
15+
16+
if (b != 3)
17+
__builtin_abort();
18+
}
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
/* PR rtl-optimization/97421 */
2+
/* { dg-additional-options "-fmodulo-sched" } */
3+
4+
int a, b, c;
5+
short d;
6+
void e(void) {
7+
unsigned f = 0;
8+
for (; f <= 2; f++) {
9+
int g[1];
10+
int h = (long)g;
11+
c = 0;
12+
for (; c < 10; c++)
13+
g[0] = a = 0;
14+
for (; a <= 2; a++)
15+
b = d;
16+
}
17+
}
18+
int main(void) {
19+
e();
20+
if (a != 3)
21+
__builtin_abort();
22+
}

0 commit comments

Comments
 (0)