Merge lp:~eres/gcc-linaro/sms_doloop_for_ARM_new_version into lp:gcc-linaro/4.5
- sms_doloop_for_ARM_new_version
- Merge into 4.5
Status: | Merged |
---|---|
Approved by: | Richard Sandiford |
Approved revision: | no longer in the source branch. |
Merged at revision: | 99511 |
Proposed branch: | lp:~eres/gcc-linaro/sms_doloop_for_ARM_new_version |
Merge into: | lp:gcc-linaro/4.5 |
Diff against target: |
589 lines (+131/-226) 6 files modified
ChangeLog.linaro (+35/-0) gcc/ddg.c (+7/-73) gcc/ddg.h (+1/-1) gcc/doc/invoke.texi (+4/-0) gcc/modulo-sched.c (+79/-152) gcc/params.def (+5/-0) |
To merge this branch: | bzr merge lp:~eres/gcc-linaro/sms_doloop_for_ARM_new_version |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Richard Sandiford | Approve | ||
Review via email:
|
Commit message
Description of the change
Implement doloop pattern for ARM and support it in SMS.
The change consists of these patches that sent upstream:
1) Doloop pattern for ARM:
http://
2) Support closing_
http://
3) New flag to apply SMS when SC equals 1
http://

Linaro Toolchain Builder (cbuild) wrote : | # |

Linaro Toolchain Builder (cbuild) wrote : | # |
cbuild successfully built this on armv7l-
The build results are available at:
http://
The test suite results were unchanged compared to the branch point lp:gcc-linaro+bzr99506.
The full testsuite results are at:
http://
cbuild-checked: armv7l-

Linaro Toolchain Builder (cbuild) wrote : | # |
cbuild successfully built this on i686-lucid-
The build results are available at:
http://
The test suite results were unchanged compared to the branch point lp:gcc-linaro+bzr99506.
The full testsuite results are at:
http://
cbuild-checked: i686-lucid-

Richard Sandiford (rsandifo) wrote : | # |
OK.
(I don't know the code well enough to offer a meaningful review,
but upstream accepted the patches concurrently with this merge
request.)
Preview Diff
1 | === modified file 'ChangeLog.linaro' |
2 | --- ChangeLog.linaro 2011-05-06 11:02:43 +0000 |
3 | +++ ChangeLog.linaro 2011-05-11 08:13:47 +0000 |
4 | @@ -1,3 +1,38 @@ |
5 | +2011-05-11 Revital Eres <revital.eres@linaro.org> |
6 | + |
7 | + gcc/ |
8 | + * loop-doloop.c (doloop_condition_get): Support new form of |
9 | + doloop pattern and use prev_nondebug_insn instead of PREV_INSN. |
10 | + * config/arm/thumb2.md (*thumb2_addsi3_compare0): Remove "*". |
11 | + (doloop_end): New. |
12 | + * config/arm/arm.md (*addsi3_compare0): Remove "*". |
13 | + * params.def (sms-min-sc): New param flag. |
14 | + * doc/invoke.texi (sms-min-sc): Document it. |
15 | + * ddg.c (create_ddg_dep_from_intra_loop_link): If a true dep edge |
16 | + enters the branch create an anti edge in the opposite direction |
17 | + to prevent the creation of reg-moves. |
18 | + (get_node_of_insn_uid, check_closing_branch_deps): Delete |
19 | + functions. |
20 | + (create_ddg): Restore previous definition and implementation. |
21 | + * ddg.h (create_ddg): Restore previous definition. |
22 | + * modulo-sched.c: Adjust comment to reflect the fact we are |
23 | + scheduling closing branch. |
24 | + (PS_STAGE_COUNT): Rename to CALC_STAGE_COUNT and redefine. |
25 | + (stage_count): New field in struct partial_schedule. |
26 | + (calculate_stage_count): New function. |
27 | + (normalize_sched_times): Rename to reset_sched_times and handle |
28 | + incrementing the sched time of the nodes by a constant value |
29 | + passed as parameter. |
30 | + (duplicate_insns_of_cycles): Skip closing branch. |
31 | + (sms_schedule_by_order): Schedule closing branch. |
32 | + (ps_insn_find_column): Handle closing branch. |
33 | + (sms_schedule): Call reset_sched_times and adjust the code to |
34 | + support scheduling of the closing branch. Use sms-min-sc. |
35 | + Support new form of doloop pattern. |
36 | + (ps_insert_empty_row): Update calls to normalize_sched_times |
37 | + and rotate_partial_schedule functions. |
38 | + (mark_doloop_insns): Remove. |
39 | + |
40 | 2011-05-06 Richard Sandiford <richard.sandiford@linaro.org> |
41 | |
42 | gcc/ |
43 | |
44 | === modified file 'gcc/ddg.c' |
45 | --- gcc/ddg.c 2011-03-24 07:45:38 +0000 |
46 | +++ gcc/ddg.c 2011-05-11 08:13:47 +0000 |
47 | @@ -60,8 +60,6 @@ |
48 | static ddg_edge_ptr create_ddg_edge (ddg_node_ptr, ddg_node_ptr, dep_type, |
49 | dep_data_type, int, int); |
50 | static void add_edge_to_ddg (ddg_ptr g, ddg_edge_ptr); |
51 | -static ddg_node_ptr get_node_of_insn_uid (ddg_ptr, int); |
52 | - |
53 | |
54 | |
55 | /* Auxiliary variable for mem_read_insn_p/mem_write_insn_p. */ |
56 | static bool mem_ref_p; |
57 | @@ -199,6 +197,11 @@ |
58 | } |
59 | } |
60 | |
61 | + /* If a true dep edge enters the branch create an anti edge in the |
62 | + opposite direction to prevent the creation of reg-moves. */ |
63 | + if ((DEP_TYPE (link) == REG_DEP_TRUE) && JUMP_P (dest_node->insn)) |
64 | + create_ddg_dep_no_link (g, dest_node, src_node, ANTI_DEP, REG_DEP, 1); |
65 | + |
66 | latency = dep_cost (link); |
67 | e = create_ddg_edge (src_node, dest_node, t, dt, latency, distance); |
68 | add_edge_to_ddg (g, e); |
69 | @@ -452,65 +455,12 @@ |
70 | sched_free_deps (head, tail, false); |
71 | } |
72 | |
73 | -/* Given DOLOOP_INSNS which holds the instructions that |
74 | - belong to the do-loop part; mark closing_branch_deps field in ddg G |
75 | - as TRUE if the do-loop part's instructions are dependent on the other |
76 | - loop instructions. Otherwise mark it as FALSE. */ |
77 | -static void |
78 | -check_closing_branch_deps (ddg_ptr g, sbitmap doloop_insns) |
79 | -{ |
80 | - sbitmap_iterator sbi; |
81 | - unsigned int u = 0; |
82 | - |
83 | - EXECUTE_IF_SET_IN_SBITMAP (doloop_insns, 0, u, sbi) |
84 | - { |
85 | - ddg_edge_ptr e; |
86 | - ddg_node_ptr u_node = get_node_of_insn_uid (g, u); |
87 | - |
88 | - gcc_assert (u_node); |
89 | - |
90 | - for (e = u_node->in; e != 0; e = e->next_in) |
91 | - { |
92 | - ddg_node_ptr v_node = e->src; |
93 | - |
94 | - if (((unsigned int) INSN_UID (v_node->insn) == u) |
95 | - || DEBUG_INSN_P (v_node->insn)) |
96 | - continue; |
97 | - |
98 | - /* Ignore dependencies between memory writes and the |
99 | - jump. */ |
100 | - if (JUMP_P (u_node->insn) |
101 | - && e->type == OUTPUT_DEP |
102 | - && mem_write_insn_p (v_node->insn)) |
103 | - continue; |
104 | - if (!TEST_BIT (doloop_insns, INSN_UID (v_node->insn))) |
105 | - { |
106 | - g->closing_branch_deps = 1; |
107 | - return; |
108 | - } |
109 | - } |
110 | - for (e = u_node->out; e != 0; e = e->next_out) |
111 | - { |
112 | - ddg_node_ptr v_node = e->dest; |
113 | - |
114 | - if (((unsigned int) INSN_UID (v_node->insn) == u) |
115 | - || DEBUG_INSN_P (v_node->insn)) |
116 | - continue; |
117 | - if (!TEST_BIT (doloop_insns, INSN_UID (v_node->insn))) |
118 | - { |
119 | - g->closing_branch_deps = 1; |
120 | - return; |
121 | - } |
122 | - } |
123 | - } |
124 | - g->closing_branch_deps = 0; |
125 | -} |
126 | |
127 | /* Given a basic block, create its DDG and return a pointer to a variable |
128 | of ddg type that represents it. |
129 | Initialize the ddg structure fields to the appropriate values. */ |
130 | ddg_ptr |
131 | -create_ddg (basic_block bb, sbitmap doloop_insns) |
132 | +create_ddg (basic_block bb, int closing_branch_deps) |
133 | { |
134 | ddg_ptr g; |
135 | rtx insn, first_note; |
136 | @@ -520,6 +470,7 @@ |
137 | g = (ddg_ptr) xcalloc (1, sizeof (struct ddg)); |
138 | |
139 | g->bb = bb; |
140 | + g->closing_branch_deps = closing_branch_deps; |
141 | |
142 | /* Count the number of insns in the BB. */ |
143 | for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); |
144 | @@ -592,11 +543,6 @@ |
145 | /* Build the data dependency graph. */ |
146 | build_intra_loop_deps (g); |
147 | build_inter_loop_deps (g); |
148 | - |
149 | - /* Check whether the do-loop part is decoupled from the other loop |
150 | - instructions. */ |
151 | - check_closing_branch_deps (g, doloop_insns); |
152 | - |
153 | return g; |
154 | } |
155 | |
156 | @@ -890,18 +836,6 @@ |
157 | return NULL; |
158 | } |
159 | |
160 | -/* Given the uid of an instruction UID return the node that represents it. */ |
161 | -static ddg_node_ptr |
162 | -get_node_of_insn_uid (ddg_ptr g, int uid) |
163 | -{ |
164 | - int i; |
165 | - |
166 | - for (i = 0; i < g->num_nodes; i++) |
167 | - if (uid == INSN_UID (g->nodes[i].insn)) |
168 | - return &g->nodes[i]; |
169 | - return NULL; |
170 | -} |
171 | - |
172 | /* Given a set OPS of nodes in the DDG, find the set of their successors |
173 | which are not in OPS, and set their bits in SUCC. Bits corresponding to |
174 | OPS are cleared from SUCC. Leaves the other bits in SUCC unchanged. */ |
175 | |
176 | === modified file 'gcc/ddg.h' |
177 | --- gcc/ddg.h 2011-03-24 07:45:38 +0000 |
178 | +++ gcc/ddg.h 2011-05-11 08:13:47 +0000 |
179 | @@ -167,7 +167,7 @@ |
180 | }; |
181 | |
182 | |
183 | |
184 | -ddg_ptr create_ddg (basic_block, sbitmap); |
185 | +ddg_ptr create_ddg (basic_block, int closing_branch_deps); |
186 | void free_ddg (ddg_ptr); |
187 | |
188 | void print_ddg (FILE *, ddg_ptr); |
189 | |
190 | === modified file 'gcc/doc/invoke.texi' |
191 | --- gcc/doc/invoke.texi 2011-04-17 23:04:58 +0000 |
192 | +++ gcc/doc/invoke.texi 2011-05-11 08:13:47 +0000 |
193 | @@ -8430,6 +8430,10 @@ |
194 | The maximum number of best instructions in the ready list that are considered |
195 | for renaming in the selective scheduler. The default value is 2. |
196 | |
197 | +@item sms-min-sc |
198 | +The minimum value of stage count that swing modulo scheduler will |
199 | +generate. The default value is 2. |
200 | + |
201 | @item max-last-value-rtl |
202 | The maximum size measured as number of RTLs that can be recorded in an expression |
203 | in combiner for a pseudo register as last known value of that register. The default |
204 | |
205 | === modified file 'gcc/modulo-sched.c' |
206 | --- gcc/modulo-sched.c 2011-03-24 07:45:38 +0000 |
207 | +++ gcc/modulo-sched.c 2011-05-11 08:13:47 +0000 |
208 | @@ -84,14 +84,13 @@ |
209 | II cycles (i.e. use register copies to prevent a def from overwriting |
210 | itself before reaching the use). |
211 | |
212 | - SMS works with countable loops (1) whose control part can be easily |
213 | - decoupled from the rest of the loop and (2) whose loop count can |
214 | - be easily adjusted. This is because we peel a constant number of |
215 | - iterations into a prologue and epilogue for which we want to avoid |
216 | - emitting the control part, and a kernel which is to iterate that |
217 | - constant number of iterations less than the original loop. So the |
218 | - control part should be a set of insns clearly identified and having |
219 | - its own iv, not otherwise used in the loop (at-least for now), which |
220 | + SMS works with countable loops whose loop count can be easily |
221 | + adjusted. This is because we peel a constant number of iterations |
222 | + into a prologue and epilogue for which we want to avoid emitting |
223 | + the control part, and a kernel which is to iterate that constant |
224 | + number of iterations less than the original loop. So the control |
225 | + part should be a set of insns clearly identified and having its |
226 | + own iv, not otherwise used in the loop (at-least for now), which |
227 | initializes a register before the loop to the number of iterations. |
228 | Currently SMS relies on the do-loop pattern to recognize such loops, |
229 | where (1) the control part comprises of all insns defining and/or |
230 | @@ -116,7 +115,7 @@ |
231 | |
232 | /* The number of different iterations the nodes in ps span, assuming |
233 | the stage boundaries are placed efficiently. */ |
234 | -#define CALC_STAGE_COUNT(min_cycle,max_cycle,ii) ((max_cycle - min_cycle \ |
235 | +#define CALC_STAGE_COUNT(max_cycle,min_cycle,ii) ((max_cycle - min_cycle \ |
236 | + 1 + ii - 1) / ii) |
237 | /* The stage count of ps. */ |
238 | #define PS_STAGE_COUNT(ps) (((partial_schedule_ptr)(ps))->stage_count) |
239 | @@ -200,7 +199,6 @@ |
240 | static void duplicate_insns_of_cycles (partial_schedule_ptr, |
241 | int, int, int, rtx); |
242 | static int calculate_stage_count (partial_schedule_ptr ps); |
243 | - |
244 | #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) |
245 | #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) |
246 | #define SCHED_FIRST_REG_MOVE(x) \ |
247 | @@ -318,7 +316,7 @@ |
248 | : prev_nondebug_insn (tail)); |
249 | |
250 | for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) |
251 | - if (reg_mentioned_p (reg, insn) && NONDEBUG_INSN_P (insn)) |
252 | + if (reg_mentioned_p (reg, insn) && !DEBUG_INSN_P (insn)) |
253 | { |
254 | if (dump_file) |
255 | { |
256 | @@ -337,24 +335,6 @@ |
257 | #endif |
258 | } |
259 | |
260 | -/* Mark in DOLOOP_INSNS the instructions that belong to the do-loop part. |
261 | - Use TAIL to recognize that part. */ |
262 | -static void |
263 | -mark_doloop_insns (sbitmap doloop_insns, rtx tail) |
264 | -{ |
265 | - rtx first_insn_not_to_check, insn; |
266 | - |
267 | - /* This is the first instruction which belongs the doloop part. */ |
268 | - first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail |
269 | - : prev_nondebug_insn (tail)); |
270 | - |
271 | - sbitmap_zero (doloop_insns); |
272 | - for (insn = first_insn_not_to_check; insn != NEXT_INSN (tail); |
273 | - insn = NEXT_INSN (insn)) |
274 | - if (NONDEBUG_INSN_P (insn)) |
275 | - SET_BIT (doloop_insns, INSN_UID (insn)); |
276 | -} |
277 | - |
278 | /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so |
279 | that the number of iterations is a compile-time constant. If so, |
280 | return the rtx that sets COUNT_REG to a constant, and set COUNT to |
281 | @@ -607,44 +587,42 @@ |
282 | ddg_node_ptr u = crr_insn->node; |
283 | int normalized_time = SCHED_TIME (u) - amount; |
284 | int new_min_cycle = PS_MIN_CYCLE (ps) - amount; |
285 | - /* The first cycle in row zero after the rotation. */ |
286 | - int new_first_cycle_in_row_zero = |
287 | - new_min_cycle + ii - SMODULO (new_min_cycle, ii); |
288 | + int sc_until_cycle_zero, stage; |
289 | |
290 | - if (dump_file) |
291 | - fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\ |
292 | - min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME |
293 | - (u), ps->min_cycle); |
294 | + if (dump_file) |
295 | + { |
296 | + /* Print the scheduling times after the rotation. */ |
297 | + fprintf (dump_file, "crr_insn->node=%d (insn id %d), " |
298 | + "crr_insn->cycle=%d, min_cycle=%d", crr_insn->node->cuid, |
299 | + INSN_UID (crr_insn->node->insn), SCHED_TIME (u), |
300 | + normalized_time); |
301 | + if (JUMP_P (crr_insn->node->insn)) |
302 | + fprintf (dump_file, " (branch)"); |
303 | + fprintf (dump_file, "\n"); |
304 | + } |
305 | + |
306 | gcc_assert (SCHED_TIME (u) >= ps->min_cycle); |
307 | gcc_assert (SCHED_TIME (u) <= ps->max_cycle); |
308 | SCHED_TIME (u) = normalized_time; |
309 | - crr_insn->cycle = normalized_time; |
310 | SCHED_ROW (u) = SMODULO (normalized_time, ii); |
311 | - |
312 | - /* If min_cycle is in row zero after the rotation then |
313 | - the stage count can be calculated by dividing the cycle |
314 | - with ii. Otherwise, the calculation is done by dividing the |
315 | - SMSed kernel into two intervals: |
316 | - |
317 | - 1) min_cycle <= interval 0 < first_cycle_in_row_zero |
318 | - 2) first_cycle_in_row_zero <= interval 1 < max_cycle |
319 | - |
320 | - Cycles in interval 0 are in stage 0. The stage of cycles |
321 | - in interval 1 should be added by 1 to take interval 0 into |
322 | - account. */ |
323 | - if (SMODULO (new_min_cycle, ii) == 0) |
324 | - SCHED_STAGE (u) = normalized_time / ii; |
325 | - else |
326 | - { |
327 | - if (crr_insn->cycle < new_first_cycle_in_row_zero) |
328 | - SCHED_STAGE (u) = 0; |
329 | - else |
330 | - SCHED_STAGE (u) = |
331 | - ((SCHED_TIME (u) - new_first_cycle_in_row_zero) / ii) + 1; |
332 | + |
333 | + /* The calculation of stage count is done adding the number |
334 | + of stages before cycle zero and after cycle zero. */ |
335 | + sc_until_cycle_zero = CALC_STAGE_COUNT (-1, new_min_cycle, ii); |
336 | + |
337 | + if (SCHED_TIME (u) < 0) |
338 | + { |
339 | + stage = CALC_STAGE_COUNT (-1, SCHED_TIME (u), ii); |
340 | + SCHED_STAGE (u) = sc_until_cycle_zero - stage; |
341 | + } |
342 | + else |
343 | + { |
344 | + stage = CALC_STAGE_COUNT (SCHED_TIME (u), 0, ii); |
345 | + SCHED_STAGE (u) = sc_until_cycle_zero + stage - 1; |
346 | } |
347 | } |
348 | } |
349 | - |
350 | + |
351 | /* Set SCHED_COLUMN of each node according to its position in PS. */ |
352 | static void |
353 | set_columns_for_ps (partial_schedule_ptr ps) |
354 | @@ -694,8 +672,8 @@ |
355 | |
356 | /* Do not duplicate any insn which refers to count_reg as it |
357 | belongs to the control part. |
358 | - If closing_branch_deps is true the closing branch is scheduled |
359 | - as well and thus should be ignored. |
360 | + The closing branch is scheduled as well and thus should |
361 | + be ignored. |
362 | TODO: This should be done by analyzing the control part of |
363 | the loop. */ |
364 | if (reg_mentioned_p (count_reg, u_node->insn) |
365 | @@ -945,8 +923,7 @@ |
366 | basic_block condition_bb = NULL; |
367 | edge latch_edge; |
368 | gcov_type trip_count = 0; |
369 | - sbitmap doloop_insns; |
370 | - |
371 | + |
372 | loop_optimizer_init (LOOPS_HAVE_PREHEADERS |
373 | | LOOPS_HAVE_RECORDED_EXITS); |
374 | if (number_of_loops () <= 1) |
375 | @@ -971,7 +948,6 @@ |
376 | setup_sched_infos (); |
377 | haifa_sched_init (); |
378 | |
379 | - doloop_insns = sbitmap_alloc (get_max_uid () + 1); |
380 | /* Allocate memory to hold the DDG array one entry for each loop. |
381 | We use loop->num as index into this array. */ |
382 | g_arr = XCNEWVEC (ddg_ptr, number_of_loops ()); |
383 | @@ -1104,16 +1080,18 @@ |
384 | |
385 | continue; |
386 | } |
387 | - mark_doloop_insns (doloop_insns, tail); |
388 | - if (! (g = create_ddg (bb, doloop_insns))) |
389 | + |
390 | + /* Always schedule the closing branch with the rest of the |
391 | + instructions. The branch is rotated to be in row ii-1 at the |
392 | + end of the scheduling procedure to make sure it's the last |
393 | + instruction in the iteration. */ |
394 | + if (! (g = create_ddg (bb, 1))) |
395 | { |
396 | if (dump_file) |
397 | fprintf (dump_file, "SMS create_ddg failed\n"); |
398 | continue; |
399 | } |
400 | - if (dump_file) |
401 | - fprintf (dump_file, "SMS closing_branch_deps: %d\n", |
402 | - g->closing_branch_deps); |
403 | + |
404 | g_arr[loop->num] = g; |
405 | if (dump_file) |
406 | fprintf (dump_file, "...OK\n"); |
407 | @@ -1215,16 +1193,17 @@ |
408 | |
409 | ps = sms_schedule_by_order (g, mii, maxii, node_order); |
410 | |
411 | - if (ps) |
412 | - { |
413 | - stage_count = calculate_stage_count (ps); |
414 | - gcc_assert(stage_count >= 1); |
415 | - PS_STAGE_COUNT(ps) = stage_count; |
416 | - } |
417 | - |
418 | - /* Stage count of 1 means that there is no interleaving between |
419 | - iterations, let the scheduling passes do the job. */ |
420 | - if (stage_count <= 1 |
421 | + if (ps) |
422 | + { |
423 | + stage_count = calculate_stage_count (ps); |
424 | + gcc_assert(stage_count >= 1); |
425 | + PS_STAGE_COUNT(ps) = stage_count; |
426 | + } |
427 | + |
428 | + /* The default value of PARAM_SMS_MIN_SC is 2 as stage count of |
429 | + 1 means that there is no interleaving between iterations thus |
430 | + we let the scheduling passes do the job in this case. */ |
431 | + if (stage_count < (unsigned) PARAM_VALUE (PARAM_SMS_MIN_SC) |
432 | || (count_init && (loop_count <= stage_count)) |
433 | || (flag_branch_probabilities && (trip_count <= stage_count))) |
434 | { |
435 | @@ -1242,21 +1221,12 @@ |
436 | else |
437 | { |
438 | struct undo_replace_buff_elem *reg_move_replaces; |
439 | - int amount; |
440 | - |
441 | - /* Set the stage boundaries. If the DDG is built with closing_branch_deps, |
442 | - the closing_branch was scheduled and should appear in the last (ii-1) |
443 | - row. Otherwise, we are free to schedule the branch, and we let nodes |
444 | - that were scheduled at the first PS_MIN_CYCLE cycle appear in the first |
445 | - row; this should reduce stage_count to minimum. |
446 | - TODO: Revisit the issue of scheduling the insns of the |
447 | - control part relative to the branch when the control part |
448 | - has more than one insn. */ |
449 | - amount = (g->closing_branch_deps)? SCHED_TIME (g->closing_branch) + 1: |
450 | - PS_MIN_CYCLE (ps); |
451 | + int amount = SCHED_TIME (g->closing_branch) + 1; |
452 | + |
453 | + /* Set the stage boundaries. The closing_branch was scheduled |
454 | + and should appear in the last (ii-1) row. */ |
455 | reset_sched_times (ps, amount); |
456 | rotate_partial_schedule (ps, amount); |
457 | - |
458 | set_columns_for_ps (ps); |
459 | |
460 | canon_loop (loop); |
461 | @@ -1267,13 +1237,8 @@ |
462 | "SMS succeeded %d %d (with ii, sc)\n", ps->ii, |
463 | stage_count); |
464 | print_partial_schedule (ps, dump_file); |
465 | - if (!g->closing_branch_deps) |
466 | - fprintf (dump_file, |
467 | - "SMS Branch (%d) will later be scheduled at \ |
468 | - cycle %d.\n", |
469 | - g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1); |
470 | - } |
471 | - |
472 | + } |
473 | + |
474 | /* case the BCT count is not known , Do loop-versioning */ |
475 | if (count_reg && ! count_init) |
476 | { |
477 | @@ -1318,7 +1283,6 @@ |
478 | } |
479 | |
480 | free (g_arr); |
481 | - sbitmap_free (doloop_insns); |
482 | |
483 | /* Release scheduler data, needed until now because of DFA. */ |
484 | haifa_sched_finish (); |
485 | @@ -1826,13 +1790,6 @@ |
486 | RESET_BIT (tobe_scheduled, u); |
487 | continue; |
488 | } |
489 | - /* Closing branch handled later unless closing_branch_deps |
490 | - is true. */ |
491 | - if (JUMP_P (insn) && !g->closing_branch_deps) |
492 | - { |
493 | - RESET_BIT (tobe_scheduled, u); |
494 | - continue; |
495 | - } |
496 | |
497 | if (TEST_BIT (sched_nodes, u)) |
498 | continue; |
499 | @@ -2675,9 +2632,9 @@ |
500 | last_in_row = next_ps_i; |
501 | } |
502 | |
503 | - /* If closing_branch_deps is true we are scheduling the closing |
504 | - branch as well. Make sure there is no dependent instruction after |
505 | - it as the branch should be the last instruction. */ |
506 | + /* The closing branch is scheduled as well. Make sure there is no |
507 | + dependent instruction after it as the branch should be the last |
508 | + instruction in the row. */ |
509 | if (JUMP_P (ps_i->node->insn)) |
510 | { |
511 | if (first_must_follow) |
512 | @@ -2918,51 +2875,21 @@ |
513 | return ps_i; |
514 | } |
515 | |
516 | -/* Calculate the stage count of the partial schedule PS. */ |
517 | +/* Calculate the stage count of the partial schedule PS. The calculation |
518 | + takes into account the rotation to bring the closing branch to row |
519 | + ii-1. */ |
520 | int |
521 | calculate_stage_count (partial_schedule_ptr ps) |
522 | { |
523 | - int stage_count; |
524 | - |
525 | - /* If closing_branch_deps is false then the stage |
526 | - boundaries are placed efficiently, meaning that min_cycle will be |
527 | - placed at row 0. Otherwise, the closing branch will be placed in |
528 | - row ii-1. For the later case we assume the final SMSed kernel can |
529 | - be divided into two intervals. This assumption is used for the |
530 | - stage count calculation: |
531 | - |
532 | - 1) min_cycle <= interval 0 < first_cycle_in_row_zero |
533 | - 2) first_cycle_in_row_zero <= interval 1 < max_cycle |
534 | - */ |
535 | - stage_count = |
536 | - CALC_STAGE_COUNT (PS_MIN_CYCLE (ps), PS_MAX_CYCLE (ps), ps->ii); |
537 | - if (ps->g->closing_branch_deps) |
538 | - { |
539 | - int new_min_cycle; |
540 | - int new_min_cycle_row; |
541 | - int rotation_amount = SCHED_TIME (ps->g->closing_branch) + 1; |
542 | - |
543 | - /* This is the new value of min_cycle after the final rotation to |
544 | - bring closing branch into row ii-1. */ |
545 | - new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; |
546 | - /* This is the row which the the new min_cycle will be placed in. */ |
547 | - new_min_cycle_row = SMODULO (new_min_cycle, ps->ii); |
548 | - /* If the row of min_cycle is zero then interval 0 is empty. |
549 | - Otherwise, we need to calculate interval 1 and add it by one |
550 | - to take interval 0 into account. */ |
551 | - if (new_min_cycle_row != 0) |
552 | - { |
553 | - int new_max_cycle, first_cycle_in_row_zero; |
554 | - |
555 | - new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; |
556 | - first_cycle_in_row_zero = |
557 | - new_min_cycle + ps->ii - new_min_cycle_row; |
558 | - |
559 | - stage_count = |
560 | - CALC_STAGE_COUNT (first_cycle_in_row_zero, new_max_cycle, |
561 | - ps->ii) + 1; |
562 | - } |
563 | - } |
564 | + int rotation_amount = (SCHED_TIME (ps->g->closing_branch)) + 1; |
565 | + int new_min_cycle = PS_MIN_CYCLE (ps) - rotation_amount; |
566 | + int new_max_cycle = PS_MAX_CYCLE (ps) - rotation_amount; |
567 | + int stage_count = CALC_STAGE_COUNT (-1, new_min_cycle, ps->ii); |
568 | + |
569 | + /* The calculation of stage count is done adding the number of stages |
570 | + before cycle zero and after cycle zero. */ |
571 | + stage_count += CALC_STAGE_COUNT (new_max_cycle, 0, ps->ii); |
572 | + |
573 | return stage_count; |
574 | } |
575 | |
576 | |
577 | === modified file 'gcc/params.def' |
578 | --- gcc/params.def 2011-02-01 14:20:13 +0000 |
579 | +++ gcc/params.def 2011-05-11 08:13:47 +0000 |
580 | @@ -324,6 +324,11 @@ |
581 | "sms-max-ii-factor", |
582 | "A factor for tuning the upper bound that swing modulo scheduler uses for scheduling a loop", |
583 | 100, 0, 0) |
584 | +/* The minimum value of stage count that swing modulo scheduler will generate. */ |
585 | +DEFPARAM(PARAM_SMS_MIN_SC, |
586 | + "sms-min-sc", |
587 | + "The minimum value of stage count that swing modulo scheduler will generate.", |
588 | + 2, 1, 1) |
589 | DEFPARAM(PARAM_SMS_DFA_HISTORY, |
590 | "sms-dfa-history", |
591 | "The number of cycles the swing modulo scheduler considers when checking conflicts using DFA", |
cbuild has taken a snapshot of this branch at r99507 and queued it for build.
The snapshot is available at: ex.seabright. co.nz/snapshots /gcc-linaro- 4.5+bzr99507~ eres~sms_ doloop_ for_ARM_ new_version. tar.xdelta3. xz
http://
and will be built on the following builders:
a9-builder i686 x86_64
You can track the build queue at: ex.seabright. co.nz/helpers/ scheduler
http://
cbuild-snapshot: gcc-linaro- 4.5+bzr99507~ eres~sms_ doloop_ for_ARM_ new_version
cbuild-ancestor: lp:gcc-linaro+bzr99506
cbuild-state: check