Merge lp:~ams-codesourcery/gcc-linaro/lp663939 into lp:gcc-linaro/4.6

Proposed by Andrew Stubbs
Status: Superseded
Proposed branch: lp:~ams-codesourcery/gcc-linaro/lp663939
Merge into: lp:gcc-linaro/4.6
Diff against target: 949 lines (+542/-206) (has conflicts)
9 files modified
ChangeLog.linaro (+56/-0)
gcc/config/arm/arm-protos.h (+1/-0)
gcc/config/arm/arm.c (+307/-196)
gcc/config/arm/arm.md (+13/-9)
gcc/config/arm/constraints.md (+13/-1)
gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c (+27/-0)
gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c (+75/-0)
gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c (+28/-0)
gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c (+22/-0)
Text conflict in ChangeLog.linaro
To merge this branch: bzr merge lp:~ams-codesourcery/gcc-linaro/lp663939
Reviewer Review Type Date Requested Status
Linaro Toolchain Developers Pending
Review via email: mp+45750@code.launchpad.net

This proposal has been superseded by a proposal from 2011-06-02.

Description of the change

This patch improves support for Thumb replicated constants, adds support for ADDW and SUBW, and ensures that the most efficient sense is used (inverted, negated, or normal).

This addresses the problems identified in LP:663939.

It is awaiting upstream approval here:
 http://old.nabble.com/-PATCH--ARM--Thumb2-constant-loading-optimization-to30405142.html

To post a comment you must log in.
Revision history for this message
Loïc Minier (lool) wrote :

Looks like we should ping upstream again here

Revision history for this message
Loïc Minier (lool) wrote :

11:57 < lool> ams_cs: Is
https://code.launchpad.net/~ams-codesourcery/gcc-linaro/lp663939/+merge/45750
              still work in progress? It seems really old now
[...]
11:58 < ams_cs> lool: last activity 12th april
11:58 < ams_cs> lool: I have to do some reworking
11:58 < ams_cs> lool: I've also discussed this patch with Ramana quite a bit

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'ChangeLog.linaro'
--- ChangeLog.linaro 2011-06-02 12:12:00 +0000
+++ ChangeLog.linaro 2011-06-02 12:58:15 +0000
@@ -1,3 +1,4 @@
1<<<<<<< TREE
12001-06-02 Richard Sandiford <richard.sandiford@linaro.org>22001-06-02 Richard Sandiford <richard.sandiford@linaro.org>
23
3 gcc/4 gcc/
@@ -336,6 +337,61 @@
336 * config/arm/arm.h (CANNOT_CHANGE_MODE_CLASS): Restrict FPA_REGS337 * config/arm/arm.h (CANNOT_CHANGE_MODE_CLASS): Restrict FPA_REGS
337 case to VFPv1.338 case to VFPv1.
338339
340=======
3412011-06-02 Andrew Stubbs <ams@codesourcery.com>
342
343 Backport of patch proposed for FSF:
344
345 2011-05-09 Andrew Stubbs <ams@codesourcery.com>
346
347 gcc/
348 * config/arm/arm.c (struct four_ints): New type.
349 (count_insns_for_constant): Delete function.
350 (find_best_start): Delete function.
351 (optimal_immediate_sequence): New function.
352 (optimal_immediate_sequence_1): New function.
353 (arm_gen_constant): Move constant splitting code to
354 optimal_immediate_sequence.
355 Rewrite constant negation/inversion code.
356
357 gcc/testsuite/
358 * gcc.target/arm/thumb2-replicated-constant1.c: New file.
359 * gcc.target/arm/thumb2-replicated-constant2.c: New file.
360 * gcc.target/arm/thumb2-replicated-constant3.c: New file.
361 * gcc.target/arm/thumb2-replicated-constant4.c: New file.
362
3632011-06-02 Andrew Stubbs <ams@codesourcery.com>
364
365 Backport of patch proposed for FSF:
366
367 2011-06-02 Andrew Stubbs <ams@codesourcery.com>
368
369 gcc/
370 * config/arm/arm-protos.h (const_ok_for_op): Add prototype.
371 * config/arm/arm.c (const_ok_for_op): Add support for addw/subw.
372 Remove prototype. Remove static function type.
373 * config/arm/arm.md (*arm_addsi3): Add addw/subw support.
374 Add arch attribute.
375 * config/arm/constraints.md (Pj, PJ): New constraints.
376
3772011-06-02 Andrew Stubbs <ams@codesourcery.com>
378
379 Backport from FSF:
380
381 2011-04-20 Andrew Stubbs <ams@codesourcery.com>
382
383 * config/arm/arm.c (arm_gen_constant): Move movw support ....
384 (const_ok_for_op): ... to here.
385
3862011-06-02 Andrew Stubbs <ams@codesourcery.com>
387
388 Backport from FSF:
389
390 2011-04-20 Andrew Stubbs <ams@codesourcery.com>
391
392 * config/arm/arm.c (arm_gen_constant): Remove redundant can_invert.
393
394>>>>>>> MERGE-SOURCE
3392011-05-26 Andrew Stubbs <ams@codesourcery.com>3952011-05-26 Andrew Stubbs <ams@codesourcery.com>
340396
341 Merge from FSF GCC 4.6 (svn branches/gcc-4_6-branch 174261).397 Merge from FSF GCC 4.6 (svn branches/gcc-4_6-branch 174261).
342398
=== modified file 'gcc/config/arm/arm-protos.h'
--- gcc/config/arm/arm-protos.h 2011-05-03 15:17:25 +0000
+++ gcc/config/arm/arm-protos.h 2011-06-02 12:58:15 +0000
@@ -46,6 +46,7 @@
46extern bool arm_small_register_classes_for_mode_p (enum machine_mode);46extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
47extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);47extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
48extern int const_ok_for_arm (HOST_WIDE_INT);48extern int const_ok_for_arm (HOST_WIDE_INT);
49extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
49extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,50extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
50 HOST_WIDE_INT, rtx, rtx, int);51 HOST_WIDE_INT, rtx, rtx, int);
51extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);52extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
5253
=== modified file 'gcc/config/arm/arm.c'
--- gcc/config/arm/arm.c 2011-05-11 14:49:48 +0000
+++ gcc/config/arm/arm.c 2011-06-02 12:58:15 +0000
@@ -63,6 +63,11 @@
6363
64void (*arm_lang_output_object_attributes_hook)(void);64void (*arm_lang_output_object_attributes_hook)(void);
6565
66struct four_ints
67{
68 int i[4];
69};
70
66/* Forward function declarations. */71/* Forward function declarations. */
67static bool arm_needs_doubleword_align (enum machine_mode, const_tree);72static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
68static int arm_compute_static_chain_stack_bytes (void);73static int arm_compute_static_chain_stack_bytes (void);
@@ -81,7 +86,6 @@
81static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);86static bool arm_legitimate_address_p (enum machine_mode, rtx, bool);
82static int thumb_far_jump_used_p (void);87static int thumb_far_jump_used_p (void);
83static bool thumb_force_lr_save (void);88static bool thumb_force_lr_save (void);
84static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
85static rtx emit_sfm (int, int);89static rtx emit_sfm (int, int);
86static unsigned arm_size_return_regs (void);90static unsigned arm_size_return_regs (void);
87static bool arm_assemble_integer (rtx, unsigned int, int);91static bool arm_assemble_integer (rtx, unsigned int, int);
@@ -129,7 +133,13 @@
129static int arm_comp_type_attributes (const_tree, const_tree);133static int arm_comp_type_attributes (const_tree, const_tree);
130static void arm_set_default_type_attributes (tree);134static void arm_set_default_type_attributes (tree);
131static int arm_adjust_cost (rtx, rtx, rtx, int);135static int arm_adjust_cost (rtx, rtx, rtx, int);
132static int count_insns_for_constant (HOST_WIDE_INT, int);136static int optimal_immediate_sequence (enum rtx_code code,
137 unsigned HOST_WIDE_INT val,
138 struct four_ints *return_sequence);
139static int optimal_immediate_sequence_1 (enum rtx_code code,
140 unsigned HOST_WIDE_INT val,
141 struct four_ints *return_sequence,
142 int i);
133static int arm_get_strip_length (int);143static int arm_get_strip_length (int);
134static bool arm_function_ok_for_sibcall (tree, tree);144static bool arm_function_ok_for_sibcall (tree, tree);
135static enum machine_mode arm_promote_function_mode (const_tree,145static enum machine_mode arm_promote_function_mode (const_tree,
@@ -2453,7 +2463,7 @@
2453}2463}
24542464
2455/* Return true if I is a valid constant for the operation CODE. */2465/* Return true if I is a valid constant for the operation CODE. */
2456static int2466int
2457const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)2467const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
2458{2468{
2459 if (const_ok_for_arm (i))2469 if (const_ok_for_arm (i))
@@ -2461,7 +2471,21 @@
24612471
2462 switch (code)2472 switch (code)
2463 {2473 {
2474 case SET:
2475 /* See if we can use movw. */
2476 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
2477 return 1;
2478 else
2479 return 0;
2480
2464 case PLUS:2481 case PLUS:
2482 /* See if we can use addw or subw. */
2483 if (TARGET_THUMB2
2484 && ((i & 0xfffff000) == 0
2485 || ((-i) & 0xfffff000) == 0))
2486 return 1;
2487 /* else fall through. */
2488
2465 case COMPARE:2489 case COMPARE:
2466 case EQ:2490 case EQ:
2467 case NE:2491 case NE:
@@ -2577,68 +2601,41 @@
2577 1);2601 1);
2578}2602}
25792603
2580/* Return the number of instructions required to synthesize the given2604/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
2581 constant, if we start emitting them from bit-position I. */2605 ARM/THUMB2 immediates, and add up to VAL.
2582static int2606 The function return value gives the number of insns required. */
2583count_insns_for_constant (HOST_WIDE_INT remainder, int i)2607static int
2584{2608optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
2585 HOST_WIDE_INT temp1;2609 struct four_ints *return_sequence)
2586 int step_size = TARGET_ARM ? 2 : 1;
2587 int num_insns = 0;
2588
2589 gcc_assert (TARGET_ARM || i == 0);
2590
2591 do
2592 {
2593 int end;
2594
2595 if (i <= 0)
2596 i += 32;
2597 if (remainder & (((1 << step_size) - 1) << (i - step_size)))
2598 {
2599 end = i - 8;
2600 if (end < 0)
2601 end += 32;
2602 temp1 = remainder & ((0x0ff << end)
2603 | ((i < end) ? (0xff >> (32 - end)) : 0));
2604 remainder &= ~temp1;
2605 num_insns++;
2606 i -= 8 - step_size;
2607 }
2608 i -= step_size;
2609 } while (remainder);
2610 return num_insns;
2611}
2612
2613static int
2614find_best_start (unsigned HOST_WIDE_INT remainder)
2615{2610{
2616 int best_consecutive_zeros = 0;2611 int best_consecutive_zeros = 0;
2617 int i;2612 int i;
2618 int best_start = 0;2613 int best_start = 0;
2614 int insns1, insns2;
2615 struct four_ints tmp_sequence;
26192616
2620 /* If we aren't targetting ARM, the best place to start is always at2617 /* If we aren't targetting ARM, the best place to start is always at
2621 the bottom. */2618 the bottom, otherwise look more closely. */
2622 if (! TARGET_ARM)2619 if (TARGET_ARM)
2623 return 0;
2624
2625 for (i = 0; i < 32; i += 2)
2626 {2620 {
2627 int consecutive_zeros = 0;2621 for (i = 0; i < 32; i += 2)
2628
2629 if (!(remainder & (3 << i)))
2630 {2622 {
2631 while ((i < 32) && !(remainder & (3 << i)))2623 int consecutive_zeros = 0;
2632 {2624
2633 consecutive_zeros += 2;2625 if (!(val & (3 << i)))
2634 i += 2;2626 {
2635 }2627 while ((i < 32) && !(val & (3 << i)))
2636 if (consecutive_zeros > best_consecutive_zeros)2628 {
2637 {2629 consecutive_zeros += 2;
2638 best_consecutive_zeros = consecutive_zeros;2630 i += 2;
2639 best_start = i - consecutive_zeros;2631 }
2640 }2632 if (consecutive_zeros > best_consecutive_zeros)
2641 i -= 2;2633 {
2634 best_consecutive_zeros = consecutive_zeros;
2635 best_start = i - consecutive_zeros;
2636 }
2637 i -= 2;
2638 }
2642 }2639 }
2643 }2640 }
26442641
@@ -2665,13 +2662,161 @@
2665 the constant starting from `best_start', and also starting from2662 the constant starting from `best_start', and also starting from
2666 zero (i.e. with bit 31 first to be output). If `best_start' doesn't2663 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2667 yield a shorter sequence, we may as well use zero. */2664 yield a shorter sequence, we may as well use zero. */
2665 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
2668 if (best_start != 02666 if (best_start != 0
2669 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)2667 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
2670 && (count_insns_for_constant (remainder, 0) <=2668 {
2671 count_insns_for_constant (remainder, best_start)))2669 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
2672 best_start = 0;2670 if (insns2 <= insns1)
26732671 {
2674 return best_start;2672 *return_sequence = tmp_sequence;
2673 insns1 = insns2;
2674 }
2675 }
2676
2677 return insns1;
2678}
2679
2680/* As for optimal_immediate_sequence, but starting at bit-position I. */
2681static int
2682optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
2683 struct four_ints *return_sequence, int i)
2684{
2685 int remainder = val & 0xffffffff;
2686 int insns = 0;
2687
2688 /* Try and find a way of doing the job in either two or three
2689 instructions.
2690
2691 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
2692 location. We start at position I. This may be the MSB, or
2693 optimal_immediate_sequence may have positioned it at the largest block
2694 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
2695 wrapping around to the top of the word when we drop off the bottom.
2696 In the worst case this code should produce no more than four insns.
2697
2698 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
2699 constants, shifted to any arbitrary location. We should always start
2700 at the MSB. */
2701 do
2702 {
2703 int end;
2704 int b1, b2, b3, b4;
2705 unsigned HOST_WIDE_INT result;
2706 int loc;
2707
2708 gcc_assert (insns < 4);
2709
2710 if (i <= 0)
2711 i += 32;
2712
2713 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
2714 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
2715 {
2716 loc = i;
2717 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
2718 /* We can use addw/subw for the last 12 bits. */
2719 result = remainder;
2720 else
2721 {
2722 /* Use an 8-bit shifted/rotated immediate. */
2723 end = i - 8;
2724 if (end < 0)
2725 end += 32;
2726 result = remainder & ((0x0ff << end)
2727 | ((i < end) ? (0xff >> (32 - end))
2728 : 0));
2729 i -= 8;
2730 }
2731 }
2732 else
2733 {
2734 /* Arm allows rotates by a multiple of two. Thumb-2 allows
2735 arbitrary shifts. */
2736 i -= TARGET_ARM ? 2 : 1;
2737 continue;
2738 }
2739
2740 /* Next, see if we can do a better job with a thumb2 replicated
2741 constant.
2742
2743 We do it this way around to catch the cases like 0x01F001E0 where
2744 two 8-bit immediates would work, but a replicated constant would
2745 make it worse.
2746
2747 TODO: 16-bit constants that don't clear all the bits, but still win.
2748 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
2749 if (TARGET_THUMB2)
2750 {
2751 b1 = (remainder & 0xff000000) >> 24;
2752 b2 = (remainder & 0x00ff0000) >> 16;
2753 b3 = (remainder & 0x0000ff00) >> 8;
2754 b4 = remainder & 0xff;
2755
2756 if (loc > 24)
2757 {
2758 /* The 8-bit immediate already found clears b1 (and maybe b2),
2759 but must leave b3 and b4 alone. */
2760
2761 /* First try to find a 32-bit replicated constant that clears
2762 almost everything. We can assume that we can't do it in one,
2763 or else we wouldn't be here. */
2764 unsigned int tmp = b1 & b2 & b3 & b4;
2765 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
2766 + (tmp << 24);
2767 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
2768 + (tmp == b3) + (tmp == b4);
2769 if (tmp
2770 && (matching_bytes >= 3
2771 || (matching_bytes == 2
2772 && const_ok_for_op (remainder & ~tmp2, code))))
2773 {
2774 /* At least 3 of the bytes match, and the fourth has at
2775 least as many bits set, or two of the bytes match
2776 and it will only require one more insn to finish. */
2777 result = tmp2;
2778 i = tmp != b1 ? 32
2779 : tmp != b2 ? 24
2780 : tmp != b3 ? 16
2781 : 8;
2782 }
2783
2784 /* Second, try to find a 16-bit replicated constant that can
2785 leave three of the bytes clear. If b2 or b4 is already
2786 zero, then we can. If the 8-bit from above would not
2787 clear b2 anyway, then we still win. */
2788 else if (b1 == b3 && (!b2 || !b4
2789 || (remainder & 0x00ff0000 & ~result)))
2790 {
2791 result = remainder & 0xff00ff00;
2792 i = 24;
2793 }
2794 }
2795 else if (loc > 16)
2796 {
2797 /* The 8-bit immediate already found clears b2 (and maybe b3)
2798 and we don't get here unless b1 is already clear, but it will
2799 leave b4 unchanged. */
2800
2801 /* If we can clear b2 and b4 at once, then we win, since the
2802 8-bits couldn't possibly reach that far. */
2803 if (b2 == b4)
2804 {
2805 result = remainder & 0x00ff00ff;
2806 i = 16;
2807 }
2808 }
2809 }
2810
2811 return_sequence->i[insns++] = result;
2812 remainder &= ~result;
2813
2814 if (code == SET || code == MINUS)
2815 code = PLUS;
2816 }
2817 while (remainder);
2818
2819 return insns;
2675}2820}
26762821
2677/* Emit an instruction with the indicated PATTERN. If COND is2822/* Emit an instruction with the indicated PATTERN. If COND is
@@ -2688,7 +2833,6 @@
26882833
2689/* As above, but extra parameter GENERATE which, if clear, suppresses2834/* As above, but extra parameter GENERATE which, if clear, suppresses
2690 RTL generation. */2835 RTL generation. */
2691/* ??? This needs more work for thumb2. */
26922836
2693static int2837static int
2694arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,2838arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
@@ -2700,15 +2844,15 @@
2700 int final_invert = 0;2844 int final_invert = 0;
2701 int can_negate_initial = 0;2845 int can_negate_initial = 0;
2702 int i;2846 int i;
2703 int num_bits_set = 0;
2704 int set_sign_bit_copies = 0;2847 int set_sign_bit_copies = 0;
2705 int clear_sign_bit_copies = 0;2848 int clear_sign_bit_copies = 0;
2706 int clear_zero_bit_copies = 0;2849 int clear_zero_bit_copies = 0;
2707 int set_zero_bit_copies = 0;2850 int set_zero_bit_copies = 0;
2708 int insns = 0;2851 int insns = 0, neg_insns, inv_insns;
2709 unsigned HOST_WIDE_INT temp1, temp2;2852 unsigned HOST_WIDE_INT temp1, temp2;
2710 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;2853 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
2711 int step_size = TARGET_ARM ? 2 : 1;2854 struct four_ints *immediates;
2855 struct four_ints pos_immediates, neg_immediates, inv_immediates;
27122856
2713 /* Find out which operations are safe for a given CODE. Also do a quick2857 /* Find out which operations are safe for a given CODE. Also do a quick
2714 check for degenerate cases; these can occur when DImode operations2858 check for degenerate cases; these can occur when DImode operations
@@ -2745,9 +2889,6 @@
2745 gen_rtx_SET (VOIDmode, target, source));2889 gen_rtx_SET (VOIDmode, target, source));
2746 return 1;2890 return 1;
2747 }2891 }
2748
2749 if (TARGET_THUMB2)
2750 can_invert = 1;
2751 break;2892 break;
27522893
2753 case AND:2894 case AND:
@@ -2820,9 +2961,7 @@
2820 }2961 }
28212962
2822 /* If we can do it in one insn get out quickly. */2963 /* If we can do it in one insn get out quickly. */
2823 if (const_ok_for_arm (val)2964 if (const_ok_for_op (val, code))
2824 || (can_negate_initial && const_ok_for_arm (-val))
2825 || (can_invert && const_ok_for_arm (~val)))
2826 {2965 {
2827 if (generate)2966 if (generate)
2828 emit_constant_insn (cond,2967 emit_constant_insn (cond,
@@ -2875,15 +3014,6 @@
2875 switch (code)3014 switch (code)
2876 {3015 {
2877 case SET:3016 case SET:
2878 /* See if we can use movw. */
2879 if (arm_arch_thumb2 && (remainder & 0xffff0000) == 0)
2880 {
2881 if (generate)
2882 emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target,
2883 GEN_INT (val)));
2884 return 1;
2885 }
2886
2887 /* See if we can do this by sign_extending a constant that is known3017 /* See if we can do this by sign_extending a constant that is known
2888 to be negative. This is a good, way of doing it, since the shift3018 to be negative. This is a good, way of doing it, since the shift
2889 may well merge into a subsequent insn. */3019 may well merge into a subsequent insn. */
@@ -3234,121 +3364,102 @@
3234 break;3364 break;
3235 }3365 }
32363366
3237 for (i = 0; i < 32; i++)3367 /* Calculate what the instruction sequences would be if we generated it
3238 if (remainder & (1 << i))3368 normally, negated, or inverted. */
3239 num_bits_set++;3369 if (code == AND)
32403370 /* AND cannot be split into multiple insns, so invert and use BIC. */
3241 if ((code == AND)3371 insns = 99;
3242 || (code != IOR && can_invert && num_bits_set > 16))3372 else
3243 remainder ^= 0xffffffff;3373 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
3244 else if (code == PLUS && num_bits_set > 16)3374
3245 remainder = (-remainder) & 0xffffffff;3375 if (can_negate)
32463376 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
3247 /* For XOR, if more than half the bits are set and there's a sequence3377 &neg_immediates);
3248 of more than 8 consecutive ones in the pattern then we can XOR by the3378 else
3249 inverted constant and then invert the final result; this may save an3379 neg_insns = 99;
3250 instruction and might also lead to the final mvn being merged with3380
3251 some other operation. */3381 if (can_invert)
3252 else if (code == XOR && num_bits_set > 163382 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
3253 && (count_insns_for_constant (remainder ^ 0xffffffff,3383 &inv_immediates);
3254 find_best_start3384 else
3255 (remainder ^ 0xffffffff))3385 inv_insns = 99;
3256 < count_insns_for_constant (remainder,3386
3257 find_best_start (remainder))))3387 immediates = &pos_immediates;
3258 {3388
3259 remainder ^= 0xffffffff;3389 /* Is the negated immediate sequence more efficient? */
3260 final_invert = 1;3390 if (neg_insns < insns && neg_insns <= inv_insns)
3261 }3391 {
3262 else3392 insns = neg_insns;
3263 {3393 immediates = &neg_immediates;
3264 can_invert = 0;3394 }
3265 can_negate = 0;3395 else
3266 }3396 can_negate = 0;
32673397
3268 /* Now try and find a way of doing the job in either two or three3398 /* Is the inverted immediate sequence more efficient?
3269 instructions.3399 We must allow for an extra NOT instruction for XOR operations, although
3270 We start by looking for the largest block of zeros that are aligned on3400 there is some chance that the final 'mvn' will get optimized later. */
3271 a 2-bit boundary, we then fill up the temps, wrapping around to the3401 if (inv_insns < insns && (code != XOR || (inv_insns + 1) < insns))
3272 top of the word when we drop off the bottom.3402 {
3273 In the worst case this code should produce no more than four insns.3403 insns = inv_insns;
3274 Thumb-2 constants are shifted, not rotated, so the MSB is always the3404 immediates = &inv_immediates;
3275 best place to start. */3405
32763406 if (code == XOR)
3277 /* ??? Use thumb2 replicated constants when the high and low halfwords are3407 final_invert = 1;
3278 the same. */3408 }
3279 {3409 else
3280 /* Now start emitting the insns. */3410 can_invert = 0;
3281 i = find_best_start (remainder);3411
3282 do3412 /* Now output the chosen sequence as instructions. */
3283 {3413 if (generate)
3284 int end;3414 {
32853415 for (i = 0; i < insns; i++)
3286 if (i <= 0)3416 {
3287 i += 32;3417 rtx new_src, temp1_rtx;
3288 if (remainder & (3 << (i - 2)))3418
3289 {3419 temp1 = immediates->i[i];
3290 end = i - 8;3420
3291 if (end < 0)3421 if (code == SET || code == MINUS)
3292 end += 32;3422 {
3293 temp1 = remainder & ((0x0ff << end)3423 new_src = (subtargets ? gen_reg_rtx (mode) : target);
3294 | ((i < end) ? (0xff >> (32 - end)) : 0));3424 if (can_invert && code != MINUS)
3295 remainder &= ~temp1;3425 temp1 = ~temp1;
32963426 }
3297 if (generate)3427 else
3298 {3428 {
3299 rtx new_src, temp1_rtx;3429 if ((final_invert || i < (insns - 1)) && subtargets)
33003430 new_src = gen_reg_rtx (mode);
3301 if (code == SET || code == MINUS)3431 else
3302 {3432 new_src = target;
3303 new_src = (subtargets ? gen_reg_rtx (mode) : target);3433 if (can_invert)
3304 if (can_invert && code != MINUS)3434 temp1 = ~temp1;
3305 temp1 = ~temp1;3435 else if (can_negate)
3306 }3436 temp1 = -temp1;
3307 else3437 }
3308 {3438
3309 if ((final_invert || remainder) && subtargets)3439 temp1 = trunc_int_for_mode (temp1, mode);
3310 new_src = gen_reg_rtx (mode);3440 temp1_rtx = GEN_INT (temp1);
3311 else3441
3312 new_src = target;3442 if (code == SET)
3313 if (can_invert)3443 ;
3314 temp1 = ~temp1;3444 else if (code == MINUS)
3315 else if (can_negate)3445 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
3316 temp1 = -temp1;3446 else
3317 }3447 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
33183448
3319 temp1 = trunc_int_for_mode (temp1, mode);3449 emit_constant_insn (cond,
3320 temp1_rtx = GEN_INT (temp1);3450 gen_rtx_SET (VOIDmode, new_src,
33213451 temp1_rtx));
3322 if (code == SET)3452 source = new_src;
3323 ;3453
3324 else if (code == MINUS)3454 if (code == SET)
3325 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);3455 {
3326 else3456 can_invert = 0;
3327 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
3328
3329 emit_constant_insn (cond,
3330 gen_rtx_SET (VOIDmode, new_src,
3331 temp1_rtx));
3332 source = new_src;
3333 }
3334
3335 if (code == SET)
3336 {
3337 can_invert = 0;
3338 code = PLUS;
3339 }
3340 else if (code == MINUS)
3341 code = PLUS;3457 code = PLUS;
33423458 }
3343 insns++;3459 else if (code == MINUS)
3344 i -= 8 - step_size;3460 code = PLUS;
3345 }3461 }
3346 /* Arm allows rotates by a multiple of two. Thumb-2 allows arbitrary3462 }
3347 shifts. */
3348 i -= step_size;
3349 }
3350 while (remainder);
3351 }
33523463
3353 if (final_invert)3464 if (final_invert)
3354 {3465 {
33553466
=== modified file 'gcc/config/arm/arm.md'
--- gcc/config/arm/arm.md 2011-05-13 13:42:39 +0000
+++ gcc/config/arm/arm.md 2011-06-02 12:58:15 +0000
@@ -701,21 +701,24 @@
701;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will701;; (plus (reg rN) (reg sp)) into (reg rN). In this case reload will
702;; put the duplicated register first, and not try the commutative version.702;; put the duplicated register first, and not try the commutative version.
703(define_insn_and_split "*arm_addsi3"703(define_insn_and_split "*arm_addsi3"
704 [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k,r")704 [(set (match_operand:SI 0 "s_register_operand" "=r, k,r,r, k, r, k,r, k, r")
705 (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k,rk")705 (plus:SI (match_operand:SI 1 "s_register_operand" "%rk,k,r,rk,k, rk,k,rk,k, rk")
706 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,L, L,?n")))]706 (match_operand:SI 2 "reg_or_int_operand" "rI,rI,k,Pj,Pj,L, L,PJ,PJ,?n")))]
707 "TARGET_32BIT"707 "TARGET_32BIT"
708 "@708 "@
709 add%?\\t%0, %1, %2709 add%?\\t%0, %1, %2
710 add%?\\t%0, %1, %2710 add%?\\t%0, %1, %2
711 add%?\\t%0, %2, %1711 add%?\\t%0, %2, %1
712 sub%?\\t%0, %1, #%n2712 addw%?\\t%0, %1, %2
713 sub%?\\t%0, %1, #%n2713 addw%?\\t%0, %1, %2
714 sub%?\\t%0, %1, #%n2
715 sub%?\\t%0, %1, #%n2
716 subw%?\\t%0, %1, #%n2
717 subw%?\\t%0, %1, #%n2
714 #"718 #"
715 "TARGET_32BIT719 "TARGET_32BIT
716 && GET_CODE (operands[2]) == CONST_INT720 && GET_CODE (operands[2]) == CONST_INT
717 && !(const_ok_for_arm (INTVAL (operands[2]))721 && !const_ok_for_op (INTVAL (operands[2]), PLUS)
718 || const_ok_for_arm (-INTVAL (operands[2])))
719 && (reload_completed || !arm_eliminable_register (operands[1]))"722 && (reload_completed || !arm_eliminable_register (operands[1]))"
720 [(clobber (const_int 0))]723 [(clobber (const_int 0))]
721 "724 "
@@ -724,8 +727,9 @@
724 operands[1], 0);727 operands[1], 0);
725 DONE;728 DONE;
726 "729 "
727 [(set_attr "length" "4,4,4,4,4,16")730 [(set_attr "length" "4,4,4,4,4,4,4,4,4,16")
728 (set_attr "predicable" "yes")]731 (set_attr "predicable" "yes")
732 (set_attr "arch" "*,*,*,t2,t2,*,*,t2,t2,*")]
729)733)
730734
731(define_insn_and_split "*thumb1_addsi3"735(define_insn_and_split "*thumb1_addsi3"
732736
=== modified file 'gcc/config/arm/constraints.md'
--- gcc/config/arm/constraints.md 2011-01-03 20:52:22 +0000
+++ gcc/config/arm/constraints.md 2011-06-02 12:58:15 +0000
@@ -31,7 +31,7 @@
31;; The following multi-letter normal constraints have been used:31;; The following multi-letter normal constraints have been used:
32;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz32;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dz
33;; in Thumb-1 state: Pa, Pb, Pc, Pd33;; in Thumb-1 state: Pa, Pb, Pc, Pd
34;; in Thumb-2 state: Ps, Pt, Pu, Pv, Pw, Px34;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px
3535
36;; The following memory constraints have been used:36;; The following memory constraints have been used:
37;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us37;; in ARM/Thumb-2 state: Q, Ut, Uv, Uy, Un, Um, Us
@@ -74,6 +74,18 @@
74 (and (match_code "const_int")74 (and (match_code "const_int")
75 (match_test "(ival & 0xffff0000) == 0")))))75 (match_test "(ival & 0xffff0000) == 0")))))
7676
77(define_constraint "Pj"
78 "@internal A 12-bit constant suitable for an ADDW or SUBW instruction. (Thumb-2)"
79 (and (match_code "const_int")
80 (and (match_test "TARGET_THUMB2")
81 (match_test "(ival & 0xfffff000) == 0"))))
82
83(define_constraint "PJ"
84 "@internal A constant that satisfies the Pj constrant if negated."
85 (and (match_code "const_int")
86 (and (match_test "TARGET_THUMB2")
87 (match_test "((-ival) & 0xfffff000) == 0"))))
88
77(define_register_constraint "k" "STACK_REG"89(define_register_constraint "k" "STACK_REG"
78 "@internal The stack register.")90 "@internal The stack register.")
7991
8092
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c'
--- gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 1970-01-01 00:00:00 +0000
+++ gcc/testsuite/gcc.target/arm/thumb2-replicated-constant1.c 2011-06-02 12:58:15 +0000
@@ -0,0 +1,27 @@
1/* Ensure simple replicated constant immediates work. */
2/* { dg-options "-mthumb -O2" } */
3/* { dg-require-effective-target arm_thumb2_ok } */
4
5int
6foo1 (int a)
7{
8 return a + 0xfefefefe;
9}
10
11/* { dg-final { scan-assembler "add.*#-16843010" } } */
12
13int
14foo2 (int a)
15{
16 return a - 0xab00ab00;
17}
18
19/* { dg-final { scan-assembler "sub.*#-1426019584" } } */
20
21int
22foo3 (int a)
23{
24 return a & 0x00cd00cd;
25}
26
27/* { dg-final { scan-assembler "and.*#13435085" } } */
028
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c'
--- gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 1970-01-01 00:00:00 +0000
+++ gcc/testsuite/gcc.target/arm/thumb2-replicated-constant2.c 2011-06-02 12:58:15 +0000
@@ -0,0 +1,75 @@
1/* Ensure split constants can use replicated patterns. */
2/* { dg-options "-mthumb -O2" } */
3/* { dg-require-effective-target arm_thumb2_ok } */
4
5int
6foo1 (int a)
7{
8 return a + 0xfe00fe01;
9}
10
11/* { dg-final { scan-assembler "add.*#-33489408" } } */
12/* { dg-final { scan-assembler "add.*#1" } } */
13
14int
15foo2 (int a)
16{
17 return a + 0xdd01dd00;
18}
19
20/* { dg-final { scan-assembler "add.*#-587145984" } } */
21/* { dg-final { scan-assembler "add.*#65536" } } */
22
23int
24foo3 (int a)
25{
26 return a + 0x00443344;
27}
28
29/* { dg-final { scan-assembler "add.*#4456516" } } */
30/* { dg-final { scan-assembler "add.*#13056" } } */
31
32int
33foo4 (int a)
34{
35 return a + 0x77330033;
36}
37
38/* { dg-final { scan-assembler "add.*#1996488704" } } */
39/* { dg-final { scan-assembler "add.*#3342387" } } */
40
41int
42foo5 (int a)
43{
44 return a + 0x11221122;
45}
46
47/* { dg-final { scan-assembler "add.*#285217024" } } */
48/* { dg-final { scan-assembler "add.*#2228258" } } */
49
50int
51foo6 (int a)
52{
53 return a + 0x66666677;
54}
55
56/* { dg-final { scan-assembler "add.*#1717986918" } } */
57/* { dg-final { scan-assembler "add.*#17" } } */
58
59int
60foo7 (int a)
61{
62 return a + 0x99888888;
63}
64
65/* { dg-final { scan-assembler "add.*#-2004318072" } } */
66/* { dg-final { scan-assembler "add.*#285212672" } } */
67
68int
69foo8 (int a)
70{
71 return a + 0xdddddfff;
72}
73
74/* { dg-final { scan-assembler "add.*#-572662307" } } */
75/* { dg-final { scan-assembler "addw.*#546" } } */
076
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c'
--- gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 1970-01-01 00:00:00 +0000
+++ gcc/testsuite/gcc.target/arm/thumb2-replicated-constant3.c 2011-06-02 12:58:15 +0000
@@ -0,0 +1,28 @@
1/* Ensure negated/inverted replicated constant immediates work. */
2/* { dg-options "-mthumb -O2" } */
3/* { dg-require-effective-target arm_thumb2_ok } */
4
5int
6foo1 (int a)
7{
8 return a | 0xffffff00;
9}
10
11/* { dg-final { scan-assembler "orn.*#255" } } */
12
13int
14foo2 (int a)
15{
16 return a & 0xffeeffee;
17}
18
19/* { dg-final { scan-assembler "bic.*#1114129" } } */
20
21int
22foo3 (int a)
23{
24 return a & 0xaaaaaa00;
25}
26
27/* { dg-final { scan-assembler "and.*#-1431655766" } } */
28/* { dg-final { scan-assembler "bic.*#170" } } */
029
=== added file 'gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c'
--- gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 1970-01-01 00:00:00 +0000
+++ gcc/testsuite/gcc.target/arm/thumb2-replicated-constant4.c 2011-06-02 12:58:15 +0000
@@ -0,0 +1,22 @@
1/* Ensure replicated constants don't make things worse. */
2/* { dg-options "-mthumb -O2" } */
3/* { dg-require-effective-target arm_thumb2_ok } */
4
5int
6foo1 (int a)
7{
8 /* It might be tempting to use 0x01000100, but it wouldn't help. */
9 return a + 0x01f001e0;
10}
11
12/* { dg-final { scan-assembler "add.*#32505856" } } */
13/* { dg-final { scan-assembler "add.*#480" } } */
14
15int
16foo2 (int a)
17{
18 return a + 0x0f100e10;
19}
20
21/* { dg-final { scan-assembler "add.*#252706816" } } */
22/* { dg-final { scan-assembler "add.*#3600" } } */

Subscribers

People subscribed via source and target branches