Merge lp:~schnetter/pocl/main into lp:~pocl/pocl/trunk

Proposed by Erik Schnetter
Status: Merged
Merged at revision: 160
Proposed branch: lp:~schnetter/pocl/main
Merge into: lp:~pocl/pocl/trunk
Diff against target: 486 lines (+238/-27)
10 files modified
examples/kernel/kernel.c (+1/-1)
examples/kernel/test_fabs.cl (+7/-6)
examples/kernel/test_hadd.cl (+208/-8)
lib/kernel/x86_64/ceil.cl (+3/-1)
lib/kernel/x86_64/fabs.cl (+3/-1)
lib/kernel/x86_64/floor.cl (+3/-1)
lib/kernel/x86_64/max.cl (+4/-1)
lib/kernel/x86_64/min.cl (+4/-1)
lib/kernel/x86_64/sqrt.cl (+3/-1)
tests/testsuite.at (+2/-6)
To merge this branch: bzr merge lp:~schnetter/pocl/main
Reviewer Review Type Date Requested Status
Pekka Jääskeläinen Approve
Review via email: mp+89469@code.launchpad.net

Description of the change

Clang version 3.1 (trunk 148489) fixes several bugs, which allows me to re-enable all existing test cases.

To post a comment you must log in.
lp:~schnetter/pocl/main updated
181. By Erik Schnetter

Merge from trunk

182. By Erik Schnetter

Add test cases for mad_hi mad_sat mul_hi sub_sat

Revision history for this message
Pekka Jääskeläinen (pekka-jaaskelainen) wrote :

Merged. In the future, please also add the LLVM revision number to the INSTALL file. With the latest revision, Clang crashes when compiling pocl; it seems something more serious is broken in Clang at the moment.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'examples/kernel/kernel.c'
--- examples/kernel/kernel.c 2011-12-20 19:16:58 +0000
+++ examples/kernel/kernel.c 2012-01-20 19:44:17 +0000
@@ -79,7 +79,7 @@
79 "test_bitselect",79 "test_bitselect",
80 "test_fabs",80 "test_fabs",
81 "test_hadd",81 "test_hadd",
82 //"test_rotate", /* TODO: this test fails; LLVM bug #11555 */82 "test_rotate",
83 };83 };
84 int const ntests = sizeof(tests)/sizeof(*tests);84 int const ntests = sizeof(tests)/sizeof(*tests);
85 for (int i=0; i<ntests; ++i) {85 for (int i=0; i<ntests; ++i) {
8686
=== modified file 'examples/kernel/test_fabs.cl'
--- examples/kernel/test_fabs.cl 2012-01-16 18:19:41 +0000
+++ examples/kernel/test_fabs.cl 2012-01-20 19:44:17 +0000
@@ -136,8 +136,8 @@
136 }136 }
137 if (!equal) {137 if (!equal) {
138 for (int n=0; n<vecsize; ++n) {138 for (int n=0; n<vecsize; ++n) {
139 printf("FAIL: fabs type=%s val=%.17g res=%.17g\n",139 printf("FAIL: fabs type=%s val=%.17g res=%.17g good=%.17g\n",
140 typename, val.s[n], res.s[n]);140 typename, val.s[n], res.s[n], good.s[n]);
141 }141 }
142 return;142 return;
143 }143 }
@@ -150,8 +150,9 @@
150 }150 }
151 if (!equal) {151 if (!equal) {
152 for (int n=0; n<vecsize; ++n) {152 for (int n=0; n<vecsize; ++n) {
153 printf("FAIL: signbit type=%s val=%.17g res=%d\n",153 printf("FAIL: signbit type=%s val=%.17g res=%d good=%d\n",
154 typename, val.s[n], (int)ires.s[n]);154 typename, val.s[n], (int)ires.s[n],
155 (sign>0 ? 0 : vecsize==1 ? +1 : -1));
155 }156 }
156 return;157 return;
157 }158 }
@@ -167,8 +168,8 @@
167 }168 }
168 if (!equal) {169 if (!equal) {
169 for (int n=0; n<vecsize; ++n) {170 for (int n=0; n<vecsize; ++n) {
170 printf("FAIL: copysign type=%s val=%.17g sign=%.17g res=%.17g\n",171 printf("FAIL: copysign type=%s val=%.17g sign=%.17g res=%.17g good=%.17g\n",
171 typename, val.s[n], sign2*val2.s[n], res.s[n]);172 typename, val.s[n], sign2*val2.s[n], res.s[n], good.s[n]);
172 }173 }
173 return;174 return;
174 }175 }
175176
=== modified file 'examples/kernel/test_hadd.cl'
--- examples/kernel/test_hadd.cl 2011-12-20 18:54:12 +0000
+++ examples/kernel/test_hadd.cl 2012-01-20 19:44:17 +0000
@@ -2,7 +2,11 @@
2// TESTING: abs_diff2// TESTING: abs_diff
3// TESTING: add_sat3// TESTING: add_sat
4// TESTING: hadd4// TESTING: hadd
5// TESTING: mad_hi
6// TESTING: mad_sat
7// TESTING: mul_hi
5// TESTING: rhadd8// TESTING: rhadd
9// TESTING: sub_sat
610
711
812
@@ -21,6 +25,24 @@
21 return b; \25 return b; \
22 } \26 } \
23 \27 \
28 STYPE##4 _cl_overloadable safe_normalize(STYPE##4 const a) \
29 { \
30 STYPE const halfbits = 4*sizeof(STYPE); \
31 STYPE const halfmax = (STYPE)1 << halfbits; \
32 STYPE const halfmask = halfmax - (STYPE)1; \
33 STYPE tmp; \
34 STYPE##4 b; \
35 tmp = a.s0; \
36 b.s0 = tmp & halfmask; \
37 tmp = tmp >> halfbits + a.s1; \
38 b.s1 = tmp & halfmask; \
39 tmp = tmp >> halfbits + a.s2; \
40 b.s2 = tmp & halfmask; \
41 tmp = tmp >> halfbits + a.s3; \
42 b.s3 = tmp; \
43 return b; \
44 } \
45 \
24 STYPE _cl_overloadable safe_extract(STYPE##2 const a) \46 STYPE _cl_overloadable safe_extract(STYPE##2 const a) \
25 { \47 { \
26 STYPE const halfbits = 4*sizeof(STYPE); \48 STYPE const halfbits = 4*sizeof(STYPE); \
@@ -31,6 +53,19 @@
31 return b; \53 return b; \
32 } \54 } \
33 \55 \
56 STYPE _cl_overloadable safe_extract(STYPE##4 const a) \
57 { \
58 STYPE const halfbits = 4*sizeof(STYPE); \
59 STYPE const halfmax = (STYPE)1 << halfbits; \
60 STYPE const halfmask = halfmax - (STYPE)1; \
61 STYPE b; \
62 if (safe_extract(a.hi) != 0) { \
63 printf("FAIL: safe_extract [%d,%d,%d,%d]\n", \
64 (int)a.s0, (int)a.s1, (int)a.s2, (int)a.s3); \
65 } \
66 return safe_extract(a.lo); \
67 } \
68 \
34 STYPE##2 _cl_overloadable safe_neg(STYPE##2 a) \69 STYPE##2 _cl_overloadable safe_neg(STYPE##2 a) \
35 { \70 { \
36 STYPE##2 b; \71 STYPE##2 b; \
@@ -57,6 +92,16 @@
57 return safe_normalize(c); \92 return safe_normalize(c); \
58 } \93 } \
59 \94 \
95 STYPE##4 _cl_overloadable safe_add(STYPE##4 const a, STYPE##4 const b) \
96 { \
97 STYPE##4 c; \
98 c.s0 = a.s0 + b.s0; \
99 c.s1 = a.s1 + b.s1; \
100 c.s2 = a.s2 + b.s2; \
101 c.s3 = a.s3 + b.s3; \
102 return safe_normalize(c); \
103 } \
104 \
60 STYPE##2 _cl_overloadable safe_sub(STYPE##2 const a, STYPE##2 const b) \105 STYPE##2 _cl_overloadable safe_sub(STYPE##2 const a, STYPE##2 const b) \
61 { \106 { \
62 STYPE##2 c; \107 STYPE##2 c; \
@@ -65,6 +110,27 @@
65 return safe_normalize(c); \110 return safe_normalize(c); \
66 } \111 } \
67 \112 \
113 STYPE##4 _cl_overloadable safe_mul(STYPE##2 const a, STYPE##2 const b) \
114 { \
115 STYPE##4 c00, c01, c10, c11; \
116 c00 = 0; \
117 c00.s0 = a.s0 * b.s0; \
118 c00 = safe_normalize(c00); \
119 c01 = 0; \
120 c01.s1 = a.s0 * b.s1; \
121 c01 = safe_normalize(c01); \
122 c10 = 0; \
123 c10.s1 = a.s1 * b.s0; \
124 c10 = safe_normalize(c10); \
125 c11 = 0; \
126 c11.s2 = a.s1 * b.s1; \
127 c11 = safe_normalize(c11); \
128 STYPE##4 c; \
129 c = safe_add(safe_add(c00, c01), \
130 safe_add(c10, c11)); \
131 return c; \
132 } \
133 \
68 STYPE##2 _cl_overloadable safe_max(STYPE##2 const a, STYPE##2 const b) \134 STYPE##2 _cl_overloadable safe_max(STYPE##2 const a, STYPE##2 const b) \
69 { \135 { \
70 STYPE##2 c; \136 STYPE##2 c; \
@@ -87,6 +153,38 @@
87 return c; \153 return c; \
88 } \154 } \
89 \155 \
156 STYPE##2 _cl_overloadable safe_clamp(STYPE##2 const a, \
157 STYPE##2 const alo, STYPE##2 const ahi) \
158 { \
159 STYPE##2 b; \
160 if (a.s1 < alo.s1 || (a.s1 == alo.s1 && a.s0 < alo.s0)) { \
161 b = alo; \
162 } else if (a.s1 > ahi.s1 || (a.s1 == ahi.s1 && a.s0 > ahi.s0)) { \
163 b = ahi; \
164 } else { \
165 b = a; \
166 } \
167 return b; \
168 } \
169 \
170 STYPE##2 _cl_overloadable safe_clamp(STYPE##4 const a, \
171 STYPE##2 const alo, STYPE##2 const ahi) \
172 { \
173 STYPE##2 b; \
174 if (a.s3 < 0 || a.s2 < 0 || \
175 a.s1 < alo.s1 || (a.s1 == alo.s1 && a.s0 < alo.s0)) \
176 { \
177 b = alo; \
178 } else if (a.s3 > 0 || a.s2 > 0 || \
179 a.s1 > ahi.s1 || (a.s1 == ahi.s1 && a.s0 > ahi.s0)) \
180 { \
181 b = ahi; \
182 } else { \
183 b = safe_normalize(a.lo); \
184 } \
185 return b; \
186 } \
187 \
90 STYPE##2 _cl_overloadable safe_rshift(STYPE##2 a) \188 STYPE##2 _cl_overloadable safe_rshift(STYPE##2 a) \
91 { \189 { \
92 STYPE const halfbits = 4*sizeof(STYPE); \190 STYPE const halfbits = 4*sizeof(STYPE); \
@@ -98,6 +196,11 @@
98 b.s0 >>= (STYPE)1; \196 b.s0 >>= (STYPE)1; \
99 b.s1 >>= (STYPE)1; \197 b.s1 >>= (STYPE)1; \
100 return safe_normalize(b); \198 return safe_normalize(b); \
199 } \
200 \
201 STYPE##2 _cl_overloadable safe_hi(STYPE##4 a) \
202 { \
203 return safe_normalize(a.hi); \
101 }204 }
102205
103206
@@ -115,6 +218,19 @@
115 b = safe_normalize(b); \218 b = safe_normalize(b); \
116 if ((TYPE)safe_extract(b) != a) printf("FAIL: safe_create %d\n", (int)a); \219 if ((TYPE)safe_extract(b) != a) printf("FAIL: safe_create %d\n", (int)a); \
117 return b; \220 return b; \
221 } \
222 \
223 STYPE##4 _cl_overloadable safe_create4(TYPE const a) \
224 { \
225 STYPE const halfbits = 4*sizeof(STYPE); \
226 STYPE const halfmax = (STYPE)1 << halfbits; \
227 STYPE const halfmask = halfmax - (STYPE)1; \
228 STYPE##4 b; \
229 b = 0; \
230 b.lo = safe_create(a); \
231 b = safe_normalize(b); \
232 if ((TYPE)safe_extract(b) != a) printf("FAIL: safe_create4 %d\n", (int)a); \
233 return b; \
118 }234 }
119235
120236
@@ -1285,8 +1401,8 @@
1285 } Tvec;1401 } Tvec;
1286 Tvec x, y, z;1402 Tvec x, y, z;
1287 Tvec good_abs;1403 Tvec good_abs;
1288 Tvec good_abs_diff, good_add_sat;1404 Tvec good_abs_diff, good_add_sat, good_mad_sat, good_sub_sat;
1289 Tvec good_hadd, good_rhadd;1405 Tvec good_hadd, good_mad_hi, good_mul_hi, good_rhadd;
1290 int vecsize = vec_step(gtype);1406 int vecsize = vec_step(gtype);
1291 for (int n=0; n<vecsize; ++n) {1407 for (int n=0; n<vecsize; ++n) {
1292 x.s[n] = randoms[(iter+n ) % nrandoms];1408 x.s[n] = randoms[(iter+n ) % nrandoms];
@@ -1303,25 +1419,49 @@
1303 safe_extract(safe_abs(safe_sub(safe_create(x.s[n]),1419 safe_extract(safe_abs(safe_sub(safe_create(x.s[n]),
1304 safe_create(y.s[n]))));1420 safe_create(y.s[n]))));
1305 good_add_sat.s[n] =1421 good_add_sat.s[n] =
1306 safe_extract(safe_min(safe_max(safe_add(safe_create(x.s[n]),1422 safe_extract(safe_clamp(safe_add(safe_create(x.s[n]),
1307 safe_create(y.s[n])),1423 safe_create(y.s[n])),
1308 safe_create(tmin)),1424 safe_create(tmin),
1309 safe_create(tmax)));1425 safe_create(tmax)));
1426 good_mad_sat.s[n] =
1427 safe_extract(safe_clamp(safe_add(safe_mul(safe_create(x.s[n]),
1428 safe_create(y.s[n])),
1429 safe_create4(z.s[n])),
1430 safe_create(tmin),
1431 safe_create(tmax)));
1432 good_sub_sat.s[n] =
1433 safe_extract(safe_clamp(safe_sub(safe_create(x.s[n]),
1434 safe_create(y.s[n])),
1435 safe_create(tmin),
1436 safe_create(tmax)));
1310 good_hadd.s[n] =1437 good_hadd.s[n] =
1311 safe_extract(safe_rshift(safe_add(safe_create(x.s[n]),1438 safe_extract(safe_rshift(safe_add(safe_create(x.s[n]),
1312 safe_create(y.s[n]))));1439 safe_create(y.s[n]))));
1440 good_mad_hi.s[n] =
1441 safe_extract(safe_clamp(safe_add(safe_hi(safe_mul(safe_create(x.s[n]),
1442 safe_create(y.s[n]))),
1443 safe_create(z.s[n])),
1444 safe_create(tmin),
1445 safe_create(tmax)));
1446 good_mul_hi.s[n] =
1447 safe_extract(safe_hi(safe_mul(safe_create(x.s[n]),
1448 safe_create(y.s[n]))));
1313 good_rhadd.s[n] =1449 good_rhadd.s[n] =
1314 safe_extract(safe_rshift(safe_add(safe_add(safe_create(x.s[n]),1450 safe_extract(safe_rshift(safe_add(safe_add(safe_create(x.s[n]),
1315 safe_create(y.s[n])),1451 safe_create(y.s[n])),
1316 safe_create((sgtype)1))));1452 safe_create((sgtype)1))));
1317 }1453 }
1318 Tvec res_abs;1454 Tvec res_abs;
1319 Tvec res_abs_diff, res_add_sat;1455 Tvec res_abs_diff, res_add_sat, res_mad_sat, res_sub_sat;
1320 Tvec res_hadd, res_rhadd;1456 Tvec res_hadd, res_mad_hi, res_mul_hi, res_rhadd;
1321 res_abs.u = abs (x.v);1457 res_abs.u = abs (x.v);
1322 res_abs_diff.u = abs_diff(x.v, y.v);1458 res_abs_diff.u = abs_diff(x.v, y.v);
1323 res_add_sat.v = add_sat (x.v, y.v);1459 res_add_sat.v = add_sat (x.v, y.v);
1460 res_mad_sat.v = mad_sat (x.v, y.v, z.v);
1461 res_sub_sat.v = sub_sat (x.v, y.v);
1324 res_hadd.v = hadd (x.v, y.v);1462 res_hadd.v = hadd (x.v, y.v);
1463 res_mad_hi.v = mad_hi (x.v, y.v, z.v);
1464 res_mul_hi.v = mul_hi (x.v, y.v);
1325 res_rhadd.v = rhadd (x.v, y.v);1465 res_rhadd.v = rhadd (x.v, y.v);
1326 bool equal;1466 bool equal;
1327 // abs1467 // abs
@@ -1384,6 +1524,51 @@
1384 }1524 }
1385 return;1525 return;
1386 }1526 }
1527 // mad_hi
1528 equal = true;
1529 for (int n=0; n<vecsize; ++n) {
1530 equal = equal && res_mad_hi.s[n] == good_mad_hi.s[n];
1531 }
1532 if (!equal) {
1533 printf("FAIL: mad_hi type=%s\n", typename);
1534 for (int n=0; n<vecsize; ++n) {
1535 printf(" [%d] a=%d b=%d c=%d good=%d res=%d\n",
1536 n,
1537 (int)x.s[n], (int)y.s[n], (int)z.s[n],
1538 (int)good_mad_hi.s[n], (int)res_mad_hi.s[n]);
1539 }
1540 return;
1541 }
1542 // mad_sat
1543 equal = true;
1544 for (int n=0; n<vecsize; ++n) {
1545 equal = equal && res_mad_sat.s[n] == good_mad_sat.s[n];
1546 }
1547 if (!equal) {
1548 printf("FAIL: mad_sat type=%s\n", typename);
1549 for (int n=0; n<vecsize; ++n) {
1550 printf(" [%d] a=%d b=%d c=%d good=%d res=%d\n",
1551 n,
1552 (int)x.s[n], (int)y.s[n], (int)z.s[n],
1553 (int)good_mad_sat.s[n], (int)res_mad_sat.s[n]);
1554 }
1555 return;
1556 }
1557 // mul_hi
1558 equal = true;
1559 for (int n=0; n<vecsize; ++n) {
1560 equal = equal && res_mul_hi.s[n] == good_mul_hi.s[n];
1561 }
1562 if (!equal) {
1563 printf("FAIL: mul_hi type=%s\n", typename);
1564 for (int n=0; n<vecsize; ++n) {
1565 printf(" [%d] a=%d b=%d good=%d res=%d\n",
1566 n,
1567 (int)x.s[n], (int)y.s[n],
1568 (int)good_mul_hi.s[n], (int)res_mul_hi.s[n]);
1569 }
1570 return;
1571 }
1387 // rhadd1572 // rhadd
1388 equal = true;1573 equal = true;
1389 for (int n=0; n<vecsize; ++n) {1574 for (int n=0; n<vecsize; ++n) {
@@ -1399,6 +1584,21 @@
1399 }1584 }
1400 return;1585 return;
1401 }1586 }
1587 // sub_sat
1588 equal = true;
1589 for (int n=0; n<vecsize; ++n) {
1590 equal = equal && res_sub_sat.s[n] == good_sub_sat.s[n];
1591 }
1592 if (!equal) {
1593 printf("FAIL: sub_sat type=%s\n", typename);
1594 for (int n=0; n<vecsize; ++n) {
1595 printf(" [%d] a=%d b=%d good=%d res=%d\n",
1596 n,
1597 (int)x.s[n], (int)y.s[n],
1598 (int)good_sub_sat.s[n], (int)res_sub_sat.s[n]);
1599 }
1600 return;
1601 }
1402 }1602 }
1403 })1603 })
1404 )1604 )
14051605
=== modified file 'lib/kernel/x86_64/ceil.cl'
--- lib/kernel/x86_64/ceil.cl 2011-10-31 16:48:30 +0000
+++ lib/kernel/x86_64/ceil.cl 2012-01-20 19:44:17 +0000
@@ -31,7 +31,9 @@
31#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \31#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
32 TYPE _cl_overloadable NAME(TYPE a) \32 TYPE _cl_overloadable NAME(TYPE a) \
33 { \33 { \
34 return NAME(*(UPTYPE*)&a).LO; \34 UPTYPE a1; \
35 a1.LO = a; \
36 return NAME(a1).LO; \
35 }37 }
3638
37#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \39#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
3840
=== modified file 'lib/kernel/x86_64/fabs.cl'
--- lib/kernel/x86_64/fabs.cl 2011-12-15 00:52:06 +0000
+++ lib/kernel/x86_64/fabs.cl 2012-01-20 19:44:17 +0000
@@ -47,7 +47,9 @@
47#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \47#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
48 TYPE _cl_overloadable NAME(TYPE a) \48 TYPE _cl_overloadable NAME(TYPE a) \
49 { \49 { \
50 return NAME(*(UPTYPE*)&a).LO; \50 UPTYPE a1; \
51 a1.LO = a; \
52 return NAME(a1).LO; \
51 }53 }
5254
53#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \55#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
5456
=== modified file 'lib/kernel/x86_64/floor.cl'
--- lib/kernel/x86_64/floor.cl 2011-10-31 16:48:30 +0000
+++ lib/kernel/x86_64/floor.cl 2012-01-20 19:44:17 +0000
@@ -31,7 +31,9 @@
31#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \31#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
32 TYPE _cl_overloadable NAME(TYPE a) \32 TYPE _cl_overloadable NAME(TYPE a) \
33 { \33 { \
34 return NAME(*(UPTYPE*)&a).LO; \34 UPTYPE a1; \
35 a1.LO = a; \
36 return NAME(a1).LO; \
35 }37 }
3638
37#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \39#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
3840
=== modified file 'lib/kernel/x86_64/max.cl'
--- lib/kernel/x86_64/max.cl 2011-10-31 16:48:30 +0000
+++ lib/kernel/x86_64/max.cl 2012-01-20 19:44:17 +0000
@@ -32,7 +32,10 @@
32#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \32#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
33 TYPE _cl_overloadable NAME(TYPE a, TYPE b) \33 TYPE _cl_overloadable NAME(TYPE a, TYPE b) \
34 { \34 { \
35 return NAME(*(UPTYPE*)&a, *(UPTYPE*)&b).LO; \35 UPTYPE a1, b1; \
36 a1.LO = a; \
37 b1.LO = b; \
38 return NAME(a1, b1).LO; \
36 }39 }
3740
38#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \41#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
3942
=== modified file 'lib/kernel/x86_64/min.cl'
--- lib/kernel/x86_64/min.cl 2011-10-31 16:48:30 +0000
+++ lib/kernel/x86_64/min.cl 2012-01-20 19:44:17 +0000
@@ -32,7 +32,10 @@
32#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \32#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
33 TYPE _cl_overloadable NAME(TYPE a, TYPE b) \33 TYPE _cl_overloadable NAME(TYPE a, TYPE b) \
34 { \34 { \
35 return NAME(*(UPTYPE*)&a, *(UPTYPE*)&b).LO; \35 UPTYPE a1, b1; \
36 a1.LO = a; \
37 b1.LO = b; \
38 return NAME(a1, b1).LO; \
36 }39 }
3740
38#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \41#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
3942
=== modified file 'lib/kernel/x86_64/sqrt.cl'
--- lib/kernel/x86_64/sqrt.cl 2012-01-03 14:21:57 +0000
+++ lib/kernel/x86_64/sqrt.cl 2012-01-20 19:44:17 +0000
@@ -35,7 +35,9 @@
35#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \35#define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
36 TYPE _cl_overloadable NAME(TYPE a) \36 TYPE _cl_overloadable NAME(TYPE a) \
37 { \37 { \
38 return NAME(*(UPTYPE*)&a).LO; \38 UPTYPE a1; \
39 a1.LO = a; \
40 return NAME(a1).LO; \
39 }41 }
4042
41#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \43#define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
4244
=== modified file 'tests/testsuite.at'
--- tests/testsuite.at 2012-01-16 18:19:41 +0000
+++ tests/testsuite.at 2012-01-20 19:44:17 +0000
@@ -179,9 +179,7 @@
179[Running test test_fabs...179[Running test test_fabs...
180OK180OK
181])181])
182#AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_fabs], 0, expout)182AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_fabs], 0, expout)
183# Skip this test until >> works correctly again on vectors in clang's OpenCL
184AT_CHECK([exit 77])
185AT_CLEANUP183AT_CLEANUP
186184
187AT_SETUP([Kernel functions abs abs_diff add_sat hadd rhadd])185AT_SETUP([Kernel functions abs abs_diff add_sat hadd rhadd])
@@ -197,9 +195,7 @@
197[Running test test_rotate...195[Running test test_rotate...
198OK196OK
199])197])
200#AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_rotate], 0, expout)198AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_rotate], 0, expout)
201# Skip this test until << and >> work correctly with overflow in clang's OpenCL
202AT_CHECK([exit 77])
203AT_CLEANUP199AT_CLEANUP
204200
205AT_SETUP([Trigonometric functions])201AT_SETUP([Trigonometric functions])