Merge lp:~schnetter/pocl/main into lp:~pocl/pocl/trunk

Proposed by Erik Schnetter
Status: Merged
Merged at revision: 160
Proposed branch: lp:~schnetter/pocl/main
Merge into: lp:~pocl/pocl/trunk
Diff against target: 486 lines (+238/-27)
10 files modified
examples/kernel/kernel.c (+1/-1)
examples/kernel/test_fabs.cl (+7/-6)
examples/kernel/test_hadd.cl (+208/-8)
lib/kernel/x86_64/ceil.cl (+3/-1)
lib/kernel/x86_64/fabs.cl (+3/-1)
lib/kernel/x86_64/floor.cl (+3/-1)
lib/kernel/x86_64/max.cl (+4/-1)
lib/kernel/x86_64/min.cl (+4/-1)
lib/kernel/x86_64/sqrt.cl (+3/-1)
tests/testsuite.at (+2/-6)
To merge this branch: bzr merge lp:~schnetter/pocl/main
Reviewer Review Type Date Requested Status
Pekka Jääskeläinen Approve
Review via email: mp+89469@code.launchpad.net

Description of the change

Clang version 3.1 (trunk 148489) fixes several bugs, which allows me to re-enable all existing test cases.

To post a comment you must log in.
lp:~schnetter/pocl/main updated
181. By Erik Schnetter

Merge from trunk

182. By Erik Schnetter

Add test cases for mad_hi, mad_sat, mul_hi, and sub_sat

Revision history for this message
Pekka Jääskeläinen (pekka-jaaskelainen) wrote :

Merged. In the future, please also add the LLVM revision number to the INSTALL file. With the latest revision, Clang crashes when compiling pocl; it seems something more serious is broken in Clang at the moment.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'examples/kernel/kernel.c'
2--- examples/kernel/kernel.c 2011-12-20 19:16:58 +0000
3+++ examples/kernel/kernel.c 2012-01-20 19:44:17 +0000
4@@ -79,7 +79,7 @@
5 "test_bitselect",
6 "test_fabs",
7 "test_hadd",
8- //"test_rotate", /* TODO: this test fails; LLVM bug #11555 */
9+ "test_rotate",
10 };
11 int const ntests = sizeof(tests)/sizeof(*tests);
12 for (int i=0; i<ntests; ++i) {
13
14=== modified file 'examples/kernel/test_fabs.cl'
15--- examples/kernel/test_fabs.cl 2012-01-16 18:19:41 +0000
16+++ examples/kernel/test_fabs.cl 2012-01-20 19:44:17 +0000
17@@ -136,8 +136,8 @@
18 }
19 if (!equal) {
20 for (int n=0; n<vecsize; ++n) {
21- printf("FAIL: fabs type=%s val=%.17g res=%.17g\n",
22- typename, val.s[n], res.s[n]);
23+ printf("FAIL: fabs type=%s val=%.17g res=%.17g good=%.17g\n",
24+ typename, val.s[n], res.s[n], good.s[n]);
25 }
26 return;
27 }
28@@ -150,8 +150,9 @@
29 }
30 if (!equal) {
31 for (int n=0; n<vecsize; ++n) {
32- printf("FAIL: signbit type=%s val=%.17g res=%d\n",
33- typename, val.s[n], (int)ires.s[n]);
34+ printf("FAIL: signbit type=%s val=%.17g res=%d good=%d\n",
35+ typename, val.s[n], (int)ires.s[n],
36+ (sign>0 ? 0 : vecsize==1 ? +1 : -1));
37 }
38 return;
39 }
40@@ -167,8 +168,8 @@
41 }
42 if (!equal) {
43 for (int n=0; n<vecsize; ++n) {
44- printf("FAIL: copysign type=%s val=%.17g sign=%.17g res=%.17g\n",
45- typename, val.s[n], sign2*val2.s[n], res.s[n]);
46+ printf("FAIL: copysign type=%s val=%.17g sign=%.17g res=%.17g good=%.17g\n",
47+ typename, val.s[n], sign2*val2.s[n], res.s[n], good.s[n]);
48 }
49 return;
50 }
51
52=== modified file 'examples/kernel/test_hadd.cl'
53--- examples/kernel/test_hadd.cl 2011-12-20 18:54:12 +0000
54+++ examples/kernel/test_hadd.cl 2012-01-20 19:44:17 +0000
55@@ -2,7 +2,11 @@
56 // TESTING: abs_diff
57 // TESTING: add_sat
58 // TESTING: hadd
59+// TESTING: mad_hi
60+// TESTING: mad_sat
61+// TESTING: mul_hi
62 // TESTING: rhadd
63+// TESTING: sub_sat
64
65
66
67@@ -21,6 +25,24 @@
68 return b; \
69 } \
70 \
71+ STYPE##4 _cl_overloadable safe_normalize(STYPE##4 const a) \
72+ { \
73+ STYPE const halfbits = 4*sizeof(STYPE); \
74+ STYPE const halfmax = (STYPE)1 << halfbits; \
75+ STYPE const halfmask = halfmax - (STYPE)1; \
76+ STYPE tmp; \
77+ STYPE##4 b; \
78+ tmp = a.s0; \
79+ b.s0 = tmp & halfmask; \
80+ tmp = tmp >> halfbits + a.s1; \
81+ b.s1 = tmp & halfmask; \
82+ tmp = tmp >> halfbits + a.s2; \
83+ b.s2 = tmp & halfmask; \
84+ tmp = tmp >> halfbits + a.s3; \
85+ b.s3 = tmp; \
86+ return b; \
87+ } \
88+ \
89 STYPE _cl_overloadable safe_extract(STYPE##2 const a) \
90 { \
91 STYPE const halfbits = 4*sizeof(STYPE); \
92@@ -31,6 +53,19 @@
93 return b; \
94 } \
95 \
96+ STYPE _cl_overloadable safe_extract(STYPE##4 const a) \
97+ { \
98+ STYPE const halfbits = 4*sizeof(STYPE); \
99+ STYPE const halfmax = (STYPE)1 << halfbits; \
100+ STYPE const halfmask = halfmax - (STYPE)1; \
101+ STYPE b; \
102+ if (safe_extract(a.hi) != 0) { \
103+ printf("FAIL: safe_extract [%d,%d,%d,%d]\n", \
104+ (int)a.s0, (int)a.s1, (int)a.s2, (int)a.s3); \
105+ } \
106+ return safe_extract(a.lo); \
107+ } \
108+ \
109 STYPE##2 _cl_overloadable safe_neg(STYPE##2 a) \
110 { \
111 STYPE##2 b; \
112@@ -57,6 +92,16 @@
113 return safe_normalize(c); \
114 } \
115 \
116+ STYPE##4 _cl_overloadable safe_add(STYPE##4 const a, STYPE##4 const b) \
117+ { \
118+ STYPE##4 c; \
119+ c.s0 = a.s0 + b.s0; \
120+ c.s1 = a.s1 + b.s1; \
121+ c.s2 = a.s2 + b.s2; \
122+ c.s3 = a.s3 + b.s3; \
123+ return safe_normalize(c); \
124+ } \
125+ \
126 STYPE##2 _cl_overloadable safe_sub(STYPE##2 const a, STYPE##2 const b) \
127 { \
128 STYPE##2 c; \
129@@ -65,6 +110,27 @@
130 return safe_normalize(c); \
131 } \
132 \
133+ STYPE##4 _cl_overloadable safe_mul(STYPE##2 const a, STYPE##2 const b) \
134+ { \
135+ STYPE##4 c00, c01, c10, c11; \
136+ c00 = 0; \
137+ c00.s0 = a.s0 * b.s0; \
138+ c00 = safe_normalize(c00); \
139+ c01 = 0; \
140+ c01.s1 = a.s0 * b.s1; \
141+ c01 = safe_normalize(c01); \
142+ c10 = 0; \
143+ c10.s1 = a.s1 * b.s0; \
144+ c10 = safe_normalize(c10); \
145+ c11 = 0; \
146+ c11.s2 = a.s1 * b.s1; \
147+ c11 = safe_normalize(c11); \
148+ STYPE##4 c; \
149+ c = safe_add(safe_add(c00, c01), \
150+ safe_add(c10, c11)); \
151+ return c; \
152+ } \
153+ \
154 STYPE##2 _cl_overloadable safe_max(STYPE##2 const a, STYPE##2 const b) \
155 { \
156 STYPE##2 c; \
157@@ -87,6 +153,38 @@
158 return c; \
159 } \
160 \
161+ STYPE##2 _cl_overloadable safe_clamp(STYPE##2 const a, \
162+ STYPE##2 const alo, STYPE##2 const ahi) \
163+ { \
164+ STYPE##2 b; \
165+ if (a.s1 < alo.s1 || (a.s1 == alo.s1 && a.s0 < alo.s0)) { \
166+ b = alo; \
167+ } else if (a.s1 > ahi.s1 || (a.s1 == ahi.s1 && a.s0 > ahi.s0)) { \
168+ b = ahi; \
169+ } else { \
170+ b = a; \
171+ } \
172+ return b; \
173+ } \
174+ \
175+ STYPE##2 _cl_overloadable safe_clamp(STYPE##4 const a, \
176+ STYPE##2 const alo, STYPE##2 const ahi) \
177+ { \
178+ STYPE##2 b; \
179+ if (a.s3 < 0 || a.s2 < 0 || \
180+ a.s1 < alo.s1 || (a.s1 == alo.s1 && a.s0 < alo.s0)) \
181+ { \
182+ b = alo; \
183+ } else if (a.s3 > 0 || a.s2 > 0 || \
184+ a.s1 > ahi.s1 || (a.s1 == ahi.s1 && a.s0 > ahi.s0)) \
185+ { \
186+ b = ahi; \
187+ } else { \
188+ b = safe_normalize(a.lo); \
189+ } \
190+ return b; \
191+ } \
192+ \
193 STYPE##2 _cl_overloadable safe_rshift(STYPE##2 a) \
194 { \
195 STYPE const halfbits = 4*sizeof(STYPE); \
196@@ -98,6 +196,11 @@
197 b.s0 >>= (STYPE)1; \
198 b.s1 >>= (STYPE)1; \
199 return safe_normalize(b); \
200+ } \
201+ \
202+ STYPE##2 _cl_overloadable safe_hi(STYPE##4 a) \
203+ { \
204+ return safe_normalize(a.hi); \
205 }
206
207
208@@ -115,6 +218,19 @@
209 b = safe_normalize(b); \
210 if ((TYPE)safe_extract(b) != a) printf("FAIL: safe_create %d\n", (int)a); \
211 return b; \
212+ } \
213+ \
214+ STYPE##4 _cl_overloadable safe_create4(TYPE const a) \
215+ { \
216+ STYPE const halfbits = 4*sizeof(STYPE); \
217+ STYPE const halfmax = (STYPE)1 << halfbits; \
218+ STYPE const halfmask = halfmax - (STYPE)1; \
219+ STYPE##4 b; \
220+ b = 0; \
221+ b.lo = safe_create(a); \
222+ b = safe_normalize(b); \
223+ if ((TYPE)safe_extract(b) != a) printf("FAIL: safe_create4 %d\n", (int)a); \
224+ return b; \
225 }
226
227
228@@ -1285,8 +1401,8 @@
229 } Tvec;
230 Tvec x, y, z;
231 Tvec good_abs;
232- Tvec good_abs_diff, good_add_sat;
233- Tvec good_hadd, good_rhadd;
234+ Tvec good_abs_diff, good_add_sat, good_mad_sat, good_sub_sat;
235+ Tvec good_hadd, good_mad_hi, good_mul_hi, good_rhadd;
236 int vecsize = vec_step(gtype);
237 for (int n=0; n<vecsize; ++n) {
238 x.s[n] = randoms[(iter+n ) % nrandoms];
239@@ -1303,25 +1419,49 @@
240 safe_extract(safe_abs(safe_sub(safe_create(x.s[n]),
241 safe_create(y.s[n]))));
242 good_add_sat.s[n] =
243- safe_extract(safe_min(safe_max(safe_add(safe_create(x.s[n]),
244- safe_create(y.s[n])),
245- safe_create(tmin)),
246- safe_create(tmax)));
247+ safe_extract(safe_clamp(safe_add(safe_create(x.s[n]),
248+ safe_create(y.s[n])),
249+ safe_create(tmin),
250+ safe_create(tmax)));
251+ good_mad_sat.s[n] =
252+ safe_extract(safe_clamp(safe_add(safe_mul(safe_create(x.s[n]),
253+ safe_create(y.s[n])),
254+ safe_create4(z.s[n])),
255+ safe_create(tmin),
256+ safe_create(tmax)));
257+ good_sub_sat.s[n] =
258+ safe_extract(safe_clamp(safe_sub(safe_create(x.s[n]),
259+ safe_create(y.s[n])),
260+ safe_create(tmin),
261+ safe_create(tmax)));
262 good_hadd.s[n] =
263 safe_extract(safe_rshift(safe_add(safe_create(x.s[n]),
264 safe_create(y.s[n]))));
265+ good_mad_hi.s[n] =
266+ safe_extract(safe_clamp(safe_add(safe_hi(safe_mul(safe_create(x.s[n]),
267+ safe_create(y.s[n]))),
268+ safe_create(z.s[n])),
269+ safe_create(tmin),
270+ safe_create(tmax)));
271+ good_mul_hi.s[n] =
272+ safe_extract(safe_hi(safe_mul(safe_create(x.s[n]),
273+ safe_create(y.s[n]))));
274 good_rhadd.s[n] =
275 safe_extract(safe_rshift(safe_add(safe_add(safe_create(x.s[n]),
276 safe_create(y.s[n])),
277 safe_create((sgtype)1))));
278 }
279 Tvec res_abs;
280- Tvec res_abs_diff, res_add_sat;
281- Tvec res_hadd, res_rhadd;
282+ Tvec res_abs_diff, res_add_sat, res_mad_sat, res_sub_sat;
283+ Tvec res_hadd, res_mad_hi, res_mul_hi, res_rhadd;
284 res_abs.u = abs (x.v);
285 res_abs_diff.u = abs_diff(x.v, y.v);
286 res_add_sat.v = add_sat (x.v, y.v);
287+ res_mad_sat.v = mad_sat (x.v, y.v, z.v);
288+ res_sub_sat.v = sub_sat (x.v, y.v);
289 res_hadd.v = hadd (x.v, y.v);
290+ res_mad_hi.v = mad_hi (x.v, y.v, z.v);
291+ res_mul_hi.v = mul_hi (x.v, y.v);
292 res_rhadd.v = rhadd (x.v, y.v);
293 bool equal;
294 // abs
295@@ -1384,6 +1524,51 @@
296 }
297 return;
298 }
299+ // mad_hi
300+ equal = true;
301+ for (int n=0; n<vecsize; ++n) {
302+ equal = equal && res_mad_hi.s[n] == good_mad_hi.s[n];
303+ }
304+ if (!equal) {
305+ printf("FAIL: mad_hi type=%s\n", typename);
306+ for (int n=0; n<vecsize; ++n) {
307+ printf(" [%d] a=%d b=%d c=%d good=%d res=%d\n",
308+ n,
309+ (int)x.s[n], (int)y.s[n], (int)z.s[n],
310+ (int)good_mad_hi.s[n], (int)res_mad_hi.s[n]);
311+ }
312+ return;
313+ }
314+ // mad_sat
315+ equal = true;
316+ for (int n=0; n<vecsize; ++n) {
317+ equal = equal && res_mad_sat.s[n] == good_mad_sat.s[n];
318+ }
319+ if (!equal) {
320+ printf("FAIL: mad_sat type=%s\n", typename);
321+ for (int n=0; n<vecsize; ++n) {
322+ printf(" [%d] a=%d b=%d c=%d good=%d res=%d\n",
323+ n,
324+ (int)x.s[n], (int)y.s[n], (int)z.s[n],
325+ (int)good_mad_sat.s[n], (int)res_mad_sat.s[n]);
326+ }
327+ return;
328+ }
329+ // mul_hi
330+ equal = true;
331+ for (int n=0; n<vecsize; ++n) {
332+ equal = equal && res_mul_hi.s[n] == good_mul_hi.s[n];
333+ }
334+ if (!equal) {
335+ printf("FAIL: mul_hi type=%s\n", typename);
336+ for (int n=0; n<vecsize; ++n) {
337+ printf(" [%d] a=%d b=%d good=%d res=%d\n",
338+ n,
339+ (int)x.s[n], (int)y.s[n],
340+ (int)good_mul_hi.s[n], (int)res_mul_hi.s[n]);
341+ }
342+ return;
343+ }
344 // rhadd
345 equal = true;
346 for (int n=0; n<vecsize; ++n) {
347@@ -1399,6 +1584,21 @@
348 }
349 return;
350 }
351+ // sub_sat
352+ equal = true;
353+ for (int n=0; n<vecsize; ++n) {
354+ equal = equal && res_sub_sat.s[n] == good_sub_sat.s[n];
355+ }
356+ if (!equal) {
357+ printf("FAIL: sub_sat type=%s\n", typename);
358+ for (int n=0; n<vecsize; ++n) {
359+ printf(" [%d] a=%d b=%d good=%d res=%d\n",
360+ n,
361+ (int)x.s[n], (int)y.s[n],
362+ (int)good_sub_sat.s[n], (int)res_sub_sat.s[n]);
363+ }
364+ return;
365+ }
366 }
367 })
368 )
369
370=== modified file 'lib/kernel/x86_64/ceil.cl'
371--- lib/kernel/x86_64/ceil.cl 2011-10-31 16:48:30 +0000
372+++ lib/kernel/x86_64/ceil.cl 2012-01-20 19:44:17 +0000
373@@ -31,7 +31,9 @@
374 #define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
375 TYPE _cl_overloadable NAME(TYPE a) \
376 { \
377- return NAME(*(UPTYPE*)&a).LO; \
378+ UPTYPE a1; \
379+ a1.LO = a; \
380+ return NAME(a1).LO; \
381 }
382
383 #define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
384
385=== modified file 'lib/kernel/x86_64/fabs.cl'
386--- lib/kernel/x86_64/fabs.cl 2011-12-15 00:52:06 +0000
387+++ lib/kernel/x86_64/fabs.cl 2012-01-20 19:44:17 +0000
388@@ -47,7 +47,9 @@
389 #define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
390 TYPE _cl_overloadable NAME(TYPE a) \
391 { \
392- return NAME(*(UPTYPE*)&a).LO; \
393+ UPTYPE a1; \
394+ a1.LO = a; \
395+ return NAME(a1).LO; \
396 }
397
398 #define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
399
400=== modified file 'lib/kernel/x86_64/floor.cl'
401--- lib/kernel/x86_64/floor.cl 2011-10-31 16:48:30 +0000
402+++ lib/kernel/x86_64/floor.cl 2012-01-20 19:44:17 +0000
403@@ -31,7 +31,9 @@
404 #define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
405 TYPE _cl_overloadable NAME(TYPE a) \
406 { \
407- return NAME(*(UPTYPE*)&a).LO; \
408+ UPTYPE a1; \
409+ a1.LO = a; \
410+ return NAME(a1).LO; \
411 }
412
413 #define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
414
415=== modified file 'lib/kernel/x86_64/max.cl'
416--- lib/kernel/x86_64/max.cl 2011-10-31 16:48:30 +0000
417+++ lib/kernel/x86_64/max.cl 2012-01-20 19:44:17 +0000
418@@ -32,7 +32,10 @@
419 #define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
420 TYPE _cl_overloadable NAME(TYPE a, TYPE b) \
421 { \
422- return NAME(*(UPTYPE*)&a, *(UPTYPE*)&b).LO; \
423+ UPTYPE a1, b1; \
424+ a1.LO = a; \
425+ b1.LO = b; \
426+ return NAME(a1, b1).LO; \
427 }
428
429 #define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
430
431=== modified file 'lib/kernel/x86_64/min.cl'
432--- lib/kernel/x86_64/min.cl 2011-10-31 16:48:30 +0000
433+++ lib/kernel/x86_64/min.cl 2012-01-20 19:44:17 +0000
434@@ -32,7 +32,10 @@
435 #define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
436 TYPE _cl_overloadable NAME(TYPE a, TYPE b) \
437 { \
438- return NAME(*(UPTYPE*)&a, *(UPTYPE*)&b).LO; \
439+ UPTYPE a1, b1; \
440+ a1.LO = a; \
441+ b1.LO = b; \
442+ return NAME(a1, b1).LO; \
443 }
444
445 #define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
446
447=== modified file 'lib/kernel/x86_64/sqrt.cl'
448--- lib/kernel/x86_64/sqrt.cl 2012-01-03 14:21:57 +0000
449+++ lib/kernel/x86_64/sqrt.cl 2012-01-20 19:44:17 +0000
450@@ -35,7 +35,9 @@
451 #define IMPLEMENT_UPCAST(NAME, TYPE, UPTYPE, LO) \
452 TYPE _cl_overloadable NAME(TYPE a) \
453 { \
454- return NAME(*(UPTYPE*)&a).LO; \
455+ UPTYPE a1; \
456+ a1.LO = a; \
457+ return NAME(a1).LO; \
458 }
459
460 #define IMPLEMENT_SPLIT(NAME, TYPE, LO, HI) \
461
462=== modified file 'tests/testsuite.at'
463--- tests/testsuite.at 2012-01-16 18:19:41 +0000
464+++ tests/testsuite.at 2012-01-20 19:44:17 +0000
465@@ -179,9 +179,7 @@
466 [Running test test_fabs...
467 OK
468 ])
469-#AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_fabs], 0, expout)
470-# Skip this test until >> works correctly again on vectors in clang's OpenCL
471-AT_CHECK([exit 77])
472+AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_fabs], 0, expout)
473 AT_CLEANUP
474
475 AT_SETUP([Kernel functions abs abs_diff add_sat hadd rhadd])
476@@ -197,9 +195,7 @@
477 [Running test test_rotate...
478 OK
479 ])
480-#AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_rotate], 0, expout)
481-# Skip this test until << and >> work correctly with overflow in clang's OpenCL
482-AT_CHECK([exit 77])
483+AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_rotate], 0, expout)
484 AT_CLEANUP
485
486 AT_SETUP([Trigonometric functions])