Merge lp:~schnetter/pocl/main into lp:~pocl/pocl/trunk

Proposed by Erik Schnetter
Status: Merged
Merged at revision: 212
Proposed branch: lp:~schnetter/pocl/main
Merge into: lp:~pocl/pocl/trunk
Diff against target: 322 lines (+99/-37)
10 files modified
README.devel (+1/-1)
examples/ViennaCL/README (+4/-5)
examples/kernel/test_bitselect.cl (+45/-6)
include/_kernel.h (+8/-14)
lib/kernel/add_sat.cl (+1/-1)
lib/kernel/as_type.cl (+10/-5)
lib/kernel/sincos.cl (+26/-0)
lib/kernel/sources.mk (+1/-0)
lib/kernel/templates.h (+1/-3)
tests/testsuite.at (+2/-2)
To merge this branch: bzr merge lp:~schnetter/pocl/main
Reviewer Review Type Date Requested Status
Erik Schnetter Needs Resubmitting
Pekka Jääskeläinen Needs Fixing
Review via email: mp+95921@code.launchpad.net

Description of the change

I mostly made small changes, and also implemented sincos().

To post a comment you must log in.
Revision history for this message
Pekka Jääskeläinen (pekka-jaaskelainen) wrote :

15:28 < visit0r> eschnett: openclbench of ViennaCL fails with your modifications
15:28 < visit0r> http://codepad.org/apFci5iG
15:29 < visit0r> ( ViennaCL-1.2.0-src/build/examples/benchmarks$ ./openclbench )

review: Needs Fixing
Revision history for this message
Erik Schnetter (schnetter) wrote :

ViennaCL fails for me, even on the trunk. Updating to a more modern clang (3.1svn) didn't help.

I added support for the "private" address space on my branch. This should resolve the error reported above, but I have not been able to test it.

review: Needs Resubmitting
lp:~schnetter/pocl/main updated
191. By Erik Schnetter

Correct typo. Reformat.

192. By Erik Schnetter

Enable address space "private" in prototypes.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'README.devel'
2--- README.devel 2012-01-06 23:16:00 +0000
3+++ README.devel 2012-03-06 23:20:24 +0000
4@@ -42,4 +42,4 @@
5 - Target architecture (as per --target, using standard GNU rules)
6 - Host architecture (same)
7
8-Host and/or target can be one of the specialized architectures.
9\ No newline at end of file
10+Host and/or target can be one of the specialized architectures.
11
12=== modified file 'examples/ViennaCL/README'
13--- examples/ViennaCL/README 2012-02-04 22:10:51 +0000
14+++ examples/ViennaCL/README 2012-03-06 23:20:24 +0000
15@@ -1,10 +1,9 @@
16-Enable building the ViennaCL suite as a pocl test case. ViennaCL will be compiled,
17-if its sources are extracted in this directory.
18+Enable building the ViennaCL suite as a pocl test case. ViennaCL will
19+be compiled, if its sources are extracted in this directory.
20
21 Download and unpack the ViennaCL package:
22 http://sourceforge.net/projects/viennacl/files/1.2.x/ViennaCL-1.2.0-src.tar.gz
23 into this directory.
24
25-Then, in pocl toplevel source dir run 'make check' or 'make TESTUITE=viennacl check'
26-to run the ViennaCL tests.
27-
28+Then, in pocl toplevel source dir run 'make check' or
29+'make TESTSUITE=viennacl check' to run the ViennaCL tests.
30
31=== modified file 'examples/kernel/test_bitselect.cl'
32--- examples/kernel/test_bitselect.cl 2011-12-20 18:54:12 +0000
33+++ examples/kernel/test_bitselect.cl 2012-03-06 23:20:24 +0000
34@@ -1,4 +1,6 @@
35 // TESTING: bitselect
36+// TESTING: clz
37+// TESTING: popcount
38
39 #define IMPLEMENT_BODY_G(NAME, BODY, GTYPE, SGTYPE, UGTYPE, SUGTYPE) \
40 void NAME##_##GTYPE() \
41@@ -1142,7 +1144,7 @@
42 gtype v;
43 sgtype s[16];
44 } Tvec;
45- Tvec sel, left, right, res;
46+ Tvec sel, left, right, res_bitselect, res_clz, res_popcount;
47 int vecsize = vec_step(gtype);
48 for (int n=0; n<vecsize; ++n) {
49 sel.s[n] = randoms[(iter+n ) % nrandoms];
50@@ -1154,17 +1156,54 @@
51 right.s[n] = (right.s[n] << (bits/2)) | randoms[(iter+n+140) % nrandoms];
52 }
53 }
54- res.v = bitselect(left.v, right.v, sel.v);
55- bool equal = true;
56+ res_bitselect.v = bitselect(left.v, right.v, sel.v);
57+ res_clz.v = clz(left.v);
58+ res_popcount.v = popcount(left.v);
59+ bool equal;
60+ // bitselect
61+ equal = true;
62 for (int n=0; n<vecsize; ++n) {
63- equal = equal && ((res.s[n] & ~sel.s[n]) == (left.s[n] & ~sel.s[n]));
64- equal = equal && ((res.s[n] & sel.s[n]) == (right.s[n] & sel.s[n]));
65+ equal = equal && ((res_bitselect.s[n] & ~sel.s[n]) == (left.s[n] & ~sel.s[n]));
66+ equal = equal && ((res_bitselect.s[n] & sel.s[n]) == (right.s[n] & sel.s[n]));
67 }
68 if (!equal) {
69 printf("FAIL: bitselect type=%s a=0x%08x b=0x%08x c=0x%08x c=0x%08x\n",
70 typename,
71 (uint)left.s[0], (uint)right.s[0], (uint)sel.s[0],
72- (uint)res.s[0]);
73+ (uint)res_bitselect.s[0]);
74+ return;
75+ }
76+ // clz
77+ equal = true;
78+ for (int n=0; n<vecsize; ++n) {
79+ int b=0;
80+ while (b<bits) {
81+ sgtype mask = (sgtype)1 << (sgtype)(bits - 1 - b);
82+ if (left.s[n] & mask) break;
83+ ++b;
84+ }
85+ equal = equal && res_clz.s[n] == (sgtype)b;
86+ }
87+ if (!equal) {
88+ printf("FAIL: clz type=%s a=0x%08x a=0x%08x\n",
89+ typename,
90+ (uint)left.s[0], (uint)res_clz.s[0]);
91+ return;
92+ }
93+ // popcount
94+ equal = true;
95+ for (int n=0; n<vecsize; ++n) {
96+ int c=0;
97+ for (int b=0; b<bits; ++b) {
98+ sgtype mask = (sgtype)1 << (sgtype)b;
99+ if (left.s[n] & mask) ++c;
100+ }
101+ equal = equal && res_popcount.s[n] == (sgtype)c;
102+ }
103+ if (!equal) {
104+ printf("FAIL: popcount type=%s a=0x%08x a=0x%08x\n",
105+ typename,
106+ (uint)left.s[0], (uint)res_clz.s[0]);
107 return;
108 }
109 }
110
111=== modified file 'include/_kernel.h'
112--- include/_kernel.h 2012-01-20 16:36:41 +0000
113+++ include/_kernel.h 2012-03-06 23:20:24 +0000
114@@ -717,7 +717,6 @@
115 double4 _cl_overloadable NAME(double4 , __local double4 *); \
116 double8 _cl_overloadable NAME(double8 , __local double8 *); \
117 double16 _cl_overloadable NAME(double16, __local double16*);) \
118- /* __private is not supported yet \
119 float _cl_overloadable NAME(float , __private float *); \
120 float2 _cl_overloadable NAME(float2 , __private float2 *); \
121 float3 _cl_overloadable NAME(float3 , __private float3 *); \
122@@ -730,8 +729,7 @@
123 double3 _cl_overloadable NAME(double3 , __private double3 *); \
124 double4 _cl_overloadable NAME(double4 , __private double4 *); \
125 double8 _cl_overloadable NAME(double8 , __private double8 *); \
126- double16 _cl_overloadable NAME(double16, __private double16*);) \
127- */
128+ double16 _cl_overloadable NAME(double16, __private double16*);)
129 #define _CL_DECLARE_FUNC_V_SV(NAME) \
130 float2 _cl_overloadable NAME(float , float2 ); \
131 float3 _cl_overloadable NAME(float , float3 ); \
132@@ -904,7 +902,7 @@
133 _CL_DECLARE_FUNC_V_V(round)
134 _CL_DECLARE_FUNC_V_V(rsqrt)
135 _CL_DECLARE_FUNC_V_V(sin)
136-// sincos
137+_CL_DECLARE_FUNC_V_VPV(sincos)
138 _CL_DECLARE_FUNC_V_V(sinh)
139 _CL_DECLARE_FUNC_V_V(sinpi)
140 _CL_DECLARE_FUNC_V_V(sqrt)
141@@ -1490,7 +1488,6 @@
142 _CL_DECLARE_VLOAD(double, __constant)
143 #endif
144
145-/* __private is not supported yet \
146 _CL_DECLARE_VLOAD(char , __private)
147 _CL_DECLARE_VLOAD(uchar , __private)
148 _CL_DECLARE_VLOAD(short , __private)
149@@ -1505,7 +1502,6 @@
150 #ifdef cl_khr_fp64
151 _CL_DECLARE_VLOAD(double, __private)
152 #endif
153-*/
154
155 #define _CL_DECLARE_VSTORE(TYPE, MOD) \
156 void _cl_overloadable vstore2 (TYPE##2 data, size_t offset, MOD TYPE *p); \
157@@ -1544,7 +1540,6 @@
158 _CL_DECLARE_VSTORE(double, __local)
159 #endif
160
161-/* __private is not supported yet
162 _CL_DECLARE_VSTORE(char , __private)
163 _CL_DECLARE_VSTORE(uchar , __private)
164 _CL_DECLARE_VSTORE(short , __private)
165@@ -1559,7 +1554,6 @@
166 #ifdef cl_khr_fp64
167 _CL_DECLARE_VSTORE(double, __private)
168 #endif
169-*/
170
171 #ifdef cl_khr_fp16
172
173@@ -1579,7 +1573,7 @@
174 _CL_DECLARE_VLOAD_HALF(__global)
175 _CL_DECLARE_VLOAD_HALF(__local)
176 _CL_DECLARE_VLOAD_HALF(__constant)
177-/* _CL_DECLARE_VLOAD_HALF(__private) */
178+_CL_DECLARE_VLOAD_HALF(__private)
179
180 /* stores to half may have a suffix: _rte _rtz _rtp _rtn */
181 #define _CL_DECLARE_VSTORE_HALF(MOD, SUFFIX) \
182@@ -1605,11 +1599,11 @@
183 _CL_DECLARE_VSTORE_HALF(__local , _rtz)
184 _CL_DECLARE_VSTORE_HALF(__local , _rtp)
185 _CL_DECLARE_VSTORE_HALF(__local , _rtn)
186-/* _CL_DECLARE_VSTORE_HALF(__private , ) */
187-/* _CL_DECLARE_VSTORE_HALF(__private , _rte) */
188-/* _CL_DECLARE_VSTORE_HALF(__private , _rtz) */
189-/* _CL_DECLARE_VSTORE_HALF(__private , _rtp) */
190-/* _CL_DECLARE_VSTORE_HALF(__private , _rtn) */
191+_CL_DECLARE_VSTORE_HALF(__private , )
192+_CL_DECLARE_VSTORE_HALF(__private , _rte)
193+_CL_DECLARE_VSTORE_HALF(__private , _rtz)
194+_CL_DECLARE_VSTORE_HALF(__private , _rtp)
195+_CL_DECLARE_VSTORE_HALF(__private , _rtn)
196
197 #endif
198
199
200=== modified file 'lib/kernel/add_sat.cl'
201--- lib/kernel/add_sat.cl 2011-12-20 18:54:48 +0000
202+++ lib/kernel/add_sat.cl 2012-03-06 23:20:24 +0000
203@@ -49,6 +49,6 @@
204 }) :
205 /* unsigned */
206 ({
207- gtype max = (sgtype)-1;
208+ gtype max = ~(gtype)0;
209 a > max-b ? max : a+b;
210 }))
211
212=== modified file 'lib/kernel/as_type.cl'
213--- lib/kernel/as_type.cl 2011-12-16 02:11:57 +0000
214+++ lib/kernel/as_type.cl 2012-03-06 23:20:24 +0000
215@@ -21,11 +21,16 @@
216 THE SOFTWARE.
217 */
218
219-#define DEFINE_AS_TYPE(SRC, DST) \
220- DST _cl_overloadable \
221- as_##DST(SRC a) \
222- { \
223- return *(DST*)&a; \
224+#define DEFINE_AS_TYPE(SRC, DST) \
225+ DST _cl_overloadable \
226+ as_##DST(SRC a) \
227+ { \
228+ /* This may not be safe: */ \
229+ /* return *(DST*)&a; */ \
230+ /* This should be safe, but is not officially supported in OpenCL: */ \
231+ union { SRC src; DST dst; } cvt; \
232+ cvt.src = a; \
233+ return cvt.dst; \
234 }
235
236
237
238=== added file 'lib/kernel/sincos.cl'
239--- lib/kernel/sincos.cl 1970-01-01 00:00:00 +0000
240+++ lib/kernel/sincos.cl 2012-03-06 23:20:24 +0000
241@@ -0,0 +1,26 @@
242+/* OpenCL built-in library: sincos()
243+
244+ Copyright (c) 2011 Universidad Rey Juan Carlos
245+
246+ Permission is hereby granted, free of charge, to any person obtaining a copy
247+ of this software and associated documentation files (the "Software"), to deal
248+ in the Software without restriction, including without limitation the rights
249+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
250+ copies of the Software, and to permit persons to whom the Software is
251+ furnished to do so, subject to the following conditions:
252+
253+ The above copyright notice and this permission notice shall be included in
254+ all copies or substantial portions of the Software.
255+
256+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
257+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
258+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
259+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
260+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
261+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
262+ THE SOFTWARE.
263+*/
264+
265+#include "templates.h"
266+
267+DEFINE_EXPR_V_VPV(sincos, ({ *b=cos(a); sin(a); }))
268
269=== modified file 'lib/kernel/sources.mk'
270--- lib/kernel/sources.mk 2012-01-20 16:36:41 +0000
271+++ lib/kernel/sources.mk 2012-03-06 23:20:24 +0000
272@@ -64,6 +64,7 @@
273 round.cl \
274 rsqrt.cl \
275 sin.cl \
276+ sincos.cl \
277 sinh.cl \
278 sinpi.cl \
279 sqrt.cl \
280
281=== modified file 'lib/kernel/templates.h'
282--- lib/kernel/templates.h 2012-01-20 16:36:41 +0000
283+++ lib/kernel/templates.h 2012-03-06 23:20:24 +0000
284@@ -565,15 +565,13 @@
285 typedef STYPE stype; \
286 return EXPR; \
287 } \
288- /* __private is not supported yet \
289 VTYPE __attribute__ ((overloadable)) \
290 NAME(VTYPE a, __private VTYPE *b) \
291 { \
292 typedef VTYPE vtype; \
293 typedef STYPE stype; \
294 return EXPR; \
295- } \
296- */
297+ }
298 #define DEFINE_EXPR_V_VPV(NAME, EXPR) \
299 IMPLEMENT_EXPR_V_VPV(NAME, EXPR, float , float ) \
300 IMPLEMENT_EXPR_V_VPV(NAME, EXPR, float2 , float ) \
301
302=== modified file 'tests/testsuite.at'
303--- tests/testsuite.at 2012-02-09 18:54:22 +0000
304+++ tests/testsuite.at 2012-03-06 23:20:24 +0000
305@@ -167,7 +167,7 @@
306
307 AT_BANNER([Kernel runtime library])
308
309-AT_SETUP([Kernel function bitselect])
310+AT_SETUP([Kernel functions bitselect clz popcount])
311 AT_DATA([expout],
312 [Running test test_bitselect...
313 OK
314@@ -183,7 +183,7 @@
315 AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_fabs], 0, expout)
316 AT_CLEANUP
317
318-AT_SETUP([Kernel functions abs abs_diff add_sat hadd rhadd])
319+AT_SETUP([Kernel functions abs abs_diff add_sat hadd mad_hi mad_sat mul_hi rhadd sub_sat])
320 AT_DATA([expout],
321 [Running test test_hadd...
322 OK