Merge lp:~schnetter/pocl/main into lp:~pocl/pocl/trunk
- main
- Merge into trunk
Proposed by
Erik Schnetter
Status: | Merged |
---|---|
Merged at revision: | 212 |
Proposed branch: | lp:~schnetter/pocl/main |
Merge into: | lp:~pocl/pocl/trunk |
Diff against target: |
322 lines (+99/-37) 10 files modified
README.devel (+1/-1) examples/ViennaCL/README (+4/-5) examples/kernel/test_bitselect.cl (+45/-6) include/_kernel.h (+8/-14) lib/kernel/add_sat.cl (+1/-1) lib/kernel/as_type.cl (+10/-5) lib/kernel/sincos.cl (+26/-0) lib/kernel/sources.mk (+1/-0) lib/kernel/templates.h (+1/-3) tests/testsuite.at (+2/-2) |
To merge this branch: | bzr merge lp:~schnetter/pocl/main |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Erik Schnetter | Needs Resubmitting | ||
Pekka Jääskeläinen | Needs Fixing | ||
Review via email:
|
Commit message
Description of the change
I made mostly small changes, and also implemented sincos().
To post a comment you must log in.
Revision history for this message
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Pekka Jääskeläinen (pekka-jaaskelainen) wrote : | # |
review:
Needs Fixing
Revision history for this message
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Erik Schnetter (schnetter) wrote : | # |
ViennaCL fails for me, even on the trunk. Updating to a more modern clang (3.1svn) didn't help.
I added support for the "private" address space on my branch. This should address the error message above. However, I cannot test this.
review:
Needs Resubmitting
lp:~schnetter/pocl/main
updated
- 191. By Erik Schnetter
-
Correct typo. Reformat.
- 192. By Erik Schnetter
-
Enable address space "private" in prototypes.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'README.devel' |
2 | --- README.devel 2012-01-06 23:16:00 +0000 |
3 | +++ README.devel 2012-03-06 23:20:24 +0000 |
4 | @@ -42,4 +42,4 @@ |
5 | - Target architecture (as per --target, using standard GNU rules) |
6 | - Host architecture (same) |
7 | |
8 | -Host and/or target can be one of the specialized architectures. |
9 | \ No newline at end of file |
10 | +Host and/or target can be one of the specialized architectures. |
11 | |
12 | === modified file 'examples/ViennaCL/README' |
13 | --- examples/ViennaCL/README 2012-02-04 22:10:51 +0000 |
14 | +++ examples/ViennaCL/README 2012-03-06 23:20:24 +0000 |
15 | @@ -1,10 +1,9 @@ |
16 | -Enable building the ViennaCL suite as a pocl test case. ViennaCL will be compiled, |
17 | -if its sources are extracted in this directory. |
18 | +Enable building the ViennaCL suite as a pocl test case. ViennaCL will |
19 | +be compiled, if its sources are extracted in this directory. |
20 | |
21 | Download and unpack the ViennaCL package: |
22 | http://sourceforge.net/projects/viennacl/files/1.2.x/ViennaCL-1.2.0-src.tar.gz |
23 | into this directory. |
24 | |
25 | -Then, in pocl toplevel source dir run 'make check' or 'make TESTUITE=viennacl check' |
26 | -to run the ViennaCL tests. |
27 | - |
28 | +Then, in pocl toplevel source dir run 'make check' or |
29 | +'make TESTSUITE=viennacl check' to run the ViennaCL tests. |
30 | |
31 | === modified file 'examples/kernel/test_bitselect.cl' |
32 | --- examples/kernel/test_bitselect.cl 2011-12-20 18:54:12 +0000 |
33 | +++ examples/kernel/test_bitselect.cl 2012-03-06 23:20:24 +0000 |
34 | @@ -1,4 +1,6 @@ |
35 | // TESTING: bitselect |
36 | +// TESTING: clz |
37 | +// TESTING: popcount |
38 | |
39 | #define IMPLEMENT_BODY_G(NAME, BODY, GTYPE, SGTYPE, UGTYPE, SUGTYPE) \ |
40 | void NAME##_##GTYPE() \ |
41 | @@ -1142,7 +1144,7 @@ |
42 | gtype v; |
43 | sgtype s[16]; |
44 | } Tvec; |
45 | - Tvec sel, left, right, res; |
46 | + Tvec sel, left, right, res_bitselect, res_clz, res_popcount; |
47 | int vecsize = vec_step(gtype); |
48 | for (int n=0; n<vecsize; ++n) { |
49 | sel.s[n] = randoms[(iter+n ) % nrandoms]; |
50 | @@ -1154,17 +1156,54 @@ |
51 | right.s[n] = (right.s[n] << (bits/2)) | randoms[(iter+n+140) % nrandoms]; |
52 | } |
53 | } |
54 | - res.v = bitselect(left.v, right.v, sel.v); |
55 | - bool equal = true; |
56 | + res_bitselect.v = bitselect(left.v, right.v, sel.v); |
57 | + res_clz.v = clz(left.v); |
58 | + res_popcount.v = popcount(left.v); |
59 | + bool equal; |
60 | + // bitselect |
61 | + equal = true; |
62 | for (int n=0; n<vecsize; ++n) { |
63 | - equal = equal && ((res.s[n] & ~sel.s[n]) == (left.s[n] & ~sel.s[n])); |
64 | - equal = equal && ((res.s[n] & sel.s[n]) == (right.s[n] & sel.s[n])); |
65 | + equal = equal && ((res_bitselect.s[n] & ~sel.s[n]) == (left.s[n] & ~sel.s[n])); |
66 | + equal = equal && ((res_bitselect.s[n] & sel.s[n]) == (right.s[n] & sel.s[n])); |
67 | } |
68 | if (!equal) { |
69 | printf("FAIL: bitselect type=%s a=0x%08x b=0x%08x c=0x%08x c=0x%08x\n", |
70 | typename, |
71 | (uint)left.s[0], (uint)right.s[0], (uint)sel.s[0], |
72 | - (uint)res.s[0]); |
73 | + (uint)res_bitselect.s[0]); |
74 | + return; |
75 | + } |
76 | + // clz |
77 | + equal = true; |
78 | + for (int n=0; n<vecsize; ++n) { |
79 | + int b=0; |
80 | + while (b<bits) { |
81 | + sgtype mask = (sgtype)1 << (sgtype)(bits - 1 - b); |
82 | + if (left.s[n] & mask) break; |
83 | + ++b; |
84 | + } |
85 | + equal = equal && res_clz.s[n] == (sgtype)b; |
86 | + } |
87 | + if (!equal) { |
88 | + printf("FAIL: clz type=%s a=0x%08x a=0x%08x\n", |
89 | + typename, |
90 | + (uint)left.s[0], (uint)res_clz.s[0]); |
91 | + return; |
92 | + } |
93 | + // popcount |
94 | + equal = true; |
95 | + for (int n=0; n<vecsize; ++n) { |
96 | + int c=0; |
97 | + for (int b=0; b<bits; ++b) { |
98 | + sgtype mask = (sgtype)1 << (sgtype)b; |
99 | + if (left.s[n] & mask) ++c; |
100 | + } |
101 | + equal = equal && res_popcount.s[n] == (sgtype)c; |
102 | + } |
103 | + if (!equal) { |
104 | + printf("FAIL: popcount type=%s a=0x%08x a=0x%08x\n", |
105 | + typename, |
106 | + (uint)left.s[0], (uint)res_clz.s[0]); |
107 | return; |
108 | } |
109 | } |
110 | |
111 | === modified file 'include/_kernel.h' |
112 | --- include/_kernel.h 2012-01-20 16:36:41 +0000 |
113 | +++ include/_kernel.h 2012-03-06 23:20:24 +0000 |
114 | @@ -717,7 +717,6 @@ |
115 | double4 _cl_overloadable NAME(double4 , __local double4 *); \ |
116 | double8 _cl_overloadable NAME(double8 , __local double8 *); \ |
117 | double16 _cl_overloadable NAME(double16, __local double16*);) \ |
118 | - /* __private is not supported yet \ |
119 | float _cl_overloadable NAME(float , __private float *); \ |
120 | float2 _cl_overloadable NAME(float2 , __private float2 *); \ |
121 | float3 _cl_overloadable NAME(float3 , __private float3 *); \ |
122 | @@ -730,8 +729,7 @@ |
123 | double3 _cl_overloadable NAME(double3 , __private double3 *); \ |
124 | double4 _cl_overloadable NAME(double4 , __private double4 *); \ |
125 | double8 _cl_overloadable NAME(double8 , __private double8 *); \ |
126 | - double16 _cl_overloadable NAME(double16, __private double16*);) \ |
127 | - */ |
128 | + double16 _cl_overloadable NAME(double16, __private double16*);) |
129 | #define _CL_DECLARE_FUNC_V_SV(NAME) \ |
130 | float2 _cl_overloadable NAME(float , float2 ); \ |
131 | float3 _cl_overloadable NAME(float , float3 ); \ |
132 | @@ -904,7 +902,7 @@ |
133 | _CL_DECLARE_FUNC_V_V(round) |
134 | _CL_DECLARE_FUNC_V_V(rsqrt) |
135 | _CL_DECLARE_FUNC_V_V(sin) |
136 | -// sincos |
137 | +_CL_DECLARE_FUNC_V_VPV(sincos) |
138 | _CL_DECLARE_FUNC_V_V(sinh) |
139 | _CL_DECLARE_FUNC_V_V(sinpi) |
140 | _CL_DECLARE_FUNC_V_V(sqrt) |
141 | @@ -1490,7 +1488,6 @@ |
142 | _CL_DECLARE_VLOAD(double, __constant) |
143 | #endif |
144 | |
145 | -/* __private is not supported yet \ |
146 | _CL_DECLARE_VLOAD(char , __private) |
147 | _CL_DECLARE_VLOAD(uchar , __private) |
148 | _CL_DECLARE_VLOAD(short , __private) |
149 | @@ -1505,7 +1502,6 @@ |
150 | #ifdef cl_khr_fp64 |
151 | _CL_DECLARE_VLOAD(double, __private) |
152 | #endif |
153 | -*/ |
154 | |
155 | #define _CL_DECLARE_VSTORE(TYPE, MOD) \ |
156 | void _cl_overloadable vstore2 (TYPE##2 data, size_t offset, MOD TYPE *p); \ |
157 | @@ -1544,7 +1540,6 @@ |
158 | _CL_DECLARE_VSTORE(double, __local) |
159 | #endif |
160 | |
161 | -/* __private is not supported yet |
162 | _CL_DECLARE_VSTORE(char , __private) |
163 | _CL_DECLARE_VSTORE(uchar , __private) |
164 | _CL_DECLARE_VSTORE(short , __private) |
165 | @@ -1559,7 +1554,6 @@ |
166 | #ifdef cl_khr_fp64 |
167 | _CL_DECLARE_VSTORE(double, __private) |
168 | #endif |
169 | -*/ |
170 | |
171 | #ifdef cl_khr_fp16 |
172 | |
173 | @@ -1579,7 +1573,7 @@ |
174 | _CL_DECLARE_VLOAD_HALF(__global) |
175 | _CL_DECLARE_VLOAD_HALF(__local) |
176 | _CL_DECLARE_VLOAD_HALF(__constant) |
177 | -/* _CL_DECLARE_VLOAD_HALF(__private) */ |
178 | +_CL_DECLARE_VLOAD_HALF(__private) |
179 | |
180 | /* stores to half may have a suffix: _rte _rtz _rtp _rtn */ |
181 | #define _CL_DECLARE_VSTORE_HALF(MOD, SUFFIX) \ |
182 | @@ -1605,11 +1599,11 @@ |
183 | _CL_DECLARE_VSTORE_HALF(__local , _rtz) |
184 | _CL_DECLARE_VSTORE_HALF(__local , _rtp) |
185 | _CL_DECLARE_VSTORE_HALF(__local , _rtn) |
186 | -/* _CL_DECLARE_VSTORE_HALF(__private , ) */ |
187 | -/* _CL_DECLARE_VSTORE_HALF(__private , _rte) */ |
188 | -/* _CL_DECLARE_VSTORE_HALF(__private , _rtz) */ |
189 | -/* _CL_DECLARE_VSTORE_HALF(__private , _rtp) */ |
190 | -/* _CL_DECLARE_VSTORE_HALF(__private , _rtn) */ |
191 | +_CL_DECLARE_VSTORE_HALF(__private , ) |
192 | +_CL_DECLARE_VSTORE_HALF(__private , _rte) |
193 | +_CL_DECLARE_VSTORE_HALF(__private , _rtz) |
194 | +_CL_DECLARE_VSTORE_HALF(__private , _rtp) |
195 | +_CL_DECLARE_VSTORE_HALF(__private , _rtn) |
196 | |
197 | #endif |
198 | |
199 | |
200 | === modified file 'lib/kernel/add_sat.cl' |
201 | --- lib/kernel/add_sat.cl 2011-12-20 18:54:48 +0000 |
202 | +++ lib/kernel/add_sat.cl 2012-03-06 23:20:24 +0000 |
203 | @@ -49,6 +49,6 @@ |
204 | }) : |
205 | /* unsigned */ |
206 | ({ |
207 | - gtype max = (sgtype)-1; |
208 | + gtype max = ~(gtype)0; |
209 | a > max-b ? max : a+b; |
210 | })) |
211 | |
212 | === modified file 'lib/kernel/as_type.cl' |
213 | --- lib/kernel/as_type.cl 2011-12-16 02:11:57 +0000 |
214 | +++ lib/kernel/as_type.cl 2012-03-06 23:20:24 +0000 |
215 | @@ -21,11 +21,16 @@ |
216 | THE SOFTWARE. |
217 | */ |
218 | |
219 | -#define DEFINE_AS_TYPE(SRC, DST) \ |
220 | - DST _cl_overloadable \ |
221 | - as_##DST(SRC a) \ |
222 | - { \ |
223 | - return *(DST*)&a; \ |
224 | +#define DEFINE_AS_TYPE(SRC, DST) \ |
225 | + DST _cl_overloadable \ |
226 | + as_##DST(SRC a) \ |
227 | + { \ |
228 | + /* This may not be safe: */ \ |
229 | + /* return *(DST*)&a; */ \ |
230 | + /* This should be safe, but is not officially supported in OpenCL: */ \ |
231 | + union { SRC src; DST dst; } cvt; \ |
232 | + cvt.src = a; \ |
233 | + return cvt.dst; \ |
234 | } |
235 | |
236 | |
237 | |
238 | === added file 'lib/kernel/sincos.cl' |
239 | --- lib/kernel/sincos.cl 1970-01-01 00:00:00 +0000 |
240 | +++ lib/kernel/sincos.cl 2012-03-06 23:20:24 +0000 |
241 | @@ -0,0 +1,26 @@ |
242 | +/* OpenCL built-in library: sincos() |
243 | + |
244 | + Copyright (c) 2011 Universidad Rey Juan Carlos |
245 | + |
246 | + Permission is hereby granted, free of charge, to any person obtaining a copy |
247 | + of this software and associated documentation files (the "Software"), to deal |
248 | + in the Software without restriction, including without limitation the rights |
249 | + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
250 | + copies of the Software, and to permit persons to whom the Software is |
251 | + furnished to do so, subject to the following conditions: |
252 | + |
253 | + The above copyright notice and this permission notice shall be included in |
254 | + all copies or substantial portions of the Software. |
255 | + |
256 | + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
257 | + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
258 | + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
259 | + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
260 | + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
261 | + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
262 | + THE SOFTWARE. |
263 | +*/ |
264 | + |
265 | +#include "templates.h" |
266 | + |
267 | +DEFINE_EXPR_V_VPV(sincos, ({ *b=cos(a); sin(a); })) |
268 | |
269 | === modified file 'lib/kernel/sources.mk' |
270 | --- lib/kernel/sources.mk 2012-01-20 16:36:41 +0000 |
271 | +++ lib/kernel/sources.mk 2012-03-06 23:20:24 +0000 |
272 | @@ -64,6 +64,7 @@ |
273 | round.cl \ |
274 | rsqrt.cl \ |
275 | sin.cl \ |
276 | + sincos.cl \ |
277 | sinh.cl \ |
278 | sinpi.cl \ |
279 | sqrt.cl \ |
280 | |
281 | === modified file 'lib/kernel/templates.h' |
282 | --- lib/kernel/templates.h 2012-01-20 16:36:41 +0000 |
283 | +++ lib/kernel/templates.h 2012-03-06 23:20:24 +0000 |
284 | @@ -565,15 +565,13 @@ |
285 | typedef STYPE stype; \ |
286 | return EXPR; \ |
287 | } \ |
288 | - /* __private is not supported yet \ |
289 | VTYPE __attribute__ ((overloadable)) \ |
290 | NAME(VTYPE a, __private VTYPE *b) \ |
291 | { \ |
292 | typedef VTYPE vtype; \ |
293 | typedef STYPE stype; \ |
294 | return EXPR; \ |
295 | - } \ |
296 | - */ |
297 | + } |
298 | #define DEFINE_EXPR_V_VPV(NAME, EXPR) \ |
299 | IMPLEMENT_EXPR_V_VPV(NAME, EXPR, float , float ) \ |
300 | IMPLEMENT_EXPR_V_VPV(NAME, EXPR, float2 , float ) \ |
301 | |
302 | === modified file 'tests/testsuite.at' |
303 | --- tests/testsuite.at 2012-02-09 18:54:22 +0000 |
304 | +++ tests/testsuite.at 2012-03-06 23:20:24 +0000 |
305 | @@ -167,7 +167,7 @@ |
306 | |
307 | AT_BANNER([Kernel runtime library]) |
308 | |
309 | -AT_SETUP([Kernel function bitselect]) |
310 | +AT_SETUP([Kernel functions bitselect clz popcount]) |
311 | AT_DATA([expout], |
312 | [Running test test_bitselect... |
313 | OK |
314 | @@ -183,7 +183,7 @@ |
315 | AT_CHECK([$abs_top_builddir/examples/kernel/kernel test_fabs], 0, expout) |
316 | AT_CLEANUP |
317 | |
318 | -AT_SETUP([Kernel functions abs abs_diff add_sat hadd rhadd]) |
319 | +AT_SETUP([Kernel functions abs abs_diff add_sat hadd mad_hi mad_sat mul_hi rhadd sub_sat]) |
320 | AT_DATA([expout], |
321 | [Running test test_hadd... |
322 | OK |
15:28 < visit0r> eschnett: openclbench of ViennaCL fails with your modifications
15:28 < visit0r> http://codepad.org/apFci5iG
15:29 < visit0r> ( ViennaCL-1.2.0-src/build/examples/benchmarks$ ./openclbench )