Merge lp:~schnetter/pocl/main into lp:~pocl/pocl/trunk
- main
- Merge into trunk
Proposed by
Erik Schnetter
Status: | Merged |
---|---|
Merged at revision: | 66 |
Proposed branch: | lp:~schnetter/pocl/main |
Merge into: | lp:~pocl/pocl/trunk |
Diff against target: |
503 lines (+177/-66) 20 files modified
include/_kernel.h (+26/-4) lib/kernel/acospi.cl (+4/-0) lib/kernel/asinpi.cl (+4/-0) lib/kernel/atan2pi.cl (+4/-0) lib/kernel/atanpi.cl (+4/-0) lib/kernel/cospi.cl (+4/-1) lib/kernel/degrees.cl (+5/-1) lib/kernel/exp10.cl (+4/-0) lib/kernel/fdim.cl (+1/-1) lib/kernel/fract.cl (+5/-0) lib/kernel/radians.cl (+5/-1) lib/kernel/rsqrt.cl (+1/-1) lib/kernel/signbit.cl (+1/-1) lib/kernel/sinpi.cl (+4/-0) lib/kernel/smoothstep.cl (+2/-2) lib/kernel/step.cl (+2/-2) lib/kernel/tanpi.cl (+4/-0) lib/kernel/tce/Makefile.am (+0/-52) lib/kernel/templates.h (+42/-0) lib/kernel/x86_64/signbit.cl (+55/-0) |
To merge this branch: | bzr merge lp:~schnetter/pocl/main |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Carlos Sánchez de La Lama | | | Approve |
Review via email:
|
Commit message
Description of the change
To post a comment you must log in.
lp:~schnetter/pocl/main
updated
- 77. By Erik Schnetter: Add missing lib/kernel/tce that got lost in the merge
Revision history for this message
![](/+icing/build/overlay/assets/skins/sam/images/close.gif)
Carlos Sánchez de La Lama (csanchezdll) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'include/_kernel.h' |
2 | --- include/_kernel.h 2011-11-03 18:58:00 +0000 |
3 | +++ include/_kernel.h 2011-11-05 00:27:24 +0000 |
4 | @@ -48,9 +48,6 @@ |
5 | #define cl_khr_fp64 |
6 | #define cl_khr_int64 |
7 | |
8 | -/* Shouldn't the compiler define this? */ |
9 | -//#define __SSE4_1__ |
10 | - |
11 | #endif |
12 | |
13 | /* Enable double precision. This should really only be done when |
14 | @@ -928,6 +925,7 @@ |
15 | #define M_SQRT2_F 1.41421356237309504880168872421f |
16 | #define M_SQRT1_2_F 0.707106781186547524400844362105f |
17 | |
18 | +#ifdef cl_khr_fp64 |
19 | #define HUGE_VAL __builtin_huge_val() |
20 | |
21 | #define DBL_DIG 15 |
22 | @@ -953,6 +951,7 @@ |
23 | #define M_2_SQRTPI 1.12837916709551257389615890312 |
24 | #define M_SQRT2 1.41421356237309504880168872421 |
25 | #define M_SQRT1_2 0.707106781186547524400844362105 |
26 | +#endif |
27 | |
28 | |
29 | |
30 | /* Math Functions */ |
31 | @@ -1344,6 +1343,29 @@ |
32 | int16 _cl_overloadable NAME(float16 ); |
33 | #endif |
34 | #ifdef cl_khr_fp64 |
35 | +#define _CL_DECLARE_FUNC_K_V(NAME) \ |
36 | + int _cl_overloadable NAME(float ); \ |
37 | + int2 _cl_overloadable NAME(float2 ); \ |
38 | + int3 _cl_overloadable NAME(float3 ); \ |
39 | + int4 _cl_overloadable NAME(float4 ); \ |
40 | + int8 _cl_overloadable NAME(float8 ); \ |
41 | + int16 _cl_overloadable NAME(float16 ); \ |
42 | + int _cl_overloadable NAME(double ); \ |
43 | + long2 _cl_overloadable NAME(double2 ); \ |
44 | + long3 _cl_overloadable NAME(double3 ); \ |
45 | + long4 _cl_overloadable NAME(double4 ); \ |
46 | + long8 _cl_overloadable NAME(double8 ); \ |
47 | + long16 _cl_overloadable NAME(double16); |
48 | +#else |
49 | +#define _CL_DECLARE_FUNC_K_V(NAME) \ |
50 | + int _cl_overloadable NAME(float ); \ |
51 | + int2 _cl_overloadable NAME(float2 ); \ |
52 | + int3 _cl_overloadable NAME(float3 ); \ |
53 | + int4 _cl_overloadable NAME(float4 ); \ |
54 | + int8 _cl_overloadable NAME(float8 ); \ |
55 | + int16 _cl_overloadable NAME(float16 ); |
56 | +#endif |
57 | +#ifdef cl_khr_fp64 |
58 | #define _CL_DECLARE_FUNC_S_V(NAME) \ |
59 | float _cl_overloadable NAME(float ); \ |
60 | float _cl_overloadable NAME(float2 ); \ |
61 | @@ -2242,7 +2264,7 @@ |
62 | _CL_DECLARE_FUNC_J_VV(isnormal) |
63 | _CL_DECLARE_FUNC_J_VV(isordered) |
64 | _CL_DECLARE_FUNC_J_VV(isunordered) |
65 | -_CL_DECLARE_FUNC_J_V(signbit) |
66 | +_CL_DECLARE_FUNC_K_V(signbit) |
67 | _CL_DECLARE_FUNC_I_IG(any) |
68 | _CL_DECLARE_FUNC_I_IG(all) |
69 | _CL_DECLARE_FUNC_G_GGG(bitselect) |
70 | |
71 | === modified file 'lib/kernel/acospi.cl' |
72 | --- lib/kernel/acospi.cl 2011-10-26 03:01:29 +0000 |
73 | +++ lib/kernel/acospi.cl 2011-11-05 00:27:24 +0000 |
74 | @@ -23,4 +23,8 @@ |
75 | |
76 | #include "templates.h" |
77 | |
78 | +#ifdef cl_khr_fp64 |
79 | DEFINE_EXPR_V_V(acospi, acos(a)/(stype)M_PI) |
80 | +#else |
81 | +DEFINE_EXPR_V_V(acospi, acos(a)/M_PI_F) |
82 | +#endif |
83 | |
84 | === modified file 'lib/kernel/asinpi.cl' |
85 | --- lib/kernel/asinpi.cl 2011-10-26 03:01:29 +0000 |
86 | +++ lib/kernel/asinpi.cl 2011-11-05 00:27:24 +0000 |
87 | @@ -23,4 +23,8 @@ |
88 | |
89 | #include "templates.h" |
90 | |
91 | +#ifdef cl_khr_fp64 |
92 | DEFINE_EXPR_V_V(asinpi, asin(a)/(stype)M_PI) |
93 | +#else |
94 | +DEFINE_EXPR_V_V(asinpi, asin(a)/M_PI_F) |
95 | +#endif |
96 | |
97 | === modified file 'lib/kernel/atan2pi.cl' |
98 | --- lib/kernel/atan2pi.cl 2011-10-26 03:01:29 +0000 |
99 | +++ lib/kernel/atan2pi.cl 2011-11-05 00:27:24 +0000 |
100 | @@ -23,4 +23,8 @@ |
101 | |
102 | #include "templates.h" |
103 | |
104 | +#ifdef cl_khr_fp64 |
105 | DEFINE_EXPR_V_VV(atan2pi, atan2(a, b)/(stype)M_PI) |
106 | +#else |
107 | +DEFINE_EXPR_V_VV(atan2pi, atan2(a, b)/M_PI_F) |
108 | +#endif |
109 | |
110 | === modified file 'lib/kernel/atanpi.cl' |
111 | --- lib/kernel/atanpi.cl 2011-10-26 03:01:29 +0000 |
112 | +++ lib/kernel/atanpi.cl 2011-11-05 00:27:24 +0000 |
113 | @@ -23,4 +23,8 @@ |
114 | |
115 | #include "templates.h" |
116 | |
117 | +#ifdef cl_khr_fp64 |
118 | DEFINE_EXPR_V_V(atanpi, atan(a)/(stype)M_PI) |
119 | +#else |
120 | +DEFINE_EXPR_V_V(atanpi, atan(a)/M_PI_F) |
121 | +#endif |
122 | |
123 | === modified file 'lib/kernel/cospi.cl' |
124 | --- lib/kernel/cospi.cl 2011-10-26 03:01:29 +0000 |
125 | +++ lib/kernel/cospi.cl 2011-11-05 00:27:24 +0000 |
126 | @@ -23,5 +23,8 @@ |
127 | |
128 | #include "templates.h" |
129 | |
130 | -#undef cospi |
131 | +#ifdef cl_khr_fp64 |
132 | DEFINE_EXPR_V_V(cospi, cos((stype)M_PI*a)) |
133 | +#else |
134 | +DEFINE_EXPR_V_V(cospi, cos(M_PI_F*a)) |
135 | +#endif |
136 | |
137 | === modified file 'lib/kernel/degrees.cl' |
138 | --- lib/kernel/degrees.cl 2011-10-26 21:01:40 +0000 |
139 | +++ lib/kernel/degrees.cl 2011-11-05 00:27:24 +0000 |
140 | @@ -23,4 +23,8 @@ |
141 | |
142 | #include "templates.h" |
143 | |
144 | -DEFINE_EXPR_V_V(degrees, (stype)(180.0 / M_PI) * a) |
145 | +#ifdef cl_khr_fp64 |
146 | +DEFINE_EXPR_V_V(degrees, (stype)(180 / M_PI) * a) |
147 | +#else |
148 | +DEFINE_EXPR_V_V(degrees, (180 / M_PI_F) * a) |
149 | +#endif |
150 | |
151 | === modified file 'lib/kernel/exp10.cl' |
152 | --- lib/kernel/exp10.cl 2011-10-26 03:01:29 +0000 |
153 | +++ lib/kernel/exp10.cl 2011-11-05 00:27:24 +0000 |
154 | @@ -23,4 +23,8 @@ |
155 | |
156 | #include "templates.h" |
157 | |
158 | +#ifdef cl_khr_fp64 |
159 | DEFINE_EXPR_V_V(exp10, exp((stype)M_LN10*a)) |
160 | +#else |
161 | +DEFINE_EXPR_V_V(exp10, exp(M_LN10_F*a)) |
162 | +#endif |
163 | |
164 | === modified file 'lib/kernel/fdim.cl' |
165 | --- lib/kernel/fdim.cl 2011-10-26 03:01:29 +0000 |
166 | +++ lib/kernel/fdim.cl 2011-11-05 00:27:24 +0000 |
167 | @@ -23,4 +23,4 @@ |
168 | |
169 | #include "templates.h" |
170 | |
171 | -DEFINE_EXPR_V_VV(fdim, fmax(a-b, (stype)0.0)) |
172 | +DEFINE_EXPR_V_VV(fdim, fmax(a-b, (stype)0)) |
173 | |
174 | === modified file 'lib/kernel/fract.cl' |
175 | --- lib/kernel/fract.cl 2011-10-26 03:01:29 +0000 |
176 | +++ lib/kernel/fract.cl 2011-11-05 00:27:24 +0000 |
177 | @@ -23,4 +23,9 @@ |
178 | |
179 | #include "templates.h" |
180 | |
181 | + |
182 | +#ifdef cl_khr_fp64 |
183 | DEFINE_EXPR_V_VPV(fract, fmin(a - floor(a), (vtype)(stype)(sizeof(stype)==4 ? 0x1.fffffep-1f : 0x1.fffffffffffffp-1))) |
184 | +#else |
185 | +DEFINE_EXPR_V_VPV(fract, fmin(a - floor(a), (vtype)(stype)0x1.fffffep-1f)) |
186 | +#endif |
187 | |
188 | === modified file 'lib/kernel/radians.cl' |
189 | --- lib/kernel/radians.cl 2011-10-26 21:01:40 +0000 |
190 | +++ lib/kernel/radians.cl 2011-11-05 00:27:24 +0000 |
191 | @@ -23,4 +23,8 @@ |
192 | |
193 | #include "templates.h" |
194 | |
195 | -DEFINE_EXPR_V_V(radians, (stype)(M_PI / 180.0) * a) |
196 | +#ifdef cl_khr_fp64 |
197 | +DEFINE_EXPR_V_V(radians, (stype)(M_PI / 180) * a) |
198 | +#else |
199 | +DEFINE_EXPR_V_V(radians, (M_PI_F / 180) * a) |
200 | +#endif |
201 | |
202 | === modified file 'lib/kernel/rsqrt.cl' |
203 | --- lib/kernel/rsqrt.cl 2011-10-26 03:01:29 +0000 |
204 | +++ lib/kernel/rsqrt.cl 2011-11-05 00:27:24 +0000 |
205 | @@ -23,4 +23,4 @@ |
206 | |
207 | #include "templates.h" |
208 | |
209 | -DEFINE_EXPR_V_V(rsqrt, (stype)1.0/sqrt(a)) |
210 | +DEFINE_EXPR_V_V(rsqrt, (stype)1/sqrt(a)) |
211 | |
212 | === modified file 'lib/kernel/signbit.cl' |
213 | --- lib/kernel/signbit.cl 2011-10-27 01:35:56 +0000 |
214 | +++ lib/kernel/signbit.cl 2011-11-05 00:27:24 +0000 |
215 | @@ -23,4 +23,4 @@ |
216 | |
217 | #include "templates.h" |
218 | |
219 | -DEFINE_BUILTIN_J_V(signbit) |
220 | +DEFINE_BUILTIN_K_V(signbit) |
221 | |
222 | === modified file 'lib/kernel/sinpi.cl' |
223 | --- lib/kernel/sinpi.cl 2011-10-26 03:01:29 +0000 |
224 | +++ lib/kernel/sinpi.cl 2011-11-05 00:27:24 +0000 |
225 | @@ -23,4 +23,8 @@ |
226 | |
227 | #include "templates.h" |
228 | |
229 | +#ifdef cl_khr_fp64 |
230 | DEFINE_EXPR_V_V(sinpi, sin((stype)M_PI*a)) |
231 | +#else |
232 | +DEFINE_EXPR_V_V(sinpi, sin(M_PI_F*a)) |
233 | +#endif |
234 | |
235 | === modified file 'lib/kernel/smoothstep.cl' |
236 | --- lib/kernel/smoothstep.cl 2011-10-27 00:18:42 +0000 |
237 | +++ lib/kernel/smoothstep.cl 2011-11-05 00:27:24 +0000 |
238 | @@ -25,7 +25,7 @@ |
239 | |
240 | DEFINE_EXPR_V_VVV(smoothstep, |
241 | ({ |
242 | - vtype t = clamp((c - a) / (b - a), (stype)0.0, (stype)1.0); |
243 | - t * t * mad((vtype)-2.0, t, (vtype)3.0); |
244 | + vtype t = clamp((c - a) / (b - a), (stype)0, (stype)1); |
245 | + t * t * mad((vtype)-2, t, (vtype)3); |
246 | })) |
247 | DEFINE_EXPR_V_SSV(smoothstep, smoothstep((vtype)a, (vtype)b, c)) |
248 | |
249 | === modified file 'lib/kernel/step.cl' |
250 | --- lib/kernel/step.cl 2011-10-27 00:18:42 +0000 |
251 | +++ lib/kernel/step.cl 2011-11-05 00:27:24 +0000 |
252 | @@ -27,8 +27,8 @@ |
253 | // DEFINE_EXPR_V_VV(step, b < a ? (vtype)0.0 : (vtype)1.0) |
254 | DEFINE_EXPR_V_VV(step, |
255 | ({ |
256 | - vtype zero = 0.0; |
257 | - vtype one = 1.0; |
258 | + vtype zero = 0; |
259 | + vtype one = 1; |
260 | jtype result = b < a ? *(jtype*)&zero : *(jtype*)&one; |
261 | *(vtype*)&result; |
262 | })) |
263 | |
264 | === modified file 'lib/kernel/tanpi.cl' |
265 | --- lib/kernel/tanpi.cl 2011-10-26 03:01:29 +0000 |
266 | +++ lib/kernel/tanpi.cl 2011-11-05 00:27:24 +0000 |
267 | @@ -23,4 +23,8 @@ |
268 | |
269 | #include "templates.h" |
270 | |
271 | +#ifdef cl_khr_fp64 |
272 | DEFINE_EXPR_V_V(tanpi, tan((stype)M_PI*a)) |
273 | +#else |
274 | +DEFINE_EXPR_V_V(tanpi, tan(M_PI_F*a)) |
275 | +#endif |
276 | |
277 | === added directory 'lib/kernel/tce' |
278 | === removed directory 'lib/kernel/tce' |
279 | === added file 'lib/kernel/tce/Makefile.am' |
280 | --- lib/kernel/tce/Makefile.am 1970-01-01 00:00:00 +0000 |
281 | +++ lib/kernel/tce/Makefile.am 2011-11-05 00:27:24 +0000 |
282 | @@ -0,0 +1,52 @@ |
283 | +# Process this file with automake to produce Makefile.in (in this, |
284 | +# and all subdirectories). |
285 | +# Makefile.am for pocl/lib/kernel/tce. |
286 | +# |
287 | +# The TCE (http://tce.cs.tut.fi) target. |
288 | +# |
289 | +# Copyright (c) 2011 Universidad Rey Juan Carlos |
290 | +# |
291 | +# Permission is hereby granted, free of charge, to any person obtaining a copy |
292 | +# of this software and associated documentation files (the "Software"), to deal |
293 | +# in the Software without restriction, including without limitation the rights |
294 | +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
295 | +# copies of the Software, and to permit persons to whom the Software is |
296 | +# furnished to do so, subject to the following conditions: |
297 | +# |
298 | +# The above copyright notice and this permission notice shall be included in |
299 | +# all copies or substantial portions of the Software. |
300 | +# |
301 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
302 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
303 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
304 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
305 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
306 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
307 | +# THE SOFTWARE. |
308 | + |
309 | +targetpkglibdir = $(pkglibdir)/tce |
310 | +targetpkglib_LIBRARIES = libkernel.a |
311 | + |
312 | +vpath %.cl @srcdir@/.. |
313 | +vpath %.c @srcdir@/.. |
314 | +vpath %.ll @srcdir@/.. |
315 | + |
316 | +include ../sources.mk |
317 | + |
318 | +libkernel_a_LIBADD = barrier.o |
319 | +EXTRA_DIST = barrier.ll |
320 | + |
321 | +RANLIB = `@LLVM_CONFIG@ --bindir`/llvm-ranlib |
322 | +AR = `@LLVM_CONFIG@ --bindir`/llvm-ar |
323 | + |
324 | +.cl.o: |
325 | + $(CLANG) $(AM_CPPFLAGS) $(CLANGFLAGS) -ccc-host-triple tce-tut-llvm -c -emit-llvm -include $(top_srcdir)/include/_kernel.h -o $@ $< |
326 | + |
327 | +.c.o: |
328 | + $(CLANG) $(AM_CPPFLAGS) $(CLANGFLAGS) -ccc-host-triple tce-tut-llvm -c -emit-llvm -include $(top_srcdir)/include/_kernel.h -o $@ $< |
329 | + |
330 | +barrier.o: barrier.ll |
331 | + $(LLVM_AS) -o $@ $< |
332 | + |
333 | +$(libkernel_a_SOURCES:.c=.o): $(top_srcdir)/include/_kernel.h ../templates.h |
334 | +$(libkernel_a_SOURCES:.cl=.o): $(top_srcdir)/include/_kernel.h ../templates.h |
335 | |
336 | === removed file 'lib/kernel/tce/Makefile.am' |
337 | --- lib/kernel/tce/Makefile.am 2011-11-03 18:58:00 +0000 |
338 | +++ lib/kernel/tce/Makefile.am 1970-01-01 00:00:00 +0000 |
339 | @@ -1,52 +0,0 @@ |
340 | -# Process this file with automake to produce Makefile.in (in this, |
341 | -# and all subdirectories). |
342 | -# Makefile.am for pocl/lib/kernel/tce. |
343 | -# |
344 | -# The TCE (http://tce.cs.tut.fi) target. |
345 | -# |
346 | -# Copyright (c) 2011 Universidad Rey Juan Carlos |
347 | -# |
348 | -# Permission is hereby granted, free of charge, to any person obtaining a copy |
349 | -# of this software and associated documentation files (the "Software"), to deal |
350 | -# in the Software without restriction, including without limitation the rights |
351 | -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
352 | -# copies of the Software, and to permit persons to whom the Software is |
353 | -# furnished to do so, subject to the following conditions: |
354 | -# |
355 | -# The above copyright notice and this permission notice shall be included in |
356 | -# all copies or substantial portions of the Software. |
357 | -# |
358 | -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
359 | -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
360 | -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
361 | -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
362 | -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
363 | -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
364 | -# THE SOFTWARE. |
365 | - |
366 | -targetpkglibdir = $(pkglibdir)/tce |
367 | -targetpkglib_LIBRARIES = libkernel.a |
368 | - |
369 | -vpath %.cl @srcdir@/.. |
370 | -vpath %.c @srcdir@/.. |
371 | -vpath %.ll @srcdir@/.. |
372 | - |
373 | -include ../sources.mk |
374 | - |
375 | -libkernel_a_LIBADD = barrier.o |
376 | -EXTRA_DIST = barrier.ll |
377 | - |
378 | -RANLIB = `@LLVM_CONFIG@ --bindir`/llvm-ranlib |
379 | -AR = `@LLVM_CONFIG@ --bindir`/llvm-ar |
380 | - |
381 | -.cl.o: |
382 | - $(CLANG) $(AM_CPPFLAGS) $(CLANGFLAGS) -ccc-host-triple tce-tut-llvm -c -emit-llvm -include $(top_srcdir)/include/_kernel.h -o $@ $< |
383 | - |
384 | -.c.o: |
385 | - $(CLANG) $(AM_CPPFLAGS) $(CLANGFLAGS) -ccc-host-triple tce-tut-llvm -c -emit-llvm -include $(top_srcdir)/include/_kernel.h -o $@ $< |
386 | - |
387 | -barrier.o: barrier.ll |
388 | - $(LLVM_AS) -o $@ $< |
389 | - |
390 | -$(libkernel_a_SOURCES:.c=.o): $(top_srcdir)/include/_kernel.h ../templates.h |
391 | -$(libkernel_a_SOURCES:.cl=.o): $(top_srcdir)/include/_kernel.h ../templates.h |
392 | |
393 | === modified file 'lib/kernel/templates.h' |
394 | --- lib/kernel/templates.h 2011-11-01 16:33:17 +0000 |
395 | +++ lib/kernel/templates.h 2011-11-05 00:27:24 +0000 |
396 | @@ -359,6 +359,48 @@ |
397 | IMPLEMENT_BUILTIN_J_V(NAME, int16, float16 , lo, hi) |
398 | #endif |
399 | |
400 | +#define IMPLEMENT_BUILTIN_K_V(NAME, JTYPE, VTYPE, LO, HI) \ |
401 | + JTYPE __attribute__ ((overloadable)) \ |
402 | + NAME(VTYPE a) \ |
403 | + { \ |
404 | + return (JTYPE)(NAME(a.LO), NAME(a.HI)); \ |
405 | + } |
406 | +#ifdef cl_khr_fp64 |
407 | +#define DEFINE_BUILTIN_K_V(NAME) \ |
408 | + int __attribute__ ((overloadable)) \ |
409 | + NAME(float a) \ |
410 | + { \ |
411 | + return __builtin_##NAME##f(a); \ |
412 | + } \ |
413 | + int __attribute__ ((overloadable)) \ |
414 | + NAME(double a) \ |
415 | + { \ |
416 | + return __builtin_##NAME(a); \ |
417 | + } \ |
418 | + IMPLEMENT_BUILTIN_K_V(NAME, int2 , float2 , lo, hi) \ |
419 | + IMPLEMENT_BUILTIN_K_V(NAME, int3 , float3 , lo, s2) \ |
420 | + IMPLEMENT_BUILTIN_K_V(NAME, int4 , float4 , lo, hi) \ |
421 | + IMPLEMENT_BUILTIN_K_V(NAME, int8 , float8 , lo, hi) \ |
422 | + IMPLEMENT_BUILTIN_K_V(NAME, int16 , float16 , lo, hi) \ |
423 | + IMPLEMENT_BUILTIN_K_V(NAME, long2 , double2 , lo, hi) \ |
424 | + IMPLEMENT_BUILTIN_K_V(NAME, long3 , double3 , lo, s2) \ |
425 | + IMPLEMENT_BUILTIN_K_V(NAME, long4 , double4 , lo, hi) \ |
426 | + IMPLEMENT_BUILTIN_K_V(NAME, long8 , double8 , lo, hi) \ |
427 | + IMPLEMENT_BUILTIN_K_V(NAME, long16, double16, lo, hi) |
428 | +#else |
429 | +#define DEFINE_BUILTIN_K_V(NAME) \ |
430 | + int __attribute__ ((overloadable)) \ |
431 | + NAME(float a) \ |
432 | + { \ |
433 | + return __builtin_##NAME##f(a); \ |
434 | + } \ |
435 | + IMPLEMENT_BUILTIN_K_V(NAME, int2 , float2 , lo, hi) \ |
436 | + IMPLEMENT_BUILTIN_K_V(NAME, int3 , float3 , lo, s2) \ |
437 | + IMPLEMENT_BUILTIN_K_V(NAME, int4 , float4 , lo, hi) \ |
438 | + IMPLEMENT_BUILTIN_K_V(NAME, int8 , float8 , lo, hi) \ |
439 | + IMPLEMENT_BUILTIN_K_V(NAME, int16, float16 , lo, hi) |
440 | +#endif |
441 | + |
442 | #define IMPLEMENT_EXPR_V_V(NAME, EXPR, VTYPE, STYPE) \ |
443 | VTYPE __attribute__ ((overloadable)) \ |
444 | NAME(VTYPE a, VTYPE b) \ |
445 | |
446 | === added file 'lib/kernel/x86_64/signbit.cl' |
447 | --- lib/kernel/x86_64/signbit.cl 1970-01-01 00:00:00 +0000 |
448 | +++ lib/kernel/x86_64/signbit.cl 2011-11-05 00:27:24 +0000 |
449 | @@ -0,0 +1,55 @@ |
450 | +/* OpenCL built-in library: signbit() |
451 | + |
452 | + Copyright (c) 2011 Universidad Rey Juan Carlos |
453 | + |
454 | + Permission is hereby granted, free of charge, to any person obtaining a copy |
455 | + of this software and associated documentation files (the "Software"), to deal |
456 | + in the Software without restriction, including without limitation the rights |
457 | + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
458 | + copies of the Software, and to permit persons to whom the Software is |
459 | + furnished to do so, subject to the following conditions: |
460 | + |
461 | + The above copyright notice and this permission notice shall be included in |
462 | + all copies or substantial portions of the Software. |
463 | + |
464 | + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
465 | + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
466 | + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
467 | + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
468 | + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
469 | + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
470 | + THE SOFTWARE. |
471 | +*/ |
472 | + |
473 | +#define IMPLEMENT_SIGNBIT_BUILTIN_FLOAT __builtin_signbitf(a) |
474 | +#define IMPLEMENT_SIGNBIT_BUILTIN_DOUBLE __builtin_signbit(a) |
475 | +#define IMPLEMENT_SIGNBIT_DIRECT \ |
476 | + ({ \ |
477 | + int bits = CHAR_BIT * sizeof(stype); \ |
478 | + *(jtype*)&a >> (jtype)(bits-1); \ |
479 | + }) |
480 | + |
481 | +#define IMPLEMENT_DIRECT(NAME, VTYPE, STYPE, JTYPE, EXPR) \ |
482 | + JTYPE _cl_overloadable NAME(VTYPE a) \ |
483 | + { \ |
484 | + typedef VTYPE vtype; \ |
485 | + typedef STYPE stype; \ |
486 | + typedef JTYPE jtype; \ |
487 | + return EXPR; \ |
488 | + } |
489 | + |
490 | + |
491 | + |
492 | +IMPLEMENT_DIRECT(signbit, float , float, int , IMPLEMENT_SIGNBIT_BUILTIN_FLOAT) |
493 | +IMPLEMENT_DIRECT(signbit, float2 , float, int2 , IMPLEMENT_SIGNBIT_DIRECT) |
494 | +IMPLEMENT_DIRECT(signbit, float3 , float, int3 , IMPLEMENT_SIGNBIT_DIRECT) |
495 | +IMPLEMENT_DIRECT(signbit, float4 , float, int4 , IMPLEMENT_SIGNBIT_DIRECT) |
496 | +IMPLEMENT_DIRECT(signbit, float8 , float, int8 , IMPLEMENT_SIGNBIT_DIRECT) |
497 | +IMPLEMENT_DIRECT(signbit, float16, float, int16, IMPLEMENT_SIGNBIT_DIRECT) |
498 | + |
499 | +IMPLEMENT_DIRECT(signbit, double , double, int , IMPLEMENT_SIGNBIT_BUILTIN_DOUBLE) |
500 | +IMPLEMENT_DIRECT(signbit, double2 , double, long2 , IMPLEMENT_SIGNBIT_DIRECT) |
501 | +IMPLEMENT_DIRECT(signbit, double3 , double, long3 , IMPLEMENT_SIGNBIT_DIRECT) |
502 | +IMPLEMENT_DIRECT(signbit, double4 , double, long4 , IMPLEMENT_SIGNBIT_DIRECT) |
503 | +IMPLEMENT_DIRECT(signbit, double8 , double, long8 , IMPLEMENT_SIGNBIT_DIRECT) |
504 | +IMPLEMENT_DIRECT(signbit, double16, double, long16, IMPLEMENT_SIGNBIT_DIRECT) |