Merge lp:~viitanet/pocl/ImageSupport into lp:~pocl/pocl/trunk
- ImageSupport
- Merge into trunk
Status: | Merged |
---|---|
Merged at revision: | 306 |
Proposed branch: | lp:~viitanet/pocl/ImageSupport |
Merge into: | lp:~pocl/pocl/trunk |
Diff against target: |
1394 lines (+900/-95) 32 files modified
TODO (+0/-3) examples/opencl-book-samples/ImageFilter2D.cl (+34/-0) examples/opencl-book-samples/Makefile.am (+1/-0) include/_kernel.h (+36/-0) include/pocl_device.h (+2/-0) lib/CL/Makefile.am (+14/-13) lib/CL/clCreateBuffer.c (+1/-0) lib/CL/clCreateImage2D.c (+108/-4) lib/CL/clCreateKernel.c (+2/-0) lib/CL/clCreateSampler.c (+30/-10) lib/CL/clEnqueueCopyBufferToImage.c (+46/-13) lib/CL/clEnqueueReadBufferRect.c (+2/-0) lib/CL/clEnqueueReadImage.c (+23/-16) lib/CL/clEnqueueWriteImage.c (+22/-16) lib/CL/clGetDeviceInfo.c (+1/-2) lib/CL/clGetSupportedImageFormats.c (+49/-10) lib/CL/clReleaseSampler.c (+3/-5) lib/CL/devices/basic/basic.c (+25/-0) lib/CL/devices/dev_image.h (+17/-0) lib/CL/devices/pthread/pthread.c (+39/-1) lib/CL/pocl_cl.h (+17/-1) lib/CL/pocl_image_util.c (+228/-0) lib/CL/pocl_image_util.h (+50/-0) lib/kernel/get_image_height.cl (+7/-0) lib/kernel/get_image_width.cl (+7/-0) lib/kernel/image.h (+15/-0) lib/kernel/read_image.cl (+30/-0) lib/kernel/sources.mk (+5/-1) lib/kernel/write_image.cl (+17/-0) lib/llvmopencl/GenerateHeader.cc (+39/-0) scripts/pocl-kernel.in (+4/-0) tests/testsuite-samples.at (+26/-0) |
To merge this branch: | bzr merge lp:~viitanet/pocl/ImageSupport |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
pocl maintaners | Pending | ||
Review via email: mp+107929@code.launchpad.net |
This proposal supersedes a proposal from 2012-05-28.
Commit message
Description of the change
Implemented parts of OpenCL Image API:
clCreateImage2D
clCreateSampler
clReleaseSampler
clEnqueueCopyBu
clEnqueueWriteImage
clEnqueueReadImage
clGetSupportedI
read_imagef
write_imagef
get_image_width
get_image_height
Supported data types: CL_UNORM_INT8, CL_FLOAT
Supported channel orders: CL_RGBA, CL_R
Sampler always defaults to CLK_NORMALIZED_
Added a 'make check' test from opencl-
Other possible tests:
Chapter 14, histogram, requires atomics
Chapter 19, oclFlow, requires OpenCV
Rodinia-leukocyte crashes on compiler bug (https:/
Rodinia-nn doesn't actually use images, but runs, TODO
Rodinia-gaussian doesn't actually use images; crashes
Rodinia-
- 306. By Pekka Jääskeläinen
-
Some image APIs from Timo.
Preview Diff
1 | === modified file 'TODO' |
2 | --- TODO 2012-05-24 21:26:09 +0000 |
3 | +++ TODO 2012-05-30 08:34:23 +0000 |
4 | @@ -49,15 +49,12 @@ |
5 | * 5.2.1 Creating buffer objects |
6 | * 5.2.4 Mapping buffer objects |
7 | * 5.3 Image objects |
8 | - * clCreateImage2D (deprecated in OpenCL 1.2) (*, R[gaussian, leukocyte, nn]) |
9 | * 5.3.3 Reading, Writing and Copying Image Objects |
10 | - * clEnqueueReadImage (*, R[nn]) |
11 | * 5.4 Querying, Umapping, Migrating, ... Mem objects |
12 | * 5.4.1 Retaining and Releasing Memory Objects |
13 | * 5.4.2 Unmapping Mapped Memory Objects |
14 | * 5.5 Sampler objects |
15 | * 5.5.1 Creating Sampler Objects |
16 | - * clCreateSampler (*) |
17 | * 5.6.1 Creating Program Objects |
18 | * 5.7.1 Creating Kernel Objects |
19 | * 5.9 Event objects |
20 | |
21 | === added file 'examples/opencl-book-samples/ImageFilter2D.cl' |
22 | --- examples/opencl-book-samples/ImageFilter2D.cl 1970-01-01 00:00:00 +0000 |
23 | +++ examples/opencl-book-samples/ImageFilter2D.cl 2012-05-30 08:34:23 +0000 |
24 | @@ -0,0 +1,34 @@ |
25 | + |
26 | + // Gaussian Kernel is: |
27 | + // 1 2 1 |
28 | + // 2 4 2 |
29 | + // 1 2 1 |
30 | +float kernelWeights[9] = { 1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f,1.0f, 2.0f, 1.0f }; |
31 | + |
32 | +// Gaussian filter of image |
33 | +__kernel void gaussian_filter(__read_only image2d_t srcImg, |
34 | + __write_only image2d_t dstImg, |
35 | + sampler_t sampler, |
36 | + int width, int height) |
37 | +{ |
38 | + int2 startImageCoord = (int2) (get_global_id(0) - 1, get_global_id(1) - 1); |
39 | + int2 endImageCoord = (int2) (get_global_id(0) + 1, get_global_id(1) + 1); |
40 | + int2 outImageCoord = (int2) (get_global_id(0), get_global_id(1)); |
41 | + |
42 | + if (outImageCoord.x < width && outImageCoord.y < height) |
43 | + { |
44 | + int weight = 0; |
45 | + float4 outColor = (float4)(0.0f, 0.0f, 0.0f, 0.0f); |
46 | + for( int y = startImageCoord.y; y <= endImageCoord.y; y++) |
47 | + { |
48 | + for( int x = startImageCoord.x; x <= endImageCoord.x; x++) |
49 | + { |
50 | + outColor += (read_imagef(srcImg, sampler, (int2)(x, y)) * (kernelWeights[weight] / 16.0f)); |
51 | + weight += 1; |
52 | + } |
53 | + } |
54 | + |
55 | + // Write the output value to image |
56 | + write_imagef(dstImg, outImageCoord, outColor); |
57 | + } |
58 | +} |
59 | |
60 | === modified file 'examples/opencl-book-samples/Makefile.am' |
61 | --- examples/opencl-book-samples/Makefile.am 2012-05-21 08:51:32 +0000 |
62 | +++ examples/opencl-book-samples/Makefile.am 2012-05-30 08:34:23 +0000 |
63 | @@ -39,6 +39,7 @@ |
64 | sed -i 's/size_t localWorkSize = maxWorkGroupSize/size_t localWorkSize = 2/g' src/Chapter_16/Dijkstra/oclDijkstraKernel.cpp; \ |
65 | sed -i 's/device.j..type == CL_DEVICE_TYPE_GPU/device[j].type \& CL_DEVICE_TYPE_GPU/g' src/Chapter_22/spmv.c; \ |
66 | sed -i 's/context.CL_DEVICE_TYPE_GPU/context\(CL_DEVICE_TYPE_CPU/g' src/Chapter_12/VectorAdd/vecadd.cpp; \ |
67 | + rm src/Chapter_12/Sinewave/CMakeLists.txt; \ |
68 | mkdir -p build; cd build; \ |
69 | cmake \ |
70 | -D CMAKE_CXX_FLAGS:STRING='-g -O3 -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -lGL -lglut ${PTHREAD_CFLAGS}' \ |
71 | |
72 | === modified file 'include/_kernel.h' |
73 | --- include/_kernel.h 2012-03-06 23:17:03 +0000 |
74 | +++ include/_kernel.h 2012-05-30 08:34:23 +0000 |
75 | @@ -1772,3 +1772,39 @@ |
76 | _CL_DECLARE_ASYNC_COPY_FUNCS(float); |
77 | __IF_FP64(_CL_DECLARE_ASYNC_COPY_FUNCS(double)); |
78 | |
79 | +// Image support |
80 | + |
81 | +typedef int sampler_t; |
82 | + |
83 | +#define CLK_ADDRESS_NONE 0x00 |
84 | +#define CLK_ADDRESS_MIRRORED_REPEAT 0x01 |
85 | +#define CLK_ADDRESS_REPEAT 0x02 |
86 | +#define CLK_ADDRESS_CLAMP_TO_EDGE 0x03 |
87 | +#define CLK_ADDRESS_CLAMP 0x04 |
88 | + |
89 | +#define CLK_NORMALIZED_COORDS_FALSE 0x00 |
90 | +#define CLK_NORMALIZED_COORDS_TRUE 0x08 |
91 | + |
92 | +#define CLK_FILTER_NEAREST 0x00 |
93 | +#define CLK_FILTER_LINEAR 0x10 |
94 | + |
95 | +typedef struct image2d_t_* image2d_t; |
96 | + |
97 | +float4 _cl_overloadable read_imagef( image2d_t image, |
98 | + sampler_t sampler, |
99 | + int2 coord); |
100 | + |
101 | +float4 _cl_overloadable read_imagef( image2d_t image, |
102 | + sampler_t sampler, |
103 | + float2 coord); |
104 | + |
105 | +void _cl_overloadable write_imagef( image2d_t image, |
106 | + int2 coord, |
107 | + float4 color); |
108 | + |
109 | +void _cl_overloadable write_imagei( image2d_t image, |
110 | + int2 coord, |
111 | + int4 color); |
112 | + |
113 | +int get_image_width (image2d_t image); |
114 | +int get_image_height (image2d_t image); |
115 | |
116 | === modified file 'include/pocl_device.h' |
117 | --- include/pocl_device.h 2012-05-08 17:37:35 +0000 |
118 | +++ include/pocl_device.h 2012-05-30 08:34:23 +0000 |
119 | @@ -51,6 +51,8 @@ |
120 | unsigned short alocal_sizes[MAX_KERNEL_ARGS]; |
121 | char arg_is_local[MAX_KERNEL_ARGS]; |
122 | char arg_is_pointer[MAX_KERNEL_ARGS]; |
123 | + char arg_is_image[MAX_KERNEL_ARGS]; |
124 | + char arg_is_sampler[MAX_KERNEL_ARGS]; |
125 | pocl_workgroup work_group_func; |
126 | } __kernel_metadata; |
127 | |
128 | |
129 | === modified file 'lib/CL/Makefile.am' |
130 | --- lib/CL/Makefile.am 2012-05-28 12:03:51 +0000 |
131 | +++ lib/CL/Makefile.am 2012-05-30 08:34:23 +0000 |
132 | @@ -38,9 +38,9 @@ |
133 | clGetCommandQueueInfo.c \ |
134 | clCreateBuffer.c \ |
135 | clCreateSubBuffer.c \ |
136 | - clEnqueueReadBuffer.c \ |
137 | - clEnqueueReadBufferRect.c \ |
138 | - clEnqueueMapBuffer.c \ |
139 | + clEnqueueReadBuffer.c \ |
140 | + clEnqueueReadBufferRect.c \ |
141 | + clEnqueueMapBuffer.c \ |
142 | clEnqueueUnmapMemObject.c \ |
143 | clReleaseMemObject.c \ |
144 | clRetainMemObject.c \ |
145 | @@ -89,23 +89,24 @@ |
146 | clEnqueueTask.c \ |
147 | clCreateImage2D.c \ |
148 | clCreateImage3D.c \ |
149 | - clEnqueueReadImage.c \ |
150 | - clEnqueueWriteImage.c \ |
151 | + clEnqueueReadImage.c \ |
152 | + clEnqueueWriteImage.c \ |
153 | + clCreateSampler.c \ |
154 | + clReleaseSampler.c \ |
155 | + clRetainSampler.c \ |
156 | + clGetSamplerInfo.c \ |
157 | clEnqueueCopyImage.c \ |
158 | - clEnqueueMapImage.c \ |
159 | - clGetSupportedImageFormats.c \ |
160 | + clEnqueueMapImage.c \ \ |
161 | clGetImageInfo.c \ |
162 | clCreateFromGLTexture2D.c \ |
163 | clCreateFromGLTexture3D.c \ |
164 | clUnloadCompiler.c \ |
165 | + clGetSupportedImageFormats.c \ |
166 | clGetExtensionFunctionAddress.c \ |
167 | clIcdGetPlatformIDsKHR.c \ |
168 | - clCreateSampler.c \ |
169 | - clReleaseSampler.c \ |
170 | - clRetainSampler.c \ |
171 | - clGetSamplerInfo.c \ |
172 | - pocl_cl.h \ |
173 | - pocl_util.c pocl_util.h |
174 | + pocl_cl.h \ |
175 | + pocl_util.c pocl_util.h \ |
176 | + pocl_image_util.c pocl_image_util.h |
177 | |
178 | |
179 | |
180 | |
181 | === modified file 'lib/CL/clCreateBuffer.c' |
182 | --- lib/CL/clCreateBuffer.c 2012-05-24 20:46:33 +0000 |
183 | +++ lib/CL/clCreateBuffer.c 2012-05-30 08:34:23 +0000 |
184 | @@ -49,6 +49,7 @@ |
185 | mem->map_count = 0; |
186 | mem->mappings = NULL; |
187 | mem->flags = flags; |
188 | + mem->is_image = CL_FALSE; |
189 | POCL_INIT_ICD_OBJECT(mem); |
190 | |
191 | /* Store the per device buffer pointers always to a known |
192 | |
193 | === modified file 'lib/CL/clCreateImage2D.c' |
194 | --- lib/CL/clCreateImage2D.c 2012-01-30 12:21:12 +0000 |
195 | +++ lib/CL/clCreateImage2D.c 2012-05-30 08:34:23 +0000 |
196 | @@ -1,17 +1,121 @@ |
197 | #include "pocl_cl.h" |
198 | +#include "assert.h" |
199 | +#include "pocl_image_util.h" |
200 | + |
201 | CL_API_ENTRY cl_mem CL_API_CALL |
202 | clCreateImage2D(cl_context context, |
203 | cl_mem_flags flags, |
204 | const cl_image_format * image_format, |
205 | - size_t image_width, |
206 | - size_t image_height, |
207 | + size_t width, |
208 | + size_t height, |
209 | size_t image_row_pitch, |
210 | void * host_ptr, |
211 | cl_int * errcode_ret) |
212 | CL_API_SUFFIX__VERSION_1_0 |
213 | { |
214 | - POCL_ABORT_UNIMPLEMENTED(); |
215 | - return CL_SUCCESS; |
216 | + cl_mem mem; |
217 | + cl_device_id device_id; |
218 | + void *device_ptr; |
219 | + unsigned i, j; |
220 | + int size; |
221 | + |
222 | + if (context == NULL) |
223 | + POCL_ERROR(CL_INVALID_CONTEXT); |
224 | + |
225 | + mem = (cl_mem) malloc(sizeof(struct _cl_mem)); |
226 | + if (mem == NULL) |
227 | + POCL_ERROR(CL_OUT_OF_HOST_MEMORY); |
228 | + |
229 | + POCL_INIT_OBJECT(mem); |
230 | + mem->parent = NULL; |
231 | + mem->map_count = 0; |
232 | + mem->mappings = NULL; |
233 | + mem->flags = flags; |
234 | + mem->is_image = CL_TRUE; |
235 | + |
236 | + cl_channel_order order = image_format->image_channel_order; |
237 | + cl_channel_type type = image_format->image_channel_data_type; |
238 | + |
239 | + int dev_elem_size = sizeof(cl_float); //TODO |
240 | + int dev_channels = 4; |
241 | + |
242 | + if (image_row_pitch == 0) |
243 | + image_row_pitch = width; |
244 | + |
245 | + if (image_row_pitch != width) |
246 | + POCL_ABORT_UNIMPLEMENTED(); |
247 | + |
248 | + size = width * height * dev_elem_size * dev_channels; |
249 | + |
250 | + mem->device_ptrs = (void **) malloc(context->num_devices * sizeof(void *)); |
251 | + if (mem->device_ptrs == NULL) |
252 | + { |
253 | + free(mem); |
254 | + POCL_ERROR(CL_OUT_OF_HOST_MEMORY); |
255 | + } |
256 | + |
257 | + int host_channels; |
258 | + |
259 | + if (order == CL_RGBA) |
260 | + host_channels=4; |
261 | + else if (order == CL_R) |
262 | + host_channels=1; |
263 | + else |
264 | + POCL_ABORT_UNIMPLEMENTED(); |
265 | + |
266 | + mem->size = size; |
267 | + mem->context = context; |
268 | + |
269 | + mem->image_width = width; |
270 | + mem->image_height = height; |
271 | + mem->image_row_pitch = image_row_pitch; |
272 | + mem->image_channel_data_type = type; |
273 | + mem->image_channel_order = order; |
274 | + |
275 | + for (i = 0; i < context->num_devices; ++i) |
276 | + { |
277 | + if (i > 0) |
278 | + clRetainMemObject (mem); |
279 | + device_id = context->devices[i]; |
280 | + device_ptr = device_id->malloc(device_id->data, 0, size, NULL); |
281 | + |
282 | + if (device_ptr == NULL) |
283 | + { |
284 | + for (j = 0; j < i; ++j) |
285 | + { |
286 | + device_id = context->devices[j]; |
287 | + device_id->free(device_id->data, 0, mem->device_ptrs[j]); |
288 | + } |
289 | + free(mem); |
290 | + POCL_ERROR(CL_MEM_OBJECT_ALLOCATION_FAILURE); |
291 | + } |
292 | + mem->device_ptrs[i] = device_ptr; |
293 | + /* The device allocator allocated from a device-host shared memory. */ |
294 | + if (flags & CL_MEM_ALLOC_HOST_PTR || |
295 | + flags & CL_MEM_USE_HOST_PTR) |
296 | + POCL_ABORT_UNIMPLEMENTED(); |
297 | + |
298 | + if (flags & CL_MEM_COPY_HOST_PTR) |
299 | + { |
300 | + size_t origin[3] = { 0, 0, 0 }; |
301 | + size_t region[3] = { width, height, 1 }; |
302 | + pocl_write_image( mem, |
303 | + context->devices[i], |
304 | + origin, |
305 | + region, |
306 | + 0, |
307 | + 1, |
308 | + host_ptr ); |
309 | + } |
310 | + } |
311 | + |
312 | + POCL_RETAIN_OBJECT(context); |
313 | + |
314 | + |
315 | + if (errcode_ret != NULL) |
316 | + *errcode_ret = CL_SUCCESS; |
317 | + |
318 | + return mem; |
319 | } |
320 | |
321 | |
322 | |
323 | === modified file 'lib/CL/clCreateKernel.c' |
324 | --- lib/CL/clCreateKernel.c 2012-05-23 08:38:12 +0000 |
325 | +++ lib/CL/clCreateKernel.c 2012-05-30 08:34:23 +0000 |
326 | @@ -143,6 +143,8 @@ |
327 | kernel->dlhandle = dlhandle; /* TODO: why is this stored? */ |
328 | kernel->arg_is_pointer = lt_dlsym(dlhandle, "_arg_is_pointer"); |
329 | kernel->arg_is_local = lt_dlsym(dlhandle, "_arg_is_local"); |
330 | + kernel->arg_is_image = lt_dlsym(dlhandle, "_arg_is_image"); |
331 | + kernel->arg_is_sampler = lt_dlsym(dlhandle, "_arg_is_sampler"); |
332 | kernel->num_locals = *(cl_uint *) lt_dlsym(dlhandle, "_num_locals"); |
333 | kernel->arguments = |
334 | (struct pocl_argument *) malloc ((kernel->num_args + kernel->num_locals) * |
335 | |
336 | === modified file 'lib/CL/clCreateSampler.c' |
337 | --- lib/CL/clCreateSampler.c 2012-05-22 21:26:29 +0000 |
338 | +++ lib/CL/clCreateSampler.c 2012-05-30 08:34:23 +0000 |
339 | @@ -1,13 +1,33 @@ |
340 | #include "pocl_cl.h" |
341 | - |
342 | -CL_API_ENTRY cl_sampler CL_API_CALL |
343 | -clCreateSampler(cl_context context , |
344 | - cl_bool normalized_coords , |
345 | - cl_addressing_mode addressing_mode , |
346 | - cl_filter_mode filter_mode , |
347 | - cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 |
348 | +extern CL_API_ENTRY cl_sampler CL_API_CALL |
349 | +clCreateSampler(cl_context context, |
350 | + cl_bool normalized_coords, |
351 | + cl_addressing_mode addressing_mode, |
352 | + cl_filter_mode filter_mode, |
353 | + cl_int * errcode_ret) |
354 | +CL_API_SUFFIX__VERSION_1_0 |
355 | { |
356 | - POCL_ABORT_UNIMPLEMENTED(); |
357 | - return CL_SUCCESS; |
358 | + cl_sampler sampler; |
359 | + |
360 | + if (context == NULL) |
361 | + POCL_ERROR(CL_INVALID_CONTEXT); |
362 | + |
363 | + sampler = (cl_sampler) malloc(sizeof(struct _cl_sampler)); |
364 | + if (sampler == NULL) |
365 | + POCL_ERROR(CL_OUT_OF_HOST_MEMORY); |
366 | + |
367 | + if (normalized_coords == CL_TRUE) |
368 | + POCL_ABORT_UNIMPLEMENTED(); |
369 | + |
370 | + if (addressing_mode != CL_ADDRESS_CLAMP_TO_EDGE) |
371 | + POCL_ABORT_UNIMPLEMENTED(); |
372 | + |
373 | + if (filter_mode != CL_FILTER_NEAREST) |
374 | + POCL_ABORT_UNIMPLEMENTED(); |
375 | + |
376 | + sampler->normalized_coords = normalized_coords; |
377 | + sampler->addressing_mode = addressing_mode; |
378 | + sampler->filter_mode = filter_mode; |
379 | + |
380 | + return sampler; |
381 | } |
382 | - |
383 | |
384 | === modified file 'lib/CL/clEnqueueCopyBufferToImage.c' |
385 | --- lib/CL/clEnqueueCopyBufferToImage.c 2012-05-22 21:26:29 +0000 |
386 | +++ lib/CL/clEnqueueCopyBufferToImage.c 2012-05-30 08:34:23 +0000 |
387 | @@ -1,17 +1,50 @@ |
388 | #include "pocl_cl.h" |
389 | |
390 | -CL_API_ENTRY cl_int CL_API_CALL |
391 | -clEnqueueCopyBufferToImage(cl_command_queue command_queue , |
392 | - cl_mem src_buffer , |
393 | - cl_mem dst_image , |
394 | - size_t src_offset , |
395 | - const size_t * dst_origin , |
396 | - const size_t * region , |
397 | - cl_uint num_events_in_wait_list , |
398 | - const cl_event * event_wait_list , |
399 | +extern CL_API_ENTRY cl_int CL_API_CALL |
400 | +clEnqueueCopyBufferToImage(cl_command_queue command_queue, |
401 | + cl_mem buffer, |
402 | + cl_mem image, |
403 | + size_t src_offset, |
404 | + const size_t * dst_origin, /*[3]*/ |
405 | + const size_t * region, /*[3]*/ |
406 | + cl_uint num_events_in_wait_list, |
407 | + const cl_event * event_wait_list, |
408 | cl_event * event ) CL_API_SUFFIX__VERSION_1_0 |
409 | -{ |
410 | - POCL_ABORT_UNIMPLEMENTED(); |
411 | - return CL_SUCCESS; |
412 | -} |
413 | + { |
414 | + if (region == NULL) |
415 | + return CL_INVALID_VALUE; |
416 | + |
417 | + if (region[2] != 1) //3D image |
418 | + POCL_ABORT_UNIMPLEMENTED(); |
419 | + |
420 | + int dev_elem_size = sizeof(cl_float); |
421 | + int dev_channels = 4; |
422 | + |
423 | + int host_elem_size; |
424 | + int host_channels; |
425 | + pocl_get_image_information (image, &host_channels, &host_elem_size); |
426 | + |
427 | + void* temp = malloc (image->size); |
428 | + |
429 | + cl_device_id device_id = command_queue->device; |
430 | |
431 | + device_id->read |
432 | + (device_id->data, |
433 | + temp, |
434 | + image->device_ptrs[device_id->dev_id], |
435 | + image->size); |
436 | + |
437 | + cl_int ret_code = pocl_write_image |
438 | + (image, |
439 | + command_queue->device, |
440 | + dst_origin, |
441 | + region, |
442 | + 0, |
443 | + 0, |
444 | + temp+src_offset); |
445 | + |
446 | + free(temp); |
447 | + |
448 | + return ret_code; |
449 | + } |
450 | + |
451 | \ No newline at end of file |
452 | |
453 | === modified file 'lib/CL/clEnqueueReadBufferRect.c' |
454 | --- lib/CL/clEnqueueReadBufferRect.c 2012-05-14 11:45:48 +0000 |
455 | +++ lib/CL/clEnqueueReadBufferRect.c 2012-05-30 08:34:23 +0000 |
456 | @@ -23,6 +23,7 @@ |
457 | |
458 | #include "pocl_cl.h" |
459 | #include <assert.h> |
460 | +#include <stdio.h> |
461 | |
462 | CL_API_ENTRY cl_int CL_API_CALL |
463 | clEnqueueReadBufferRect(cl_command_queue command_queue, |
464 | @@ -78,6 +79,7 @@ |
465 | buffer_origin, host_origin, region, |
466 | buffer_row_pitch, buffer_slice_pitch, |
467 | host_row_pitch, host_slice_pitch); |
468 | + |
469 | |
470 | return CL_SUCCESS; |
471 | } |
472 | |
473 | === modified file 'lib/CL/clEnqueueReadImage.c' |
474 | --- lib/CL/clEnqueueReadImage.c 2012-05-22 21:26:29 +0000 |
475 | +++ lib/CL/clEnqueueReadImage.c 2012-05-30 08:34:23 +0000 |
476 | @@ -1,19 +1,26 @@ |
477 | #include "pocl_cl.h" |
478 | +#include "assert.h" |
479 | +#include "pocl_image_util.h" |
480 | |
481 | -CL_API_ENTRY cl_int CL_API_CALL |
482 | -clEnqueueReadImage(cl_command_queue command_queue , |
483 | - cl_mem image , |
484 | - cl_bool blocking_read , |
485 | - const size_t * origin , |
486 | - const size_t * region , |
487 | - size_t row_pitch , |
488 | - size_t slice_pitch , |
489 | - void * ptr , |
490 | - cl_uint num_events_in_wait_list , |
491 | - const cl_event * event_wait_list , |
492 | - cl_event * event ) CL_API_SUFFIX__VERSION_1_0 |
493 | +extern CL_API_ENTRY cl_int CL_API_CALL |
494 | +clEnqueueReadImage(cl_command_queue command_queue, |
495 | + cl_mem image, |
496 | + cl_bool blocking_read, |
497 | + const size_t * origin, /* [3] */ |
498 | + const size_t * region, /* [3] */ |
499 | + size_t host_row_pitch, |
500 | + size_t host_slice_pitch, |
501 | + void * ptr, |
502 | + cl_uint num_events_in_wait_list, |
503 | + const cl_event * event_wait_list, |
504 | + cl_event * event) |
505 | +CL_API_SUFFIX__VERSION_1_0 |
506 | { |
507 | - POCL_ABORT_UNIMPLEMENTED(); |
508 | - return CL_SUCCESS; |
509 | -} |
510 | - |
511 | + return pocl_read_image(image, |
512 | + command_queue->device, |
513 | + origin, |
514 | + region, |
515 | + host_row_pitch, |
516 | + host_slice_pitch, |
517 | + ptr); |
518 | +} |
519 | \ No newline at end of file |
520 | |
521 | === modified file 'lib/CL/clEnqueueWriteImage.c' |
522 | --- lib/CL/clEnqueueWriteImage.c 2012-05-22 21:26:29 +0000 |
523 | +++ lib/CL/clEnqueueWriteImage.c 2012-05-30 08:34:23 +0000 |
524 | @@ -1,19 +1,25 @@ |
525 | #include "pocl_cl.h" |
526 | +#include "pocl_image_util.h" |
527 | |
528 | -CL_API_ENTRY cl_int CL_API_CALL |
529 | -clEnqueueWriteImage(cl_command_queue command_queue , |
530 | - cl_mem image , |
531 | - cl_bool blocking_write , |
532 | - const size_t * origin, |
533 | - const size_t * region, |
534 | - size_t input_row_pitch , |
535 | - size_t input_slice_pitch , |
536 | - const void * ptr , |
537 | - cl_uint num_events_in_wait_list , |
538 | - const cl_event * event_wait_list , |
539 | - cl_event * event ) CL_API_SUFFIX__VERSION_1_0 |
540 | -{ |
541 | - POCL_ABORT_UNIMPLEMENTED(); |
542 | - return CL_SUCCESS; |
543 | -} |
544 | +extern CL_API_ENTRY cl_int CL_API_CALL |
545 | +clEnqueueWriteImage(cl_command_queue command_queue, |
546 | + cl_mem image, |
547 | + cl_bool blocking_write, |
548 | + const size_t * origin, /*[3]*/ |
549 | + const size_t * region, /*[3]*/ |
550 | + size_t host_row_pitch, |
551 | + size_t host_slice_pitch, |
552 | + const void * ptr, |
553 | + cl_uint num_events_in_wait_list, |
554 | + const cl_event * event_wait_list, |
555 | + cl_event * event) CL_API_SUFFIX__VERSION_1_0 |
556 | + { |
557 | + return pocl_write_image(image, |
558 | + command_queue->device, |
559 | + origin, |
560 | + region, |
561 | + host_row_pitch, |
562 | + host_slice_pitch, |
563 | + ptr); |
564 | + } |
565 | |
566 | |
567 | === modified file 'lib/CL/clGetDeviceInfo.c' |
568 | --- lib/CL/clGetDeviceInfo.c 2012-05-15 22:38:06 +0000 |
569 | +++ lib/CL/clGetDeviceInfo.c 2012-05-30 08:34:23 +0000 |
570 | @@ -62,8 +62,7 @@ |
571 | switch (param_name) |
572 | { |
573 | case CL_DEVICE_IMAGE_SUPPORT: |
574 | - /* Return CL_FALSE until the APIs are implemented. */ |
575 | - POCL_RETURN_DEVICE_INFO(cl_bool, CL_FALSE); |
576 | + POCL_RETURN_DEVICE_INFO(cl_bool, CL_TRUE); |
577 | case CL_DEVICE_TYPE: |
578 | POCL_RETURN_DEVICE_INFO(cl_device_type, device->type); |
579 | case CL_DEVICE_VENDOR_ID: |
580 | |
581 | === modified file 'lib/CL/clGetSupportedImageFormats.c' |
582 | --- lib/CL/clGetSupportedImageFormats.c 2012-05-22 21:26:29 +0000 |
583 | +++ lib/CL/clGetSupportedImageFormats.c 2012-05-30 08:34:23 +0000 |
584 | @@ -1,14 +1,53 @@ |
585 | #include "pocl_cl.h" |
586 | |
587 | -CL_API_ENTRY cl_int CL_API_CALL |
588 | -clGetSupportedImageFormats(cl_context context , |
589 | - cl_mem_flags flags , |
590 | - cl_mem_object_type image_type , |
591 | - cl_uint num_entries , |
592 | - cl_image_format * image_formats , |
593 | - cl_uint * num_image_formats ) CL_API_SUFFIX__VERSION_1_0 |
594 | +extern CL_API_ENTRY cl_int CL_API_CALL |
595 | +clGetSupportedImageFormats(cl_context context, |
596 | + cl_mem_flags flags, |
597 | + cl_mem_object_type image_type, |
598 | + cl_uint num_entries, |
599 | + cl_image_format * image_formats, |
600 | + cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0 |
601 | { |
602 | - POCL_ABORT_UNIMPLEMENTED(); |
603 | - return CL_SUCCESS; |
604 | + if (context == NULL) |
605 | + return CL_INVALID_CONTEXT; |
606 | + |
607 | + if (image_type != CL_MEM_OBJECT_IMAGE2D) |
608 | + return CL_INVALID_VALUE; |
609 | + |
610 | + if (num_entries==0 && image_formats!=NULL) |
611 | + return CL_INVALID_VALUE; |
612 | + |
613 | + int idx=0; |
614 | + |
615 | + const int supported_order_count = 2; |
616 | + cl_channel_order supported_orders[] = |
617 | + { |
618 | + CL_RGBA, |
619 | + CL_R |
620 | + }; |
621 | + |
622 | + const int supported_type_count = 2; |
623 | + cl_channel_type supported_types[] = |
624 | + { |
625 | + CL_UNORM_INT8, |
626 | + CL_FLOAT |
627 | + }; |
628 | + |
629 | + int i, j; |
630 | + for (i=0; i<supported_order_count; i++) |
631 | + for (j=0; j<supported_type_count; j++) |
632 | + { |
633 | + if (idx >= num_entries) |
634 | + return CL_SUCCESS; |
635 | + |
636 | + image_formats[idx].image_channel_order = supported_orders[i]; |
637 | + image_formats[idx].image_channel_data_type = supported_types[j]; |
638 | + |
639 | + idx++; |
640 | + } |
641 | + |
642 | + // Add special cases here if a channel order is supported with only some types or vice versa. |
643 | + *num_image_formats = idx; |
644 | + |
645 | + return CL_SUCCESS; |
646 | } |
647 | - |
648 | |
649 | === modified file 'lib/CL/clReleaseSampler.c' |
650 | --- lib/CL/clReleaseSampler.c 2012-05-22 21:26:29 +0000 |
651 | +++ lib/CL/clReleaseSampler.c 2012-05-30 08:34:23 +0000 |
652 | @@ -1,9 +1,7 @@ |
653 | #include "pocl_cl.h" |
654 | - |
655 | -CL_API_ENTRY cl_int CL_API_CALL |
656 | -clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0 |
657 | +extern CL_API_ENTRY cl_int CL_API_CALL |
658 | +clReleaseSampler(cl_sampler sampler) |
659 | +CL_API_SUFFIX__VERSION_1_0 |
660 | { |
661 | - POCL_ABORT_UNIMPLEMENTED(); |
662 | return CL_SUCCESS; |
663 | } |
664 | - |
665 | |
666 | === modified file 'lib/CL/devices/basic/basic.c' |
667 | --- lib/CL/devices/basic/basic.c 2012-05-28 16:17:57 +0000 |
668 | +++ lib/CL/devices/basic/basic.c 2012-05-30 08:34:23 +0000 |
669 | @@ -27,6 +27,7 @@ |
670 | #include <string.h> |
671 | #include <stdlib.h> |
672 | #include <unistd.h> |
673 | +#include <../dev_image.h> |
674 | #include <sys/time.h> |
675 | |
676 | #define max(a,b) (((a) > (b)) ? (a) : (b)) |
677 | @@ -235,6 +236,30 @@ |
678 | { |
679 | arguments[i] = &((*(cl_mem *) (p->value))->device_ptrs[device]); |
680 | } |
681 | + else if (kernel->arg_is_image[i]) |
682 | + { |
683 | + dev_image2d_t di; |
684 | + cl_mem mem = *(cl_mem*)p->value; |
685 | + di.data = &((*(cl_mem *) (p->value))->device_ptrs[device]); |
686 | + di.data = ((*(cl_mem *) (p->value))->device_ptrs[device]); |
687 | + di.width = mem->image_width; |
688 | + di.height = mem->image_height; |
689 | + di.rowpitch = mem->image_row_pitch; |
690 | + di.order = mem->image_channel_order; |
691 | + di.data_type = mem->image_channel_data_type; |
692 | + void* devptr = pocl_basic_malloc(data, 0, sizeof(dev_image2d_t), NULL); |
693 | + arguments[i] = malloc (sizeof (void *)); |
694 | + *(void **)(arguments[i]) = devptr; |
695 | + pocl_basic_write( data, &di, devptr, sizeof(dev_image2d_t) ); |
696 | + } |
697 | + else if (kernel->arg_is_sampler[i]) |
698 | + { |
699 | + dev_sampler_t ds; |
700 | + |
701 | + arguments[i] = malloc (sizeof (void *)); |
702 | + *(void **)(arguments[i]) = pocl_basic_malloc(data, 0, sizeof(dev_sampler_t), NULL); |
703 | + pocl_basic_write( data, &ds, *(void**)arguments[i], sizeof(dev_sampler_t) ); |
704 | + } |
705 | else |
706 | { |
707 | arguments[i] = p->value; |
708 | |
709 | === added file 'lib/CL/devices/dev_image.h' |
710 | --- lib/CL/devices/dev_image.h 1970-01-01 00:00:00 +0000 |
711 | +++ lib/CL/devices/dev_image.h 2012-05-30 08:34:23 +0000 |
712 | @@ -0,0 +1,17 @@ |
713 | +#ifndef __X86_IMAGE_H__ |
714 | +#define __X86_IMAGE_H__ |
715 | + |
716 | +//Definition of the image datatype used on basic and pthread (and probably tce?) |
717 | + |
718 | +typedef cl_int dev_sampler_t; |
719 | + |
720 | +typedef struct dev_image2d_t { |
721 | + void* data; |
722 | + cl_int width; |
723 | + cl_int height; |
724 | + cl_int rowpitch; |
725 | + cl_int order; |
726 | + cl_int data_type; |
727 | +} dev_image2d_t; |
728 | + |
729 | +#endif |
730 | |
731 | === modified file 'lib/CL/devices/pthread/pthread.c' |
732 | --- lib/CL/devices/pthread/pthread.c 2012-05-29 12:16:56 +0000 |
733 | +++ lib/CL/devices/pthread/pthread.c 2012-05-30 08:34:23 +0000 |
734 | @@ -36,6 +36,7 @@ |
735 | #ifdef CUSTOM_BUFFER_ALLOCATOR |
736 | |
737 | #include "bufalloc.h" |
738 | +#include <../dev_image.h> |
739 | |
740 | /* Instead of mallocing a buffer size for a region, try to allocate |
741 | this many times the buffer size to hopefully avoid mallocs for |
742 | @@ -651,6 +652,15 @@ |
743 | } |
744 | |
745 | void * |
746 | +pocl_pthread_map_mem (void *data, void *buf_ptr, |
747 | + size_t offset, size_t size, void* host_ptr) |
748 | +{ |
749 | + /* All global pointers of the pthread/CPU device are in |
750 | + the host address space already, and up to date. */ |
751 | + return buf_ptr + offset; |
752 | +} |
753 | + |
754 | +void * |
755 | workgroup_thread (void *p) |
756 | { |
757 | struct thread_arguments *ta = (struct thread_arguments *) p; |
758 | @@ -669,6 +679,30 @@ |
759 | } |
760 | else if (kernel->arg_is_pointer[i]) |
761 | arguments[i] = &((*(cl_mem *) (al->value))->device_ptrs[ta->device]); |
762 | + else if (kernel->arg_is_image[i]) |
763 | + { |
764 | + dev_image2d_t di; |
765 | + cl_mem mem = *(cl_mem*)al->value; |
766 | + di.data = &((*(cl_mem *) (al->value))->device_ptrs[ta->device]); |
767 | + di.data = ((*(cl_mem *) (al->value))->device_ptrs[ta->device]); |
768 | + di.width = mem->image_width; |
769 | + di.height = mem->image_height; |
770 | + di.rowpitch = mem->image_row_pitch; |
771 | + di.order = mem->image_channel_order; |
772 | + di.data_type = mem->image_channel_data_type; |
773 | + void* devptr = pocl_pthread_malloc(ta->data, 0, sizeof(dev_image2d_t), NULL); |
774 | + arguments[i] = malloc (sizeof (void *)); |
775 | + *(void **)(arguments[i]) = devptr; |
776 | + pocl_pthread_write( ta->data, &di, devptr, sizeof(dev_image2d_t) ); |
777 | + } |
778 | + else if (kernel->arg_is_sampler[i]) |
779 | + { |
780 | + dev_sampler_t ds; |
781 | + |
782 | + arguments[i] = malloc (sizeof (void *)); |
783 | + *(void **)(arguments[i]) = pocl_pthread_malloc(ta->data, 0, sizeof(dev_sampler_t), NULL); |
784 | + pocl_pthread_write( ta->data, &ds, *(void**)arguments[i], sizeof(dev_sampler_t) ); |
785 | + } |
786 | else |
787 | arguments[i] = al->value; |
788 | } |
789 | @@ -699,11 +733,15 @@ |
790 | |
791 | for (i = 0; i < kernel->num_args; ++i) |
792 | { |
793 | - if (kernel->arg_is_local[i]) |
794 | + if (kernel->arg_is_local[i] ) |
795 | { |
796 | pocl_pthread_free(ta->data, 0, *(void **)(arguments[i])); |
797 | free(arguments[i]); |
798 | } |
799 | + else if( kernel->arg_is_sampler[i] || kernel->arg_is_image[i] ) |
800 | + { |
801 | + free(arguments[i]); |
802 | + } |
803 | } |
804 | for (i = kernel->num_args; |
805 | i < kernel->num_args + kernel->num_locals; |
806 | |
807 | === modified file 'lib/CL/pocl_cl.h' |
808 | --- lib/CL/pocl_cl.h 2012-05-29 12:16:56 +0000 |
809 | +++ lib/CL/pocl_cl.h 2012-05-30 08:34:23 +0000 |
810 | @@ -303,6 +303,15 @@ |
811 | /* in case this is a sub buffer, this points to the parent |
812 | buffer */ |
813 | cl_mem_t *parent; |
814 | + /* Image flags */ |
815 | + cl_bool is_image; |
816 | + cl_channel_order image_channel_order; |
817 | + cl_channel_type image_channel_data_type; |
818 | + size_t image_width; |
819 | + size_t image_height; |
820 | + /*size_t image_depth;*/ |
821 | + size_t image_row_pitch; |
822 | + /*size_t image_slice_pitch;*/ |
823 | }; |
824 | |
825 | struct _cl_program { |
826 | @@ -337,6 +346,8 @@ |
827 | lt_dlhandle dlhandle; |
828 | cl_int *arg_is_pointer; |
829 | cl_int *arg_is_local; |
830 | + cl_int *arg_is_image; |
831 | + cl_int *arg_is_sampler; |
832 | cl_uint num_locals; |
833 | struct pocl_argument *arguments; |
834 | struct _cl_kernel *next; |
835 | @@ -359,8 +370,13 @@ |
836 | |
837 | }; |
838 | |
839 | +typedef struct _cl_sampler cl_sampler_t; |
840 | + |
841 | struct _cl_sampler { |
842 | - POCL_ICD_OBJECT |
843 | + POCL_ICD_OBJECT; |
844 | + cl_bool normalized_coords; |
845 | + cl_addressing_mode addressing_mode; |
846 | + cl_filter_mode filter_mode; |
847 | }; |
848 | |
849 | #define POCL_PROFILE_QUEUED \ |
850 | |
851 | === added file 'lib/CL/pocl_image_util.c' |
852 | --- lib/CL/pocl_image_util.c 1970-01-01 00:00:00 +0000 |
853 | +++ lib/CL/pocl_image_util.c 2012-05-30 08:34:23 +0000 |
854 | @@ -0,0 +1,228 @@ |
855 | +#include "pocl_cl.h" |
856 | +#include "pocl_image_util.h" |
857 | +#include "assert.h" |
858 | + |
859 | +extern void |
860 | +pocl_get_image_information (cl_mem image, |
861 | + int* channels_out, |
862 | + int* elem_size_out) |
863 | + { |
864 | + cl_channel_order order = image->image_channel_order; |
865 | + cl_channel_type type = image->image_channel_data_type; |
866 | + |
867 | + int host_elem_size; |
868 | + if (type == CL_FLOAT) |
869 | + host_elem_size=4; |
870 | + else if (type==CL_UNORM_INT8) |
871 | + host_elem_size=1; |
872 | + else |
873 | + POCL_ABORT_UNIMPLEMENTED(); |
874 | + if (elem_size_out != NULL) |
875 | + *elem_size_out = host_elem_size; |
876 | + |
877 | + int host_channels; |
878 | + if (order == CL_RGBA) |
879 | + host_channels=4; |
880 | + else if (order == CL_R) |
881 | + host_channels=1; |
882 | + else |
883 | + POCL_ABORT_UNIMPLEMENTED(); |
884 | + if (channels_out != NULL) |
885 | + *channels_out = host_channels; |
886 | + } |
887 | +extern cl_int |
888 | +pocl_write_image (cl_mem image, |
889 | + cl_device_id device_id, |
890 | + const size_t * origin_, /*[3]*/ |
891 | + const size_t * region_, /*[3]*/ |
892 | + size_t host_row_pitch, |
893 | + size_t host_slice_pitch, |
894 | + const void * ptr) |
895 | + { |
896 | + if (image == NULL) |
897 | + return CL_INVALID_MEM_OBJECT; |
898 | + |
899 | + if ((ptr == NULL) || |
900 | + (region_ == NULL)) |
901 | + return CL_INVALID_VALUE; |
902 | + |
903 | + int width = image->image_width; |
904 | + int height = image->image_height; |
905 | + cl_channel_order order = image->image_channel_order; |
906 | + cl_channel_type type = image->image_channel_data_type; |
907 | + |
908 | + size_t dev_elem_size = sizeof(cl_float); |
909 | + int dev_channels = 4; |
910 | + |
911 | + int host_elem_size; |
912 | + int host_channels; |
913 | + pocl_get_image_information (image, &host_channels, &host_elem_size); |
914 | + |
915 | + size_t origin[3] = { origin_[0]*dev_elem_size*dev_channels, origin_[1], origin_[2] }; |
916 | + size_t region[3] = { region_[0]*dev_elem_size*dev_channels, region_[1], region_[2] }; |
917 | + |
918 | + size_t image_row_pitch = width*dev_elem_size*dev_channels; |
919 | + size_t image_slice_pitch = 0; |
920 | + |
921 | + if ((region[0]*region[1]*region[2] > 0) && |
922 | + (region[0]-1 + |
923 | + image_row_pitch * (region[1]-1) + |
924 | + image_slice_pitch * (region[2]-1) >= image->size)) |
925 | + return CL_INVALID_VALUE; |
926 | + |
927 | + cl_float* temp = malloc( width*height*dev_channels*dev_elem_size ); |
928 | + |
929 | + if (temp == NULL) |
930 | + return CL_OUT_OF_HOST_MEMORY; |
931 | + |
932 | + int x, y, k; |
933 | + |
934 | + for (y=0; y<height; y++) |
935 | + for (x=0; x<width*dev_channels; x++) |
936 | + temp[x+y*width*dev_channels] = 0.f; |
937 | + |
938 | + for (y=0; y<height; y++) |
939 | + { |
940 | + for (x=0; x<width; x++) |
941 | + { |
942 | + cl_float elem[4]; //TODO 0,0,0,0 for some modes? |
943 | + |
944 | + for (k=0; k<host_channels; k++) |
945 | + { |
946 | + if (type == CL_FLOAT) |
947 | + elem[k] = ((float*)ptr)[k+(x+y*width)*host_channels]; |
948 | + else if (type==CL_UNORM_INT8) |
949 | + { |
950 | + cl_uchar foo = ((cl_uchar*)ptr)[k+(x+y*width)*host_channels]; |
951 | + elem[k] = (float)(foo) * (1.f/255.f); |
952 | + } |
953 | + else |
954 | + POCL_ABORT_UNIMPLEMENTED(); |
955 | + } |
956 | + |
957 | + if (order == CL_RGBA) |
958 | + for (k=0; k<4; k++) |
959 | + temp[(x+y*width)*dev_channels+k] = elem[k]; |
960 | + else if (order == CL_R) |
961 | + { |
962 | + temp[(x+y*width)*dev_channels+0] = elem[0]; |
963 | + temp[(x+y*width)*dev_channels+1] = 0.f; |
964 | + temp[(x+y*width)*dev_channels+2] = 0.f; |
965 | + temp[(x+y*width)*dev_channels+3] = 1.f; |
966 | + } |
967 | + } |
968 | + } |
969 | + |
970 | + |
971 | + device_id->write_rect(device_id->data, temp, |
972 | + image->device_ptrs[device_id->dev_id], |
973 | + origin, origin, region, |
974 | + image_row_pitch, image_slice_pitch, |
975 | + image_row_pitch, image_slice_pitch); |
976 | + |
977 | + free (temp); |
978 | + return CL_SUCCESS; |
979 | + } |
980 | + |
981 | +extern cl_int |
982 | +pocl_read_image (cl_mem image, |
983 | + cl_device_id device_id, |
984 | + const size_t * origin_, /*[3]*/ |
985 | + const size_t * region_, /*[3]*/ |
986 | + size_t host_row_pitch, |
987 | + size_t host_slice_pitch, |
988 | + void * ptr) |
989 | + { |
990 | + |
991 | + if (image == NULL) |
992 | + return CL_INVALID_MEM_OBJECT; |
993 | + |
994 | + if ((ptr == NULL) || |
995 | + (region_ == NULL)) |
996 | + return CL_INVALID_VALUE; |
997 | + |
998 | + int width = image->image_width; |
999 | + int height = image->image_height; |
1000 | + int dev_elem_size = sizeof(cl_float); |
1001 | + int dev_channels = 4; |
1002 | + size_t origin[3] = { origin_[0]*dev_elem_size*dev_channels, origin_[1], origin_[2] }; |
1003 | + size_t region[3] = { region_[0]*dev_elem_size*dev_channels, region_[1], region_[2] }; |
1004 | + |
1005 | + size_t image_row_pitch = width*dev_elem_size*dev_channels; |
1006 | + size_t image_slice_pitch = 0; |
1007 | + |
1008 | + if ((region[0]*region[1]*region[2] > 0) && |
1009 | + (region[0]-1 + |
1010 | + image_row_pitch * (region[1]-1) + |
1011 | + image_slice_pitch * (region[2]-1) >= image->size)) |
1012 | + return CL_INVALID_VALUE; |
1013 | + |
1014 | + |
1015 | + int i, j, k; |
1016 | + |
1017 | + cl_channel_order order = image->image_channel_order; |
1018 | + cl_channel_type type = image->image_channel_data_type; |
1019 | + |
1020 | + cl_float* temp = malloc( width*height*dev_channels*dev_elem_size ); |
1021 | + |
1022 | + if (temp == NULL) |
1023 | + return CL_OUT_OF_HOST_MEMORY; |
1024 | + |
1025 | + int host_channels, host_elem_size; |
1026 | + |
1027 | + pocl_get_image_information(image, &host_channels, &host_elem_size); |
1028 | + |
1029 | + if (host_row_pitch == 0) { |
1030 | + host_row_pitch = width*host_channels; |
1031 | + } |
1032 | + |
1033 | + size_t buffer_origin[3] = { 0, 0, 0 }; |
1034 | + |
1035 | + device_id->read_rect(device_id->data, temp, |
1036 | + image->device_ptrs[device_id->dev_id], |
1037 | + origin, origin, region, |
1038 | + image_row_pitch, image_slice_pitch, |
1039 | + image_row_pitch, image_slice_pitch); |
1040 | + |
1041 | + for (j=0; j<height; j++) { |
1042 | + for (i=0; i<width; i++) { |
1043 | + cl_float elem[4]; |
1044 | + |
1045 | + for (k=0; k<4; k++) |
1046 | + elem[k]=0; |
1047 | + |
1048 | + if (order == CL_RGBA) { |
1049 | + for (k=0; k<4; k++) |
1050 | + elem[k] = temp[i*dev_channels + j*width*dev_channels + k]; |
1051 | + } |
1052 | + else if (order == CL_R) { // host_channels == 1 |
1053 | + elem[0] = temp[i*dev_channels + j*width*dev_channels + 0]; |
1054 | + } |
1055 | + |
1056 | + if (type == CL_UNORM_INT8) |
1057 | + { // host_channels == 4 |
1058 | + for (k=0; k<host_channels; k++) |
1059 | + { |
1060 | + ((cl_uchar*)ptr)[i*host_channels + j*host_row_pitch + k] |
1061 | + = (unsigned char)(255*elem[k]); |
1062 | + } |
1063 | + } |
1064 | + else if (type == CL_FLOAT) |
1065 | + { |
1066 | + for (k=0; k<host_channels; k++) |
1067 | + { |
1068 | + POCL_ABORT_UNIMPLEMENTED(); |
1069 | + ((cl_float*)ptr)[i*host_channels + j*host_row_pitch + k] |
1070 | + = elem[k]; |
1071 | + } |
1072 | + } |
1073 | + else |
1074 | + POCL_ABORT_UNIMPLEMENTED(); |
1075 | + } |
1076 | + } |
1077 | + |
1078 | + free (temp); |
1079 | + |
1080 | + return CL_SUCCESS; |
1081 | + } |
1082 | + |
1083 | \ No newline at end of file |
1084 | |
1085 | === added file 'lib/CL/pocl_image_util.h' |
1086 | --- lib/CL/pocl_image_util.h 1970-01-01 00:00:00 +0000 |
1087 | +++ lib/CL/pocl_image_util.h 2012-05-30 08:34:23 +0000 |
1088 | @@ -0,0 +1,50 @@ |
1089 | +/* OpenCL runtime library: pocl_image_util image utility functions |
1090 | + |
1091 | + Copyright (c) 2012 Timo Viitanen / Tampere University of Technology |
1092 | + |
1093 | + Permission is hereby granted, free of charge, to any person obtaining a copy |
1094 | + of this software and associated documentation files (the "Software"), to deal |
1095 | + in the Software without restriction, including without limitation the rights |
1096 | + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
1097 | + copies of the Software, and to permit persons to whom the Software is |
1098 | + furnished to do so, subject to the following conditions: |
1099 | + |
1100 | + The above copyright notice and this permission notice shall be included in |
1101 | + all copies or substantial portions of the Software. |
1102 | + |
1103 | + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
1104 | + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
1105 | + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
1106 | + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
1107 | + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
1108 | + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
1109 | + THE SOFTWARE. |
1110 | +*/ |
1111 | + |
1112 | +#ifndef POCL_IMAGE_UTIL_H |
1113 | +#define POCL_IMAGE_UTIL_H |
1114 | + |
1115 | +extern cl_int |
1116 | +pocl_write_image (cl_mem image, |
1117 | + cl_device_id device_id, |
1118 | + const size_t * origin_, /*[3]*/ |
1119 | + const size_t * region_, /*[3]*/ |
1120 | + size_t host_row_pitch, |
1121 | + size_t host_slice_pitch, |
1122 | + const void * ptr); |
1123 | + |
1124 | +extern cl_int |
1125 | +pocl_read_image (cl_mem image, |
1126 | + cl_device_id device, |
1127 | + const size_t * origin, /*[3]*/ |
1128 | + const size_t * region, /*[3]*/ |
1129 | + size_t host_row_pitch, |
1130 | + size_t host_slice_pitch, |
1131 | + void * ptr); |
1132 | + |
1133 | +extern void |
1134 | +pocl_get_image_information (cl_mem image, |
1135 | + int* host_channels, |
1136 | + int* host_elem_size); |
1137 | + |
1138 | +#endif |
1139 | |
1140 | === added file 'lib/kernel/get_image_height.cl' |
1141 | --- lib/kernel/get_image_height.cl 1970-01-01 00:00:00 +0000 |
1142 | +++ lib/kernel/get_image_height.cl 2012-05-30 08:34:23 +0000 |
1143 | @@ -0,0 +1,7 @@ |
1144 | +#include "templates.h" |
1145 | +#include "image.h" |
1146 | + |
1147 | +int get_image_width (image2d_t image) |
1148 | +{ |
1149 | + return image->height; |
1150 | +} |
1151 | \ No newline at end of file |
1152 | |
1153 | === added file 'lib/kernel/get_image_width.cl' |
1154 | --- lib/kernel/get_image_width.cl 1970-01-01 00:00:00 +0000 |
1155 | +++ lib/kernel/get_image_width.cl 2012-05-30 08:34:23 +0000 |
1156 | @@ -0,0 +1,7 @@ |
1157 | +#include "templates.h" |
1158 | +#include "image.h" |
1159 | + |
1160 | +int get_image_width (image2d_t image) |
1161 | +{ |
1162 | + return image->width; |
1163 | +} |
1164 | \ No newline at end of file |
1165 | |
1166 | === added file 'lib/kernel/image.h' |
1167 | --- lib/kernel/image.h 1970-01-01 00:00:00 +0000 |
1168 | +++ lib/kernel/image.h 2012-05-30 08:34:23 +0000 |
1169 | @@ -0,0 +1,15 @@ |
1170 | +#ifndef __IMAGE_H__ |
1171 | +#define __IMAGE_H__ |
1172 | + |
1173 | +#include "templates.h" |
1174 | + |
1175 | +typedef struct image2d_t_ { |
1176 | + uchar4* data; |
1177 | + int width; |
1178 | + int height; |
1179 | + int rowpitch; |
1180 | + int order; |
1181 | + int data_type; |
1182 | +} image2d_t_; |
1183 | + |
1184 | +#endif |
1185 | |
1186 | === added file 'lib/kernel/read_image.cl' |
1187 | --- lib/kernel/read_image.cl 1970-01-01 00:00:00 +0000 |
1188 | +++ lib/kernel/read_image.cl 2012-05-30 08:34:23 +0000 |
1189 | @@ -0,0 +1,30 @@ |
1190 | +#include "templates.h" |
1191 | + |
1192 | +#include "image.h" |
1193 | + |
1194 | +float4 _cl_overloadable read_imagef ( image2d_t image, |
1195 | + sampler_t sampler, |
1196 | + int2 coord) { |
1197 | + //TODO: Sampler options |
1198 | + if( coord.x<0 ) |
1199 | + coord.x=0; |
1200 | + if( coord.y<0 ) |
1201 | + coord.y=0; |
1202 | + if( coord.x>=image->width ) |
1203 | + coord.x=image->width-1; |
1204 | + if( coord.y>=image->height ) |
1205 | + coord.y=image->height-1; |
1206 | + |
1207 | + float4 color = ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ]; |
1208 | + |
1209 | + return color; |
1210 | +} |
1211 | + |
1212 | +float4 _cl_overloadable read_imagef ( image2d_t image, |
1213 | + sampler_t sampler, |
1214 | + float2 coord) { |
1215 | + |
1216 | + float4 color = ((float4*)image->data)[ (int)coord.x + (int)coord.y*image->rowpitch ]; |
1217 | + |
1218 | + return color; |
1219 | +} |
1220 | |
1221 | === modified file 'lib/kernel/sources.mk' |
1222 | --- lib/kernel/sources.mk 2012-03-05 15:33:38 +0000 |
1223 | +++ lib/kernel/sources.mk 2012-05-30 08:34:23 +0000 |
1224 | @@ -130,7 +130,11 @@ |
1225 | vload_half.cl \ |
1226 | vstore_half.cl \ |
1227 | async_work_group_copy.cl \ |
1228 | - wait_group_events.cl |
1229 | + wait_group_events.cl \ |
1230 | + read_image.cl \ |
1231 | + write_image.cl \ |
1232 | + get_image_width.cl \ |
1233 | + get_image_height.cl |
1234 | |
1235 | |
1236 | |
1237 | |
1238 | === added file 'lib/kernel/write_image.cl' |
1239 | --- lib/kernel/write_image.cl 1970-01-01 00:00:00 +0000 |
1240 | +++ lib/kernel/write_image.cl 2012-05-30 08:34:23 +0000 |
1241 | @@ -0,0 +1,17 @@ |
1242 | +#include "templates.h" |
1243 | + |
1244 | +#include "image.h" |
1245 | + |
1246 | +//typedef struct image2d_t_* image2d_t; |
1247 | + |
1248 | +void _cl_overloadable write_imagef ( image2d_t image, |
1249 | + int2 coord, |
1250 | + float4 color) { |
1251 | + ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ] = color; |
1252 | +} |
1253 | + |
1254 | +void _cl_overloadable write_imagei ( image2d_t image, |
1255 | + int2 coord, |
1256 | + int4 color) { |
1257 | + ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ] = (float4)(color.x,color.y,color.z,color.w); |
1258 | +} |
1259 | |
1260 | === modified file 'lib/llvmopencl/GenerateHeader.cc' |
1261 | --- lib/llvmopencl/GenerateHeader.cc 2012-02-24 17:50:01 +0000 |
1262 | +++ lib/llvmopencl/GenerateHeader.cc 2012-05-30 08:34:23 +0000 |
1263 | @@ -123,6 +123,8 @@ |
1264 | return changed; |
1265 | } |
1266 | |
1267 | +#include <iostream> |
1268 | + |
1269 | |
1270 | |
1271 | void |
1272 | GenerateHeader::ProcessPointers(Function *F, |
1273 | @@ -134,12 +136,17 @@ |
1274 | |
1275 | bool is_pointer[num_args]; |
1276 | bool is_local[num_args]; |
1277 | + bool is_image[num_args]; |
1278 | + bool is_sampler[num_args]; |
1279 | |
1280 | int i = 0; |
1281 | for (Function::const_arg_iterator ii = F->arg_begin(), |
1282 | ee = F->arg_end(); |
1283 | ii != ee; ++ii) { |
1284 | Type *t = ii->getType(); |
1285 | + |
1286 | + is_image[i] = false; |
1287 | + is_sampler[i] = false; |
1288 | |
1289 | if (const PointerType *p = dyn_cast<PointerType> (t)) { |
1290 | is_pointer[i] = true; |
1291 | @@ -153,6 +160,22 @@ |
1292 | is_pointer[i] = false; |
1293 | is_local[i] = false; |
1294 | } |
1295 | + |
1296 | + if( t->isPointerTy() ) { |
1297 | + if( t->getPointerElementType()->isStructTy() ) { |
1298 | + string name = t->getPointerElementType()->getStructName().str(); |
1299 | + if( name == "struct.image2d_t_" ) { // TODO image3d? |
1300 | + is_image[i] = true; |
1301 | + is_pointer[i] = false; |
1302 | + is_local[i] = false; |
1303 | + } |
1304 | + if( name == "struct.sampler_t_" ) { |
1305 | + is_sampler[i] = true; |
1306 | + is_pointer[i] = false; |
1307 | + is_local[i] = false; |
1308 | + } |
1309 | + } |
1310 | + } |
1311 | |
1312 | ++i; |
1313 | } |
1314 | @@ -172,6 +195,22 @@ |
1315 | out << ", " << is_local[i]; |
1316 | } |
1317 | out << "}\n"; |
1318 | + |
1319 | + out << "#define _" << F->getName() << "_ARG_IS_IMAGE {"; |
1320 | + if (num_args != 0) { |
1321 | + out << is_image[0]; |
1322 | + for (i = 1; i < num_args; ++i) |
1323 | + out << ", " << is_image[i]; |
1324 | + } |
1325 | + out << "}\n"; |
1326 | + |
1327 | + out << "#define _" << F->getName() << "_ARG_IS_SAMPLER {"; |
1328 | + if (num_args != 0) { |
1329 | + out << is_sampler[0]; |
1330 | + for (i = 1; i < num_args; ++i) |
1331 | + out << ", " << is_sampler[i]; |
1332 | + } |
1333 | + out << "}\n"; |
1334 | } |
1335 | |
1336 | |
1337 | |
1338 | |
1339 | === modified file 'scripts/pocl-kernel.in' |
1340 | --- scripts/pocl-kernel.in 2012-05-14 17:44:09 +0000 |
1341 | +++ scripts/pocl-kernel.in 2012-05-30 08:34:23 +0000 |
1342 | @@ -60,6 +60,8 @@ |
1343 | unsigned _num_args = _${kernel}_NUM_ARGS; |
1344 | int _arg_is_pointer[] = _${kernel}_ARG_IS_POINTER; |
1345 | int _arg_is_local[] = _${kernel}_ARG_IS_LOCAL; |
1346 | +int _arg_is_image[] = _${kernel}_ARG_IS_IMAGE; |
1347 | +int _arg_is_sampler[] = _${kernel}_ARG_IS_SAMPLER; |
1348 | unsigned _num_locals = _${kernel}_NUM_LOCALS; |
1349 | #if _${kernel}_NUM_LOCALS != 0 |
1350 | unsigned _local_sizes[_${kernel}_NUM_LOCALS] = _${kernel}_LOCAL_SIZE; |
1351 | @@ -93,6 +95,8 @@ |
1352 | #endif |
1353 | _${kernel}_ARG_IS_LOCAL, |
1354 | _${kernel}_ARG_IS_POINTER, |
1355 | + _${kernel}_ARG_IS_IMAGE, |
1356 | + _${kernel}_ARG_IS_SAMPLER, |
1357 | _${kernel}_workgroup_fast |
1358 | }; |
1359 | EOF |
1360 | |
1361 | === modified file 'tests/testsuite-samples.at' |
1362 | --- tests/testsuite-samples.at 2012-05-24 21:26:09 +0000 |
1363 | +++ tests/testsuite-samples.at 2012-05-30 08:34:23 +0000 |
1364 | @@ -73,6 +73,32 @@ |
1365 | |
1366 | AT_CLEANUP |
1367 | |
1368 | +AT_SETUP([Run Chapter 8: ImageFilter2D]) |
1369 | +AT_KEYWORDS([booksamples imagefilter2d]) |
1370 | +AT_SKIP_IF([! test -e $abs_top_srcdir/examples/opencl-book-samples/checkout]) |
1371 | +AT_CHECK_UNQUOTED([ |
1372 | +cd ${abs_top_srcdir}/examples/opencl-book-samples/checkout/src/Chapter_8/ImageFilter2D ; |
1373 | +#sed '13c\ ' -i ImageFilter2D.cl ; |
1374 | +#sed '14c\ ' -i ImageFilter2D.cl ; |
1375 | +#sed '15c\ ' -i ImageFilter2D.cl ; |
1376 | +#sed "3c float kernelWeights[9] = { 1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f,1.0f, 2.0f, 1.0f }; \ " -i ImageFilter2D.cl ; |
1377 | +cp ${abs_top_srcdir}/examples/opencl-book-samples/ImageFilter2D.cl ./ |
1378 | +sed '418cclFinish(commandQueue);' -i ImageFilter2D.cpp ; |
1379 | +cd ${abs_top_srcdir}/examples/opencl-book-samples/checkout/build/src/Chapter_8/ImageFilter2D ; |
1380 | +make] |
1381 | +, 0, [ignore], [ignore]) |
1382 | + |
1383 | +AT_CHECK_UNQUOTED([ |
1384 | +cd $abs_top_srcdir/examples/opencl-book-samples/checkout/build/src/Chapter_8/ImageFilter2D |
1385 | +./ImageFilter2D ../../../../src/Chapter_19/oclFlow/data/minicooper/frame10.png output.png |
1386 | +], 0, |
1387 | +[Could not create GPU context, trying CPU... |
1388 | + |
1389 | +Executed program succesfully. |
1390 | +], [ignore]) |
1391 | + |
1392 | +AT_CLEANUP |
1393 | + |
1394 | AT_SETUP([Run Chapter 12: VectorAdd (C++ bindings)]) |
1395 | AT_KEYWORDS([booksamples]) |
1396 | AT_SKIP_IF([! test -e $abs_top_srcdir/examples/opencl-book-samples/checkout]) |