Merge lp:~viitanet/pocl/ImageSupport into lp:~pocl/pocl/trunk

Proposed by Timo Viitanen
Status: Merged
Merged at revision: 306
Proposed branch: lp:~viitanet/pocl/ImageSupport
Merge into: lp:~pocl/pocl/trunk
Diff against target: 1394 lines (+900/-95)
32 files modified
TODO (+0/-3)
examples/opencl-book-samples/ImageFilter2D.cl (+34/-0)
examples/opencl-book-samples/Makefile.am (+1/-0)
include/_kernel.h (+36/-0)
include/pocl_device.h (+2/-0)
lib/CL/Makefile.am (+14/-13)
lib/CL/clCreateBuffer.c (+1/-0)
lib/CL/clCreateImage2D.c (+108/-4)
lib/CL/clCreateKernel.c (+2/-0)
lib/CL/clCreateSampler.c (+30/-10)
lib/CL/clEnqueueCopyBufferToImage.c (+46/-13)
lib/CL/clEnqueueReadBufferRect.c (+2/-0)
lib/CL/clEnqueueReadImage.c (+23/-16)
lib/CL/clEnqueueWriteImage.c (+22/-16)
lib/CL/clGetDeviceInfo.c (+1/-2)
lib/CL/clGetSupportedImageFormats.c (+49/-10)
lib/CL/clReleaseSampler.c (+3/-5)
lib/CL/devices/basic/basic.c (+25/-0)
lib/CL/devices/dev_image.h (+17/-0)
lib/CL/devices/pthread/pthread.c (+39/-1)
lib/CL/pocl_cl.h (+17/-1)
lib/CL/pocl_image_util.c (+228/-0)
lib/CL/pocl_image_util.h (+50/-0)
lib/kernel/get_image_height.cl (+7/-0)
lib/kernel/get_image_width.cl (+7/-0)
lib/kernel/image.h (+15/-0)
lib/kernel/read_image.cl (+30/-0)
lib/kernel/sources.mk (+5/-1)
lib/kernel/write_image.cl (+17/-0)
lib/llvmopencl/GenerateHeader.cc (+39/-0)
scripts/pocl-kernel.in (+4/-0)
tests/testsuite-samples.at (+26/-0)
To merge this branch: bzr merge lp:~viitanet/pocl/ImageSupport
Reviewer Review Type Date Requested Status
pocl maintaners Pending
Review via email: mp+107929@code.launchpad.net

This proposal supersedes a proposal from 2012-05-28.

Description of the change

Implemented parts of OpenCL Image API:
clCreateImage2D
clCreateSampler
clReleaseSampler
clEnqueueCopyBufferToImage
clEnqueueWriteImage
clEnqueueReadImage
clGetSupportedImageFormats

read_imagef
write_imagef
get_image_width
get_image_height

Supported data types: CL_UNORM_INT8, CL_FLOAT
Supported channel orders: CL_RGBA, CL_R
Sampler always defaults to CLK_NORMALIZED_COORDINATES_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST.

Added a 'make check' test from opencl-book-samples, chapter 8, ImageFilter2D.
Other possible tests:
Chapter 14, histogram, requires atomics
Chapter 19, oclFlow, requires OpenCV
Rodinia-leukocyte crashes on compiler bug (https://bugs.launchpad.net/pocl/+bug/966992)
Rodinia-nn doesn't actually use images, but runs, TODO
Rodinia-gaussian doesn't actually use images; crashes
Rodinia-particlefilter crashes on another compiler bug

To post a comment you must log in.
lp:~viitanet/pocl/ImageSupport updated
306. By Pekka Jääskeläinen

Some image APIs from Timo.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'TODO'
2--- TODO 2012-05-24 21:26:09 +0000
3+++ TODO 2012-05-30 08:34:23 +0000
4@@ -49,15 +49,12 @@
5 * 5.2.1 Creating buffer objects
6 * 5.2.4 Mapping buffer objects
7 * 5.3 Image objects
8- * clCreateImage2D (deprecated in OpenCL 1.2) (*, R[gaussian, leukocyte, nn])
9 * 5.3.3 Reading, Writing and Copying Image Objects
10- * clEnqueueReadImage (*, R[nn])
11 * 5.4 Querying, Umapping, Migrating, ... Mem objects
12 * 5.4.1 Retaining and Releasing Memory Objects
13 * 5.4.2 Unmapping Mapped Memory Objects
14 * 5.5 Sampler objects
15 * 5.5.1 Creating Sampler Objects
16- * clCreateSampler (*)
17 * 5.6.1 Creating Program Objects
18 * 5.7.1 Creating Kernel Objects
19 * 5.9 Event objects
20
21=== added file 'examples/opencl-book-samples/ImageFilter2D.cl'
22--- examples/opencl-book-samples/ImageFilter2D.cl 1970-01-01 00:00:00 +0000
23+++ examples/opencl-book-samples/ImageFilter2D.cl 2012-05-30 08:34:23 +0000
24@@ -0,0 +1,34 @@
25+
26+ // Gaussian Kernel is:
27+ // 1 2 1
28+ // 2 4 2
29+ // 1 2 1
30+float kernelWeights[9] = { 1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f,1.0f, 2.0f, 1.0f };
31+
32+// Gaussian filter of image
33+__kernel void gaussian_filter(__read_only image2d_t srcImg,
34+ __write_only image2d_t dstImg,
35+ sampler_t sampler,
36+ int width, int height)
37+{
38+ int2 startImageCoord = (int2) (get_global_id(0) - 1, get_global_id(1) - 1);
39+ int2 endImageCoord = (int2) (get_global_id(0) + 1, get_global_id(1) + 1);
40+ int2 outImageCoord = (int2) (get_global_id(0), get_global_id(1));
41+
42+ if (outImageCoord.x < width && outImageCoord.y < height)
43+ {
44+ int weight = 0;
45+ float4 outColor = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
46+ for( int y = startImageCoord.y; y <= endImageCoord.y; y++)
47+ {
48+ for( int x = startImageCoord.x; x <= endImageCoord.x; x++)
49+ {
50+ outColor += (read_imagef(srcImg, sampler, (int2)(x, y)) * (kernelWeights[weight] / 16.0f));
51+ weight += 1;
52+ }
53+ }
54+
55+ // Write the output value to image
56+ write_imagef(dstImg, outImageCoord, outColor);
57+ }
58+}
59
60=== modified file 'examples/opencl-book-samples/Makefile.am'
61--- examples/opencl-book-samples/Makefile.am 2012-05-21 08:51:32 +0000
62+++ examples/opencl-book-samples/Makefile.am 2012-05-30 08:34:23 +0000
63@@ -39,6 +39,7 @@
64 sed -i 's/size_t localWorkSize = maxWorkGroupSize/size_t localWorkSize = 2/g' src/Chapter_16/Dijkstra/oclDijkstraKernel.cpp; \
65 sed -i 's/device.j..type == CL_DEVICE_TYPE_GPU/device[j].type \& CL_DEVICE_TYPE_GPU/g' src/Chapter_22/spmv.c; \
66 sed -i 's/context.CL_DEVICE_TYPE_GPU/context\(CL_DEVICE_TYPE_CPU/g' src/Chapter_12/VectorAdd/vecadd.cpp; \
67+ rm src/Chapter_12/Sinewave/CMakeLists.txt; \
68 mkdir -p build; cd build; \
69 cmake \
70 -D CMAKE_CXX_FLAGS:STRING='-g -O3 -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -lGL -lglut ${PTHREAD_CFLAGS}' \
71
72=== modified file 'include/_kernel.h'
73--- include/_kernel.h 2012-03-06 23:17:03 +0000
74+++ include/_kernel.h 2012-05-30 08:34:23 +0000
75@@ -1772,3 +1772,39 @@
76 _CL_DECLARE_ASYNC_COPY_FUNCS(float);
77 __IF_FP64(_CL_DECLARE_ASYNC_COPY_FUNCS(double));
78
79+// Image support
80+
81+typedef int sampler_t;
82+
83+#define CLK_ADDRESS_NONE 0x00
84+#define CLK_ADDRESS_MIRRORED_REPEAT 0x01
85+#define CLK_ADDRESS_REPEAT 0x02
86+#define CLK_ADDRESS_CLAMP_TO_EDGE 0x03
87+#define CLK_ADDRESS_CLAMP 0x04
88+
89+#define CLK_NORMALIZED_COORDS_FALSE 0x00
90+#define CLK_NORMALIZED_COORDS_TRUE 0x08
91+
92+#define CLK_FILTER_NEAREST 0x00
93+#define CLK_FILTER_LINEAR 0x10
94+
95+typedef struct image2d_t_* image2d_t;
96+
97+float4 _cl_overloadable read_imagef( image2d_t image,
98+ sampler_t sampler,
99+ int2 coord);
100+
101+float4 _cl_overloadable read_imagef( image2d_t image,
102+ sampler_t sampler,
103+ float2 coord);
104+
105+void _cl_overloadable write_imagef( image2d_t image,
106+ int2 coord,
107+ float4 color);
108+
109+void _cl_overloadable write_imagei( image2d_t image,
110+ int2 coord,
111+ int4 color);
112+
113+int get_image_width (image2d_t image);
114+int get_image_height (image2d_t image);
115
116=== modified file 'include/pocl_device.h'
117--- include/pocl_device.h 2012-05-08 17:37:35 +0000
118+++ include/pocl_device.h 2012-05-30 08:34:23 +0000
119@@ -51,6 +51,8 @@
120 unsigned short alocal_sizes[MAX_KERNEL_ARGS];
121 char arg_is_local[MAX_KERNEL_ARGS];
122 char arg_is_pointer[MAX_KERNEL_ARGS];
123+ char arg_is_image[MAX_KERNEL_ARGS];
124+ char arg_is_sampler[MAX_KERNEL_ARGS];
125 pocl_workgroup work_group_func;
126 } __kernel_metadata;
127
128
129=== modified file 'lib/CL/Makefile.am'
130--- lib/CL/Makefile.am 2012-05-28 12:03:51 +0000
131+++ lib/CL/Makefile.am 2012-05-30 08:34:23 +0000
132@@ -38,9 +38,9 @@
133 clGetCommandQueueInfo.c \
134 clCreateBuffer.c \
135 clCreateSubBuffer.c \
136- clEnqueueReadBuffer.c \
137- clEnqueueReadBufferRect.c \
138- clEnqueueMapBuffer.c \
139+ clEnqueueReadBuffer.c \
140+ clEnqueueReadBufferRect.c \
141+ clEnqueueMapBuffer.c \
142 clEnqueueUnmapMemObject.c \
143 clReleaseMemObject.c \
144 clRetainMemObject.c \
145@@ -89,23 +89,24 @@
146 clEnqueueTask.c \
147 clCreateImage2D.c \
148 clCreateImage3D.c \
149- clEnqueueReadImage.c \
150- clEnqueueWriteImage.c \
151+ clEnqueueReadImage.c \
152+ clEnqueueWriteImage.c \
153+ clCreateSampler.c \
154+ clReleaseSampler.c \
155+ clRetainSampler.c \
156+ clGetSamplerInfo.c \
157 clEnqueueCopyImage.c \
158- clEnqueueMapImage.c \
159- clGetSupportedImageFormats.c \
160+ clEnqueueMapImage.c \ \
161 clGetImageInfo.c \
162 clCreateFromGLTexture2D.c \
163 clCreateFromGLTexture3D.c \
164 clUnloadCompiler.c \
165+ clGetSupportedImageFormats.c \
166 clGetExtensionFunctionAddress.c \
167 clIcdGetPlatformIDsKHR.c \
168- clCreateSampler.c \
169- clReleaseSampler.c \
170- clRetainSampler.c \
171- clGetSamplerInfo.c \
172- pocl_cl.h \
173- pocl_util.c pocl_util.h
174+ pocl_cl.h \
175+ pocl_util.c pocl_util.h \
176+ pocl_image_util.c pocl_image_util.h
177
178
179
180
181=== modified file 'lib/CL/clCreateBuffer.c'
182--- lib/CL/clCreateBuffer.c 2012-05-24 20:46:33 +0000
183+++ lib/CL/clCreateBuffer.c 2012-05-30 08:34:23 +0000
184@@ -49,6 +49,7 @@
185 mem->map_count = 0;
186 mem->mappings = NULL;
187 mem->flags = flags;
188+ mem->is_image = CL_FALSE;
189 POCL_INIT_ICD_OBJECT(mem);
190
191 /* Store the per device buffer pointers always to a known
192
193=== modified file 'lib/CL/clCreateImage2D.c'
194--- lib/CL/clCreateImage2D.c 2012-01-30 12:21:12 +0000
195+++ lib/CL/clCreateImage2D.c 2012-05-30 08:34:23 +0000
196@@ -1,17 +1,121 @@
197 #include "pocl_cl.h"
198+#include "assert.h"
199+#include "pocl_image_util.h"
200+
201 CL_API_ENTRY cl_mem CL_API_CALL
202 clCreateImage2D(cl_context context,
203 cl_mem_flags flags,
204 const cl_image_format * image_format,
205- size_t image_width,
206- size_t image_height,
207+ size_t width,
208+ size_t height,
209 size_t image_row_pitch,
210 void * host_ptr,
211 cl_int * errcode_ret)
212 CL_API_SUFFIX__VERSION_1_0
213 {
214- POCL_ABORT_UNIMPLEMENTED();
215- return CL_SUCCESS;
216+ cl_mem mem;
217+ cl_device_id device_id;
218+ void *device_ptr;
219+ unsigned i, j;
220+ int size;
221+
222+ if (context == NULL)
223+ POCL_ERROR(CL_INVALID_CONTEXT);
224+
225+ mem = (cl_mem) malloc(sizeof(struct _cl_mem));
226+ if (mem == NULL)
227+ POCL_ERROR(CL_OUT_OF_HOST_MEMORY);
228+
229+ POCL_INIT_OBJECT(mem);
230+ mem->parent = NULL;
231+ mem->map_count = 0;
232+ mem->mappings = NULL;
233+ mem->flags = flags;
234+ mem->is_image = CL_TRUE;
235+
236+ cl_channel_order order = image_format->image_channel_order;
237+ cl_channel_type type = image_format->image_channel_data_type;
238+
239+ int dev_elem_size = sizeof(cl_float); //TODO
240+ int dev_channels = 4;
241+
242+ if (image_row_pitch == 0)
243+ image_row_pitch = width;
244+
245+ if (image_row_pitch != width)
246+ POCL_ABORT_UNIMPLEMENTED();
247+
248+ size = width * height * dev_elem_size * dev_channels;
249+
250+ mem->device_ptrs = (void **) malloc(context->num_devices * sizeof(void *));
251+ if (mem->device_ptrs == NULL)
252+ {
253+ free(mem);
254+ POCL_ERROR(CL_OUT_OF_HOST_MEMORY);
255+ }
256+
257+ int host_channels;
258+
259+ if (order == CL_RGBA)
260+ host_channels=4;
261+ else if (order == CL_R)
262+ host_channels=1;
263+ else
264+ POCL_ABORT_UNIMPLEMENTED();
265+
266+ mem->size = size;
267+ mem->context = context;
268+
269+ mem->image_width = width;
270+ mem->image_height = height;
271+ mem->image_row_pitch = image_row_pitch;
272+ mem->image_channel_data_type = type;
273+ mem->image_channel_order = order;
274+
275+ for (i = 0; i < context->num_devices; ++i)
276+ {
277+ if (i > 0)
278+ clRetainMemObject (mem);
279+ device_id = context->devices[i];
280+ device_ptr = device_id->malloc(device_id->data, 0, size, NULL);
281+
282+ if (device_ptr == NULL)
283+ {
284+ for (j = 0; j < i; ++j)
285+ {
286+ device_id = context->devices[j];
287+ device_id->free(device_id->data, 0, mem->device_ptrs[j]);
288+ }
289+ free(mem);
290+ POCL_ERROR(CL_MEM_OBJECT_ALLOCATION_FAILURE);
291+ }
292+ mem->device_ptrs[i] = device_ptr;
293+ /* The device allocator allocated from a device-host shared memory. */
294+ if (flags & CL_MEM_ALLOC_HOST_PTR ||
295+ flags & CL_MEM_USE_HOST_PTR)
296+ POCL_ABORT_UNIMPLEMENTED();
297+
298+ if (flags & CL_MEM_COPY_HOST_PTR)
299+ {
300+ size_t origin[3] = { 0, 0, 0 };
301+ size_t region[3] = { width, height, 1 };
302+ pocl_write_image( mem,
303+ context->devices[i],
304+ origin,
305+ region,
306+ 0,
307+ 1,
308+ host_ptr );
309+ }
310+ }
311+
312+ POCL_RETAIN_OBJECT(context);
313+
314+
315+ if (errcode_ret != NULL)
316+ *errcode_ret = CL_SUCCESS;
317+
318+ return mem;
319 }
320
321
322
323=== modified file 'lib/CL/clCreateKernel.c'
324--- lib/CL/clCreateKernel.c 2012-05-23 08:38:12 +0000
325+++ lib/CL/clCreateKernel.c 2012-05-30 08:34:23 +0000
326@@ -143,6 +143,8 @@
327 kernel->dlhandle = dlhandle; /* TODO: why is this stored? */
328 kernel->arg_is_pointer = lt_dlsym(dlhandle, "_arg_is_pointer");
329 kernel->arg_is_local = lt_dlsym(dlhandle, "_arg_is_local");
330+ kernel->arg_is_image = lt_dlsym(dlhandle, "_arg_is_image");
331+ kernel->arg_is_sampler = lt_dlsym(dlhandle, "_arg_is_sampler");
332 kernel->num_locals = *(cl_uint *) lt_dlsym(dlhandle, "_num_locals");
333 kernel->arguments =
334 (struct pocl_argument *) malloc ((kernel->num_args + kernel->num_locals) *
335
336=== modified file 'lib/CL/clCreateSampler.c'
337--- lib/CL/clCreateSampler.c 2012-05-22 21:26:29 +0000
338+++ lib/CL/clCreateSampler.c 2012-05-30 08:34:23 +0000
339@@ -1,13 +1,33 @@
340 #include "pocl_cl.h"
341-
342-CL_API_ENTRY cl_sampler CL_API_CALL
343-clCreateSampler(cl_context context ,
344- cl_bool normalized_coords ,
345- cl_addressing_mode addressing_mode ,
346- cl_filter_mode filter_mode ,
347- cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0
348+extern CL_API_ENTRY cl_sampler CL_API_CALL
349+clCreateSampler(cl_context context,
350+ cl_bool normalized_coords,
351+ cl_addressing_mode addressing_mode,
352+ cl_filter_mode filter_mode,
353+ cl_int * errcode_ret)
354+CL_API_SUFFIX__VERSION_1_0
355 {
356- POCL_ABORT_UNIMPLEMENTED();
357- return CL_SUCCESS;
358+ cl_sampler sampler;
359+
360+ if (context == NULL)
361+ POCL_ERROR(CL_INVALID_CONTEXT);
362+
363+ sampler = (cl_sampler) malloc(sizeof(struct _cl_sampler));
364+ if (sampler == NULL)
365+ POCL_ERROR(CL_OUT_OF_HOST_MEMORY);
366+
367+ if (normalized_coords == CL_TRUE)
368+ POCL_ABORT_UNIMPLEMENTED();
369+
370+ if (addressing_mode != CL_ADDRESS_CLAMP_TO_EDGE)
371+ POCL_ABORT_UNIMPLEMENTED();
372+
373+ if (filter_mode != CL_FILTER_NEAREST)
374+ POCL_ABORT_UNIMPLEMENTED();
375+
376+ sampler->normalized_coords = normalized_coords;
377+ sampler->addressing_mode = addressing_mode;
378+ sampler->filter_mode = filter_mode;
379+
380+ return sampler;
381 }
382-
383
384=== modified file 'lib/CL/clEnqueueCopyBufferToImage.c'
385--- lib/CL/clEnqueueCopyBufferToImage.c 2012-05-22 21:26:29 +0000
386+++ lib/CL/clEnqueueCopyBufferToImage.c 2012-05-30 08:34:23 +0000
387@@ -1,17 +1,50 @@
388 #include "pocl_cl.h"
389
390-CL_API_ENTRY cl_int CL_API_CALL
391-clEnqueueCopyBufferToImage(cl_command_queue command_queue ,
392- cl_mem src_buffer ,
393- cl_mem dst_image ,
394- size_t src_offset ,
395- const size_t * dst_origin ,
396- const size_t * region ,
397- cl_uint num_events_in_wait_list ,
398- const cl_event * event_wait_list ,
399+extern CL_API_ENTRY cl_int CL_API_CALL
400+clEnqueueCopyBufferToImage(cl_command_queue command_queue,
401+ cl_mem buffer,
402+ cl_mem image,
403+ size_t src_offset,
404+ const size_t * dst_origin, /*[3]*/
405+ const size_t * region, /*[3]*/
406+ cl_uint num_events_in_wait_list,
407+ const cl_event * event_wait_list,
408 cl_event * event ) CL_API_SUFFIX__VERSION_1_0
409-{
410- POCL_ABORT_UNIMPLEMENTED();
411- return CL_SUCCESS;
412-}
413+ {
414+ if (region == NULL)
415+ return CL_INVALID_VALUE;
416+
417+ if (region[2] != 1) //3D image
418+ POCL_ABORT_UNIMPLEMENTED();
419+
420+ int dev_elem_size = sizeof(cl_float);
421+ int dev_channels = 4;
422+
423+ int host_elem_size;
424+ int host_channels;
425+ pocl_get_image_information (image, &host_channels, &host_elem_size);
426+
427+ void* temp = malloc (image->size);
428+
429+ cl_device_id device_id = command_queue->device;
430
431+ device_id->read
432+ (device_id->data,
433+ temp,
434+ image->device_ptrs[device_id->dev_id],
435+ image->size);
436+
437+ cl_int ret_code = pocl_write_image
438+ (image,
439+ command_queue->device,
440+ dst_origin,
441+ region,
442+ 0,
443+ 0,
444+ temp+src_offset);
445+
446+ free(temp);
447+
448+ return ret_code;
449+ }
450+
451\ No newline at end of file
452
453=== modified file 'lib/CL/clEnqueueReadBufferRect.c'
454--- lib/CL/clEnqueueReadBufferRect.c 2012-05-14 11:45:48 +0000
455+++ lib/CL/clEnqueueReadBufferRect.c 2012-05-30 08:34:23 +0000
456@@ -23,6 +23,7 @@
457
458 #include "pocl_cl.h"
459 #include <assert.h>
460+#include <stdio.h>
461
462 CL_API_ENTRY cl_int CL_API_CALL
463 clEnqueueReadBufferRect(cl_command_queue command_queue,
464@@ -78,6 +79,7 @@
465 buffer_origin, host_origin, region,
466 buffer_row_pitch, buffer_slice_pitch,
467 host_row_pitch, host_slice_pitch);
468+
469
470 return CL_SUCCESS;
471 }
472
473=== modified file 'lib/CL/clEnqueueReadImage.c'
474--- lib/CL/clEnqueueReadImage.c 2012-05-22 21:26:29 +0000
475+++ lib/CL/clEnqueueReadImage.c 2012-05-30 08:34:23 +0000
476@@ -1,19 +1,26 @@
477 #include "pocl_cl.h"
478+#include "assert.h"
479+#include "pocl_image_util.h"
480
481-CL_API_ENTRY cl_int CL_API_CALL
482-clEnqueueReadImage(cl_command_queue command_queue ,
483- cl_mem image ,
484- cl_bool blocking_read ,
485- const size_t * origin ,
486- const size_t * region ,
487- size_t row_pitch ,
488- size_t slice_pitch ,
489- void * ptr ,
490- cl_uint num_events_in_wait_list ,
491- const cl_event * event_wait_list ,
492- cl_event * event ) CL_API_SUFFIX__VERSION_1_0
493+extern CL_API_ENTRY cl_int CL_API_CALL
494+clEnqueueReadImage(cl_command_queue command_queue,
495+ cl_mem image,
496+ cl_bool blocking_read,
497+ const size_t * origin, /* [3] */
498+ const size_t * region, /* [3] */
499+ size_t host_row_pitch,
500+ size_t host_slice_pitch,
501+ void * ptr,
502+ cl_uint num_events_in_wait_list,
503+ const cl_event * event_wait_list,
504+ cl_event * event)
505+CL_API_SUFFIX__VERSION_1_0
506 {
507- POCL_ABORT_UNIMPLEMENTED();
508- return CL_SUCCESS;
509-}
510-
511+ return pocl_read_image(image,
512+ command_queue->device,
513+ origin,
514+ region,
515+ host_row_pitch,
516+ host_slice_pitch,
517+ ptr);
518+}
519\ No newline at end of file
520
521=== modified file 'lib/CL/clEnqueueWriteImage.c'
522--- lib/CL/clEnqueueWriteImage.c 2012-05-22 21:26:29 +0000
523+++ lib/CL/clEnqueueWriteImage.c 2012-05-30 08:34:23 +0000
524@@ -1,19 +1,25 @@
525 #include "pocl_cl.h"
526+#include "pocl_image_util.h"
527
528-CL_API_ENTRY cl_int CL_API_CALL
529-clEnqueueWriteImage(cl_command_queue command_queue ,
530- cl_mem image ,
531- cl_bool blocking_write ,
532- const size_t * origin,
533- const size_t * region,
534- size_t input_row_pitch ,
535- size_t input_slice_pitch ,
536- const void * ptr ,
537- cl_uint num_events_in_wait_list ,
538- const cl_event * event_wait_list ,
539- cl_event * event ) CL_API_SUFFIX__VERSION_1_0
540-{
541- POCL_ABORT_UNIMPLEMENTED();
542- return CL_SUCCESS;
543-}
544+extern CL_API_ENTRY cl_int CL_API_CALL
545+clEnqueueWriteImage(cl_command_queue command_queue,
546+ cl_mem image,
547+ cl_bool blocking_write,
548+ const size_t * origin, /*[3]*/
549+ const size_t * region, /*[3]*/
550+ size_t host_row_pitch,
551+ size_t host_slice_pitch,
552+ const void * ptr,
553+ cl_uint num_events_in_wait_list,
554+ const cl_event * event_wait_list,
555+ cl_event * event) CL_API_SUFFIX__VERSION_1_0
556+ {
557+ return pocl_write_image(image,
558+ command_queue->device,
559+ origin,
560+ region,
561+ host_row_pitch,
562+ host_slice_pitch,
563+ ptr);
564+ }
565
566
567=== modified file 'lib/CL/clGetDeviceInfo.c'
568--- lib/CL/clGetDeviceInfo.c 2012-05-15 22:38:06 +0000
569+++ lib/CL/clGetDeviceInfo.c 2012-05-30 08:34:23 +0000
570@@ -62,8 +62,7 @@
571 switch (param_name)
572 {
573 case CL_DEVICE_IMAGE_SUPPORT:
574- /* Return CL_FALSE until the APIs are implemented. */
575- POCL_RETURN_DEVICE_INFO(cl_bool, CL_FALSE);
576+ POCL_RETURN_DEVICE_INFO(cl_bool, CL_TRUE);
577 case CL_DEVICE_TYPE:
578 POCL_RETURN_DEVICE_INFO(cl_device_type, device->type);
579 case CL_DEVICE_VENDOR_ID:
580
581=== modified file 'lib/CL/clGetSupportedImageFormats.c'
582--- lib/CL/clGetSupportedImageFormats.c 2012-05-22 21:26:29 +0000
583+++ lib/CL/clGetSupportedImageFormats.c 2012-05-30 08:34:23 +0000
584@@ -1,14 +1,53 @@
585 #include "pocl_cl.h"
586
587-CL_API_ENTRY cl_int CL_API_CALL
588-clGetSupportedImageFormats(cl_context context ,
589- cl_mem_flags flags ,
590- cl_mem_object_type image_type ,
591- cl_uint num_entries ,
592- cl_image_format * image_formats ,
593- cl_uint * num_image_formats ) CL_API_SUFFIX__VERSION_1_0
594+extern CL_API_ENTRY cl_int CL_API_CALL
595+clGetSupportedImageFormats(cl_context context,
596+ cl_mem_flags flags,
597+ cl_mem_object_type image_type,
598+ cl_uint num_entries,
599+ cl_image_format * image_formats,
600+ cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0
601 {
602- POCL_ABORT_UNIMPLEMENTED();
603- return CL_SUCCESS;
604+ if (context == NULL)
605+ return CL_INVALID_CONTEXT;
606+
607+ if (image_type != CL_MEM_OBJECT_IMAGE2D)
608+ return CL_INVALID_VALUE;
609+
610+ if (num_entries==0 && image_formats!=NULL)
611+ return CL_INVALID_VALUE;
612+
613+ int idx=0;
614+
615+ const int supported_order_count = 2;
616+ cl_channel_order supported_orders[] =
617+ {
618+ CL_RGBA,
619+ CL_R
620+ };
621+
622+ const int supported_type_count = 2;
623+ cl_channel_type supported_types[] =
624+ {
625+ CL_UNORM_INT8,
626+ CL_FLOAT
627+ };
628+
629+ int i, j;
630+ for (i=0; i<supported_order_count; i++)
631+ for (j=0; j<supported_type_count; j++)
632+ {
633+ if (idx >= num_entries)
634+ return CL_SUCCESS;
635+
636+ image_formats[idx].image_channel_order = supported_orders[i];
637+ image_formats[idx].image_channel_data_type = supported_types[j];
638+
639+ idx++;
640+ }
641+
642+ // Add special cases here if a channel order is supported with only some types or vice versa.
643+ *num_image_formats = idx;
644+
645+ return CL_SUCCESS;
646 }
647-
648
649=== modified file 'lib/CL/clReleaseSampler.c'
650--- lib/CL/clReleaseSampler.c 2012-05-22 21:26:29 +0000
651+++ lib/CL/clReleaseSampler.c 2012-05-30 08:34:23 +0000
652@@ -1,9 +1,7 @@
653 #include "pocl_cl.h"
654-
655-CL_API_ENTRY cl_int CL_API_CALL
656-clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0
657+extern CL_API_ENTRY cl_int CL_API_CALL
658+clReleaseSampler(cl_sampler sampler)
659+CL_API_SUFFIX__VERSION_1_0
660 {
661- POCL_ABORT_UNIMPLEMENTED();
662 return CL_SUCCESS;
663 }
664-
665
666=== modified file 'lib/CL/devices/basic/basic.c'
667--- lib/CL/devices/basic/basic.c 2012-05-28 16:17:57 +0000
668+++ lib/CL/devices/basic/basic.c 2012-05-30 08:34:23 +0000
669@@ -27,6 +27,7 @@
670 #include <string.h>
671 #include <stdlib.h>
672 #include <unistd.h>
673+#include <../dev_image.h>
674 #include <sys/time.h>
675
676 #define max(a,b) (((a) > (b)) ? (a) : (b))
677@@ -235,6 +236,30 @@
678 {
679 arguments[i] = &((*(cl_mem *) (p->value))->device_ptrs[device]);
680 }
681+ else if (kernel->arg_is_image[i])
682+ {
683+ dev_image2d_t di;
684+ cl_mem mem = *(cl_mem*)p->value;
685+ di.data = &((*(cl_mem *) (p->value))->device_ptrs[device]);
686+ di.data = ((*(cl_mem *) (p->value))->device_ptrs[device]);
687+ di.width = mem->image_width;
688+ di.height = mem->image_height;
689+ di.rowpitch = mem->image_row_pitch;
690+ di.order = mem->image_channel_order;
691+ di.data_type = mem->image_channel_data_type;
692+ void* devptr = pocl_basic_malloc(data, 0, sizeof(dev_image2d_t), NULL);
693+ arguments[i] = malloc (sizeof (void *));
694+ *(void **)(arguments[i]) = devptr;
695+ pocl_basic_write( data, &di, devptr, sizeof(dev_image2d_t) );
696+ }
697+ else if (kernel->arg_is_sampler[i])
698+ {
699+ dev_sampler_t ds;
700+
701+ arguments[i] = malloc (sizeof (void *));
702+ *(void **)(arguments[i]) = pocl_basic_malloc(data, 0, sizeof(dev_sampler_t), NULL);
703+ pocl_basic_write( data, &ds, *(void**)arguments[i], sizeof(dev_sampler_t) );
704+ }
705 else
706 {
707 arguments[i] = p->value;
708
709=== added file 'lib/CL/devices/dev_image.h'
710--- lib/CL/devices/dev_image.h 1970-01-01 00:00:00 +0000
711+++ lib/CL/devices/dev_image.h 2012-05-30 08:34:23 +0000
712@@ -0,0 +1,17 @@
713+#ifndef __X86_IMAGE_H__
714+#define __X86_IMAGE_H__
715+
716+//Definition of the image datatype used on basic and pthread (and probably tce?)
717+
718+typedef cl_int dev_sampler_t;
719+
720+typedef struct dev_image2d_t {
721+ void* data;
722+ cl_int width;
723+ cl_int height;
724+ cl_int rowpitch;
725+ cl_int order;
726+ cl_int data_type;
727+} dev_image2d_t;
728+
729+#endif
730
731=== modified file 'lib/CL/devices/pthread/pthread.c'
732--- lib/CL/devices/pthread/pthread.c 2012-05-29 12:16:56 +0000
733+++ lib/CL/devices/pthread/pthread.c 2012-05-30 08:34:23 +0000
734@@ -36,6 +36,7 @@
735 #ifdef CUSTOM_BUFFER_ALLOCATOR
736
737 #include "bufalloc.h"
738+#include <../dev_image.h>
739
740 /* Instead of mallocing a buffer size for a region, try to allocate
741 this many times the buffer size to hopefully avoid mallocs for
742@@ -651,6 +652,15 @@
743 }
744
745 void *
746+pocl_pthread_map_mem (void *data, void *buf_ptr,
747+ size_t offset, size_t size, void* host_ptr)
748+{
749+ /* All global pointers of the pthread/CPU device are in
750+ the host address space already, and up to date. */
751+ return buf_ptr + offset;
752+}
753+
754+void *
755 workgroup_thread (void *p)
756 {
757 struct thread_arguments *ta = (struct thread_arguments *) p;
758@@ -669,6 +679,30 @@
759 }
760 else if (kernel->arg_is_pointer[i])
761 arguments[i] = &((*(cl_mem *) (al->value))->device_ptrs[ta->device]);
762+ else if (kernel->arg_is_image[i])
763+ {
764+ dev_image2d_t di;
765+ cl_mem mem = *(cl_mem*)al->value;
766+ di.data = &((*(cl_mem *) (al->value))->device_ptrs[ta->device]);
767+ di.data = ((*(cl_mem *) (al->value))->device_ptrs[ta->device]);
768+ di.width = mem->image_width;
769+ di.height = mem->image_height;
770+ di.rowpitch = mem->image_row_pitch;
771+ di.order = mem->image_channel_order;
772+ di.data_type = mem->image_channel_data_type;
773+ void* devptr = pocl_pthread_malloc(ta->data, 0, sizeof(dev_image2d_t), NULL);
774+ arguments[i] = malloc (sizeof (void *));
775+ *(void **)(arguments[i]) = devptr;
776+ pocl_pthread_write( ta->data, &di, devptr, sizeof(dev_image2d_t) );
777+ }
778+ else if (kernel->arg_is_sampler[i])
779+ {
780+ dev_sampler_t ds;
781+
782+ arguments[i] = malloc (sizeof (void *));
783+ *(void **)(arguments[i]) = pocl_pthread_malloc(ta->data, 0, sizeof(dev_sampler_t), NULL);
784+ pocl_pthread_write( ta->data, &ds, *(void**)arguments[i], sizeof(dev_sampler_t) );
785+ }
786 else
787 arguments[i] = al->value;
788 }
789@@ -699,11 +733,15 @@
790
791 for (i = 0; i < kernel->num_args; ++i)
792 {
793- if (kernel->arg_is_local[i])
794+ if (kernel->arg_is_local[i] )
795 {
796 pocl_pthread_free(ta->data, 0, *(void **)(arguments[i]));
797 free(arguments[i]);
798 }
799+ else if( kernel->arg_is_sampler[i] || kernel->arg_is_image[i] )
800+ {
801+ free(arguments[i]);
802+ }
803 }
804 for (i = kernel->num_args;
805 i < kernel->num_args + kernel->num_locals;
806
807=== modified file 'lib/CL/pocl_cl.h'
808--- lib/CL/pocl_cl.h 2012-05-29 12:16:56 +0000
809+++ lib/CL/pocl_cl.h 2012-05-30 08:34:23 +0000
810@@ -303,6 +303,15 @@
811 /* in case this is a sub buffer, this points to the parent
812 buffer */
813 cl_mem_t *parent;
814+ /* Image flags */
815+ cl_bool is_image;
816+ cl_channel_order image_channel_order;
817+ cl_channel_type image_channel_data_type;
818+ size_t image_width;
819+ size_t image_height;
820+ /*size_t image_depth;*/
821+ size_t image_row_pitch;
822+ /*size_t image_slice_pitch;*/
823 };
824
825 struct _cl_program {
826@@ -337,6 +346,8 @@
827 lt_dlhandle dlhandle;
828 cl_int *arg_is_pointer;
829 cl_int *arg_is_local;
830+ cl_int *arg_is_image;
831+ cl_int *arg_is_sampler;
832 cl_uint num_locals;
833 struct pocl_argument *arguments;
834 struct _cl_kernel *next;
835@@ -359,8 +370,13 @@
836
837 };
838
839+typedef struct _cl_sampler cl_sampler_t;
840+
841 struct _cl_sampler {
842- POCL_ICD_OBJECT
843+ POCL_ICD_OBJECT;
844+ cl_bool normalized_coords;
845+ cl_addressing_mode addressing_mode;
846+ cl_filter_mode filter_mode;
847 };
848
849 #define POCL_PROFILE_QUEUED \
850
851=== added file 'lib/CL/pocl_image_util.c'
852--- lib/CL/pocl_image_util.c 1970-01-01 00:00:00 +0000
853+++ lib/CL/pocl_image_util.c 2012-05-30 08:34:23 +0000
854@@ -0,0 +1,228 @@
855+#include "pocl_cl.h"
856+#include "pocl_image_util.h"
857+#include "assert.h"
858+
859+extern void
860+pocl_get_image_information (cl_mem image,
861+ int* channels_out,
862+ int* elem_size_out)
863+ {
864+ cl_channel_order order = image->image_channel_order;
865+ cl_channel_type type = image->image_channel_data_type;
866+
867+ int host_elem_size;
868+ if (type == CL_FLOAT)
869+ host_elem_size=4;
870+ else if (type==CL_UNORM_INT8)
871+ host_elem_size=1;
872+ else
873+ POCL_ABORT_UNIMPLEMENTED();
874+ if (elem_size_out != NULL)
875+ *elem_size_out = host_elem_size;
876+
877+ int host_channels;
878+ if (order == CL_RGBA)
879+ host_channels=4;
880+ else if (order == CL_R)
881+ host_channels=1;
882+ else
883+ POCL_ABORT_UNIMPLEMENTED();
884+ if (channels_out != NULL)
885+ *channels_out = host_channels;
886+ }
887+extern cl_int
888+pocl_write_image (cl_mem image,
889+ cl_device_id device_id,
890+ const size_t * origin_, /*[3]*/
891+ const size_t * region_, /*[3]*/
892+ size_t host_row_pitch,
893+ size_t host_slice_pitch,
894+ const void * ptr)
895+ {
896+ if (image == NULL)
897+ return CL_INVALID_MEM_OBJECT;
898+
899+ if ((ptr == NULL) ||
900+ (region_ == NULL))
901+ return CL_INVALID_VALUE;
902+
903+ int width = image->image_width;
904+ int height = image->image_height;
905+ cl_channel_order order = image->image_channel_order;
906+ cl_channel_type type = image->image_channel_data_type;
907+
908+ size_t dev_elem_size = sizeof(cl_float);
909+ int dev_channels = 4;
910+
911+ int host_elem_size;
912+ int host_channels;
913+ pocl_get_image_information (image, &host_channels, &host_elem_size);
914+
915+ size_t origin[3] = { origin_[0]*dev_elem_size*dev_channels, origin_[1], origin_[2] };
916+ size_t region[3] = { region_[0]*dev_elem_size*dev_channels, region_[1], region_[2] };
917+
918+ size_t image_row_pitch = width*dev_elem_size*dev_channels;
919+ size_t image_slice_pitch = 0;
920+
921+ if ((region[0]*region[1]*region[2] > 0) &&
922+ (region[0]-1 +
923+ image_row_pitch * (region[1]-1) +
924+ image_slice_pitch * (region[2]-1) >= image->size))
925+ return CL_INVALID_VALUE;
926+
927+ cl_float* temp = malloc( width*height*dev_channels*dev_elem_size );
928+
929+ if (temp == NULL)
930+ return CL_OUT_OF_HOST_MEMORY;
931+
932+ int x, y, k;
933+
934+ for (y=0; y<height; y++)
935+ for (x=0; x<width*dev_channels; x++)
936+ temp[x+y*width*dev_channels] = 0.f;
937+
938+ for (y=0; y<height; y++)
939+ {
940+ for (x=0; x<width; x++)
941+ {
942+ cl_float elem[4]; //TODO 0,0,0,0 for some modes?
943+
944+ for (k=0; k<host_channels; k++)
945+ {
946+ if (type == CL_FLOAT)
947+ elem[k] = ((float*)ptr)[k+(x+y*width)*host_channels];
948+ else if (type==CL_UNORM_INT8)
949+ {
950+ cl_uchar foo = ((cl_uchar*)ptr)[k+(x+y*width)*host_channels];
951+ elem[k] = (float)(foo) * (1.f/255.f);
952+ }
953+ else
954+ POCL_ABORT_UNIMPLEMENTED();
955+ }
956+
957+ if (order == CL_RGBA)
958+ for (k=0; k<4; k++)
959+ temp[(x+y*width)*dev_channels+k] = elem[k];
960+ else if (order == CL_R)
961+ {
962+ temp[(x+y*width)*dev_channels+0] = elem[0];
963+ temp[(x+y*width)*dev_channels+1] = 0.f;
964+ temp[(x+y*width)*dev_channels+2] = 0.f;
965+ temp[(x+y*width)*dev_channels+3] = 1.f;
966+ }
967+ }
968+ }
969+
970+
971+ device_id->write_rect(device_id->data, temp,
972+ image->device_ptrs[device_id->dev_id],
973+ origin, origin, region,
974+ image_row_pitch, image_slice_pitch,
975+ image_row_pitch, image_slice_pitch);
976+
977+ free (temp);
978+ return CL_SUCCESS;
979+ }
980+
981+extern cl_int
982+pocl_read_image (cl_mem image,
983+ cl_device_id device_id,
984+ const size_t * origin_, /*[3]*/
985+ const size_t * region_, /*[3]*/
986+ size_t host_row_pitch,
987+ size_t host_slice_pitch,
988+ void * ptr)
989+ {
990+
991+ if (image == NULL)
992+ return CL_INVALID_MEM_OBJECT;
993+
994+ if ((ptr == NULL) ||
995+ (region_ == NULL))
996+ return CL_INVALID_VALUE;
997+
998+ int width = image->image_width;
999+ int height = image->image_height;
1000+ int dev_elem_size = sizeof(cl_float);
1001+ int dev_channels = 4;
1002+ size_t origin[3] = { origin_[0]*dev_elem_size*dev_channels, origin_[1], origin_[2] };
1003+ size_t region[3] = { region_[0]*dev_elem_size*dev_channels, region_[1], region_[2] };
1004+
1005+ size_t image_row_pitch = width*dev_elem_size*dev_channels;
1006+ size_t image_slice_pitch = 0;
1007+
1008+ if ((region[0]*region[1]*region[2] > 0) &&
1009+ (region[0]-1 +
1010+ image_row_pitch * (region[1]-1) +
1011+ image_slice_pitch * (region[2]-1) >= image->size))
1012+ return CL_INVALID_VALUE;
1013+
1014+
1015+ int i, j, k;
1016+
1017+ cl_channel_order order = image->image_channel_order;
1018+ cl_channel_type type = image->image_channel_data_type;
1019+
1020+ cl_float* temp = malloc( width*height*dev_channels*dev_elem_size );
1021+
1022+ if (temp == NULL)
1023+ return CL_OUT_OF_HOST_MEMORY;
1024+
1025+ int host_channels, host_elem_size;
1026+
1027+ pocl_get_image_information(image, &host_channels, &host_elem_size);
1028+
1029+ if (host_row_pitch == 0) {
1030+ host_row_pitch = width*host_channels;
1031+ }
1032+
1033+ size_t buffer_origin[3] = { 0, 0, 0 };
1034+
1035+ device_id->read_rect(device_id->data, temp,
1036+ image->device_ptrs[device_id->dev_id],
1037+ origin, origin, region,
1038+ image_row_pitch, image_slice_pitch,
1039+ image_row_pitch, image_slice_pitch);
1040+
1041+ for (j=0; j<height; j++) {
1042+ for (i=0; i<width; i++) {
1043+ cl_float elem[4];
1044+
1045+ for (k=0; k<4; k++)
1046+ elem[k]=0;
1047+
1048+ if (order == CL_RGBA) {
1049+ for (k=0; k<4; k++)
1050+ elem[k] = temp[i*dev_channels + j*width*dev_channels + k];
1051+ }
1052+ else if (order == CL_R) { // host_channels == 1
1053+ elem[0] = temp[i*dev_channels + j*width*dev_channels + 0];
1054+ }
1055+
1056+ if (type == CL_UNORM_INT8)
1057+ { // host_channels == 4
1058+ for (k=0; k<host_channels; k++)
1059+ {
1060+ ((cl_uchar*)ptr)[i*host_channels + j*host_row_pitch + k]
1061+ = (unsigned char)(255*elem[k]);
1062+ }
1063+ }
1064+ else if (type == CL_FLOAT)
1065+ {
1066+ for (k=0; k<host_channels; k++)
1067+ {
1068+ POCL_ABORT_UNIMPLEMENTED();
1069+ ((cl_float*)ptr)[i*host_channels + j*host_row_pitch + k]
1070+ = elem[k];
1071+ }
1072+ }
1073+ else
1074+ POCL_ABORT_UNIMPLEMENTED();
1075+ }
1076+ }
1077+
1078+ free (temp);
1079+
1080+ return CL_SUCCESS;
1081+ }
1082+
1083\ No newline at end of file
1084
1085=== added file 'lib/CL/pocl_image_util.h'
1086--- lib/CL/pocl_image_util.h 1970-01-01 00:00:00 +0000
1087+++ lib/CL/pocl_image_util.h 2012-05-30 08:34:23 +0000
1088@@ -0,0 +1,50 @@
1089+/* OpenCL runtime library: pocl_image_util image utility functions
1090+
1091+ Copyright (c) 2012 Timo Viitanen / Tampere University of Technology
1092+
1093+ Permission is hereby granted, free of charge, to any person obtaining a copy
1094+ of this software and associated documentation files (the "Software"), to deal
1095+ in the Software without restriction, including without limitation the rights
1096+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
1097+ copies of the Software, and to permit persons to whom the Software is
1098+ furnished to do so, subject to the following conditions:
1099+
1100+ The above copyright notice and this permission notice shall be included in
1101+ all copies or substantial portions of the Software.
1102+
1103+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1104+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1105+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
1106+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1107+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
1108+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
1109+ THE SOFTWARE.
1110+*/
1111+
1112+#ifndef POCL_IMAGE_UTIL_H
1113+#define POCL_IMAGE_UTIL_H
1114+
1115+extern cl_int
1116+pocl_write_image (cl_mem image,
1117+ cl_device_id device_id,
1118+ const size_t * origin_, /*[3]*/
1119+ const size_t * region_, /*[3]*/
1120+ size_t host_row_pitch,
1121+ size_t host_slice_pitch,
1122+ const void * ptr);
1123+
1124+extern cl_int
1125+pocl_read_image (cl_mem image,
1126+ cl_device_id device,
1127+ const size_t * origin, /*[3]*/
1128+ const size_t * region, /*[3]*/
1129+ size_t host_row_pitch,
1130+ size_t host_slice_pitch,
1131+ void * ptr);
1132+
1133+extern void
1134+pocl_get_image_information (cl_mem image,
1135+ int* host_channels,
1136+ int* host_elem_size);
1137+
1138+#endif
1139
1140=== added file 'lib/kernel/get_image_height.cl'
1141--- lib/kernel/get_image_height.cl 1970-01-01 00:00:00 +0000
1142+++ lib/kernel/get_image_height.cl 2012-05-30 08:34:23 +0000
1143@@ -0,0 +1,7 @@
1144+#include "templates.h"
1145+#include "image.h"
1146+
1147+int get_image_width (image2d_t image)
1148+{
1149+ return image->height;
1150+}
1151\ No newline at end of file
1152
1153=== added file 'lib/kernel/get_image_width.cl'
1154--- lib/kernel/get_image_width.cl 1970-01-01 00:00:00 +0000
1155+++ lib/kernel/get_image_width.cl 2012-05-30 08:34:23 +0000
1156@@ -0,0 +1,7 @@
1157+#include "templates.h"
1158+#include "image.h"
1159+
1160+int get_image_width (image2d_t image)
1161+{
1162+ return image->width;
1163+}
1164\ No newline at end of file
1165
1166=== added file 'lib/kernel/image.h'
1167--- lib/kernel/image.h 1970-01-01 00:00:00 +0000
1168+++ lib/kernel/image.h 2012-05-30 08:34:23 +0000
1169@@ -0,0 +1,15 @@
1170+#ifndef __IMAGE_H__
1171+#define __IMAGE_H__
1172+
1173+#include "templates.h"
1174+
1175+typedef struct image2d_t_ {
1176+ uchar4* data;
1177+ int width;
1178+ int height;
1179+ int rowpitch;
1180+ int order;
1181+ int data_type;
1182+} image2d_t_;
1183+
1184+#endif
1185
1186=== added file 'lib/kernel/read_image.cl'
1187--- lib/kernel/read_image.cl 1970-01-01 00:00:00 +0000
1188+++ lib/kernel/read_image.cl 2012-05-30 08:34:23 +0000
1189@@ -0,0 +1,30 @@
1190+#include "templates.h"
1191+
1192+#include "image.h"
1193+
1194+float4 _cl_overloadable read_imagef ( image2d_t image,
1195+ sampler_t sampler,
1196+ int2 coord) {
1197+ //TODO: Sampler options
1198+ if( coord.x<0 )
1199+ coord.x=0;
1200+ if( coord.y<0 )
1201+ coord.y=0;
1202+ if( coord.x>=image->width )
1203+ coord.x=image->width-1;
1204+ if( coord.y>=image->height )
1205+ coord.y=image->height-1;
1206+
1207+ float4 color = ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ];
1208+
1209+ return color;
1210+}
1211+
1212+float4 _cl_overloadable read_imagef ( image2d_t image,
1213+ sampler_t sampler,
1214+ float2 coord) {
1215+
1216+ float4 color = ((float4*)image->data)[ (int)coord.x + (int)coord.y*image->rowpitch ];
1217+
1218+ return color;
1219+}
1220
1221=== modified file 'lib/kernel/sources.mk'
1222--- lib/kernel/sources.mk 2012-03-05 15:33:38 +0000
1223+++ lib/kernel/sources.mk 2012-05-30 08:34:23 +0000
1224@@ -130,7 +130,11 @@
1225 vload_half.cl \
1226 vstore_half.cl \
1227 async_work_group_copy.cl \
1228- wait_group_events.cl
1229+ wait_group_events.cl \
1230+ read_image.cl \
1231+ write_image.cl \
1232+ get_image_width.cl \
1233+ get_image_height.cl
1234
1235
1236
1237
1238=== added file 'lib/kernel/write_image.cl'
1239--- lib/kernel/write_image.cl 1970-01-01 00:00:00 +0000
1240+++ lib/kernel/write_image.cl 2012-05-30 08:34:23 +0000
1241@@ -0,0 +1,17 @@
1242+#include "templates.h"
1243+
1244+#include "image.h"
1245+
1246+//typedef struct image2d_t_* image2d_t;
1247+
1248+void _cl_overloadable write_imagef ( image2d_t image,
1249+ int2 coord,
1250+ float4 color) {
1251+ ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ] = color;
1252+}
1253+
1254+void _cl_overloadable write_imagei ( image2d_t image,
1255+ int2 coord,
1256+ int4 color) {
1257+ ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ] = (float4)(color.x,color.y,color.z,color.w);
1258+}
1259
1260=== modified file 'lib/llvmopencl/GenerateHeader.cc'
1261--- lib/llvmopencl/GenerateHeader.cc 2012-02-24 17:50:01 +0000
1262+++ lib/llvmopencl/GenerateHeader.cc 2012-05-30 08:34:23 +0000
1263@@ -123,6 +123,8 @@
1264 return changed;
1265 }
1266
1267+#include <iostream>
1268+
1269
1270
1271 void
1272 GenerateHeader::ProcessPointers(Function *F,
1273@@ -134,12 +136,17 @@
1274
1275 bool is_pointer[num_args];
1276 bool is_local[num_args];
1277+ bool is_image[num_args];
1278+ bool is_sampler[num_args];
1279
1280 int i = 0;
1281 for (Function::const_arg_iterator ii = F->arg_begin(),
1282 ee = F->arg_end();
1283 ii != ee; ++ii) {
1284 Type *t = ii->getType();
1285+
1286+ is_image[i] = false;
1287+ is_sampler[i] = false;
1288
1289 if (const PointerType *p = dyn_cast<PointerType> (t)) {
1290 is_pointer[i] = true;
1291@@ -153,6 +160,22 @@
1292 is_pointer[i] = false;
1293 is_local[i] = false;
1294 }
1295+
1296+ if( t->isPointerTy() ) {
1297+ if( t->getPointerElementType()->isStructTy() ) {
1298+ string name = t->getPointerElementType()->getStructName().str();
1299+ if( name == "struct.image2d_t_" ) { // TODO image3d?
1300+ is_image[i] = true;
1301+ is_pointer[i] = false;
1302+ is_local[i] = false;
1303+ }
1304+ if( name == "struct.sampler_t_" ) {
1305+ is_sampler[i] = true;
1306+ is_pointer[i] = false;
1307+ is_local[i] = false;
1308+ }
1309+ }
1310+ }
1311
1312 ++i;
1313 }
1314@@ -172,6 +195,22 @@
1315 out << ", " << is_local[i];
1316 }
1317 out << "}\n";
1318+
1319+ out << "#define _" << F->getName() << "_ARG_IS_IMAGE {";
1320+ if (num_args != 0) {
1321+ out << is_image[0];
1322+ for (i = 1; i < num_args; ++i)
1323+ out << ", " << is_image[i];
1324+ }
1325+ out << "}\n";
1326+
1327+ out << "#define _" << F->getName() << "_ARG_IS_SAMPLER {";
1328+ if (num_args != 0) {
1329+ out << is_sampler[0];
1330+ for (i = 1; i < num_args; ++i)
1331+ out << ", " << is_sampler[i];
1332+ }
1333+ out << "}\n";
1334 }
1335
1336
1337
1338
1339=== modified file 'scripts/pocl-kernel.in'
1340--- scripts/pocl-kernel.in 2012-05-14 17:44:09 +0000
1341+++ scripts/pocl-kernel.in 2012-05-30 08:34:23 +0000
1342@@ -60,6 +60,8 @@
1343 unsigned _num_args = _${kernel}_NUM_ARGS;
1344 int _arg_is_pointer[] = _${kernel}_ARG_IS_POINTER;
1345 int _arg_is_local[] = _${kernel}_ARG_IS_LOCAL;
1346+int _arg_is_image[] = _${kernel}_ARG_IS_IMAGE;
1347+int _arg_is_sampler[] = _${kernel}_ARG_IS_SAMPLER;
1348 unsigned _num_locals = _${kernel}_NUM_LOCALS;
1349 #if _${kernel}_NUM_LOCALS != 0
1350 unsigned _local_sizes[_${kernel}_NUM_LOCALS] = _${kernel}_LOCAL_SIZE;
1351@@ -93,6 +95,8 @@
1352 #endif
1353 _${kernel}_ARG_IS_LOCAL,
1354 _${kernel}_ARG_IS_POINTER,
1355+ _${kernel}_ARG_IS_IMAGE,
1356+ _${kernel}_ARG_IS_SAMPLER,
1357 _${kernel}_workgroup_fast
1358 };
1359 EOF
1360
1361=== modified file 'tests/testsuite-samples.at'
1362--- tests/testsuite-samples.at 2012-05-24 21:26:09 +0000
1363+++ tests/testsuite-samples.at 2012-05-30 08:34:23 +0000
1364@@ -73,6 +73,32 @@
1365
1366 AT_CLEANUP
1367
1368+AT_SETUP([Run Chapter 8: ImageFilter2D])
1369+AT_KEYWORDS([booksamples imagefilter2d])
1370+AT_SKIP_IF([! test -e $abs_top_srcdir/examples/opencl-book-samples/checkout])
1371+AT_CHECK_UNQUOTED([
1372+cd ${abs_top_srcdir}/examples/opencl-book-samples/checkout/src/Chapter_8/ImageFilter2D ;
1373+#sed '13c\ ' -i ImageFilter2D.cl ;
1374+#sed '14c\ ' -i ImageFilter2D.cl ;
1375+#sed '15c\ ' -i ImageFilter2D.cl ;
1376+#sed "3c float kernelWeights[9] = { 1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f,1.0f, 2.0f, 1.0f }; \ " -i ImageFilter2D.cl ;
1377+cp ${abs_top_srcdir}/examples/opencl-book-samples/ImageFilter2D.cl ./
1378+sed '418cclFinish(commandQueue);' -i ImageFilter2D.cpp ;
1379+cd ${abs_top_srcdir}/examples/opencl-book-samples/checkout/build/src/Chapter_8/ImageFilter2D ;
1380+make]
1381+, 0, [ignore], [ignore])
1382+
1383+AT_CHECK_UNQUOTED([
1384+cd $abs_top_srcdir/examples/opencl-book-samples/checkout/build/src/Chapter_8/ImageFilter2D
1385+./ImageFilter2D ../../../../src/Chapter_19/oclFlow/data/minicooper/frame10.png output.png
1386+], 0,
1387+[Could not create GPU context, trying CPU...
1388+
1389+Executed program succesfully.
1390+], [ignore])
1391+
1392+AT_CLEANUP
1393+
1394 AT_SETUP([Run Chapter 12: VectorAdd (C++ bindings)])
1395 AT_KEYWORDS([booksamples])
1396 AT_SKIP_IF([! test -e $abs_top_srcdir/examples/opencl-book-samples/checkout])