Merge lp:~viitanet/pocl/ImageSupport into lp:~pocl/pocl/trunk
- ImageSupport
- Merge into trunk
Status: | Merged |
---|---|
Merged at revision: | 306 |
Proposed branch: | lp:~viitanet/pocl/ImageSupport |
Merge into: | lp:~pocl/pocl/trunk |
Diff against target: |
1394 lines (+900/-95) 32 files modified
TODO (+0/-3) examples/opencl-book-samples/ImageFilter2D.cl (+34/-0) examples/opencl-book-samples/Makefile.am (+1/-0) include/_kernel.h (+36/-0) include/pocl_device.h (+2/-0) lib/CL/Makefile.am (+14/-13) lib/CL/clCreateBuffer.c (+1/-0) lib/CL/clCreateImage2D.c (+108/-4) lib/CL/clCreateKernel.c (+2/-0) lib/CL/clCreateSampler.c (+30/-10) lib/CL/clEnqueueCopyBufferToImage.c (+46/-13) lib/CL/clEnqueueReadBufferRect.c (+2/-0) lib/CL/clEnqueueReadImage.c (+23/-16) lib/CL/clEnqueueWriteImage.c (+22/-16) lib/CL/clGetDeviceInfo.c (+1/-2) lib/CL/clGetSupportedImageFormats.c (+49/-10) lib/CL/clReleaseSampler.c (+3/-5) lib/CL/devices/basic/basic.c (+25/-0) lib/CL/devices/dev_image.h (+17/-0) lib/CL/devices/pthread/pthread.c (+39/-1) lib/CL/pocl_cl.h (+17/-1) lib/CL/pocl_image_util.c (+228/-0) lib/CL/pocl_image_util.h (+50/-0) lib/kernel/get_image_height.cl (+7/-0) lib/kernel/get_image_width.cl (+7/-0) lib/kernel/image.h (+15/-0) lib/kernel/read_image.cl (+30/-0) lib/kernel/sources.mk (+5/-1) lib/kernel/write_image.cl (+17/-0) lib/llvmopencl/GenerateHeader.cc (+39/-0) scripts/pocl-kernel.in (+4/-0) tests/testsuite-samples.at (+26/-0) |
To merge this branch: | bzr merge lp:~viitanet/pocl/ImageSupport |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
pocl maintaners | Pending | ||
Review via email: mp+107929@code.launchpad.net |
This proposal supersedes a proposal from 2012-05-28.
Commit message
Description of the change
Implemented parts of OpenCL Image API:
clCreateImage2D
clCreateSampler
clReleaseSampler
clEnqueueCopyBu
clEnqueueWriteImage
clEnqueueReadImage
clGetSupportedI
read_imagef
write_imagef
get_image_width
get_image_height
Supported data types: CL_UNORM_INT8, CL_FLOAT
Supported channel orders: CL_RGBA, CL_R
Sampler always defaults to CLK_NORMALIZED_
Added a 'make check' test from opencl-
Other possible tests:
Chapter 14, histogram, requires atomics
Chapter 19, oclFlow, requires OpenCV
Rodinia-leukocyte crashes on compiler bug (https:/
Rodinia-nn doesn't actually use images, but runs, TODO
Rodinia-gaussian doesn't actually use images; crashes
Rodinia-
- 306. By Pekka Jääskeläinen
-
Some image APIs from Timo.
Preview Diff
1 | === modified file 'TODO' | |||
2 | --- TODO 2012-05-24 21:26:09 +0000 | |||
3 | +++ TODO 2012-05-30 08:34:23 +0000 | |||
4 | @@ -49,15 +49,12 @@ | |||
5 | 49 | * 5.2.1 Creating buffer objects | 49 | * 5.2.1 Creating buffer objects |
6 | 50 | * 5.2.4 Mapping buffer objects | 50 | * 5.2.4 Mapping buffer objects |
7 | 51 | * 5.3 Image objects | 51 | * 5.3 Image objects |
8 | 52 | * clCreateImage2D (deprecated in OpenCL 1.2) (*, R[gaussian, leukocyte, nn]) | ||
9 | 53 | * 5.3.3 Reading, Writing and Copying Image Objects | 52 | * 5.3.3 Reading, Writing and Copying Image Objects |
10 | 54 | * clEnqueueReadImage (*, R[nn]) | ||
11 | 55 | * 5.4 Querying, Umapping, Migrating, ... Mem objects | 53 | * 5.4 Querying, Umapping, Migrating, ... Mem objects |
12 | 56 | * 5.4.1 Retaining and Releasing Memory Objects | 54 | * 5.4.1 Retaining and Releasing Memory Objects |
13 | 57 | * 5.4.2 Unmapping Mapped Memory Objects | 55 | * 5.4.2 Unmapping Mapped Memory Objects |
14 | 58 | * 5.5 Sampler objects | 56 | * 5.5 Sampler objects |
15 | 59 | * 5.5.1 Creating Sampler Objects | 57 | * 5.5.1 Creating Sampler Objects |
16 | 60 | * clCreateSampler (*) | ||
17 | 61 | * 5.6.1 Creating Program Objects | 58 | * 5.6.1 Creating Program Objects |
18 | 62 | * 5.7.1 Creating Kernel Objects | 59 | * 5.7.1 Creating Kernel Objects |
19 | 63 | * 5.9 Event objects | 60 | * 5.9 Event objects |
20 | 64 | 61 | ||
21 | === added file 'examples/opencl-book-samples/ImageFilter2D.cl' | |||
22 | --- examples/opencl-book-samples/ImageFilter2D.cl 1970-01-01 00:00:00 +0000 | |||
23 | +++ examples/opencl-book-samples/ImageFilter2D.cl 2012-05-30 08:34:23 +0000 | |||
24 | @@ -0,0 +1,34 @@ | |||
25 | 1 | |||
26 | 2 | // Gaussian Kernel is: | ||
27 | 3 | // 1 2 1 | ||
28 | 4 | // 2 4 2 | ||
29 | 5 | // 1 2 1 | ||
30 | 6 | float kernelWeights[9] = { 1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f,1.0f, 2.0f, 1.0f }; | ||
31 | 7 | |||
32 | 8 | // Gaussian filter of image | ||
33 | 9 | __kernel void gaussian_filter(__read_only image2d_t srcImg, | ||
34 | 10 | __write_only image2d_t dstImg, | ||
35 | 11 | sampler_t sampler, | ||
36 | 12 | int width, int height) | ||
37 | 13 | { | ||
38 | 14 | int2 startImageCoord = (int2) (get_global_id(0) - 1, get_global_id(1) - 1); | ||
39 | 15 | int2 endImageCoord = (int2) (get_global_id(0) + 1, get_global_id(1) + 1); | ||
40 | 16 | int2 outImageCoord = (int2) (get_global_id(0), get_global_id(1)); | ||
41 | 17 | |||
42 | 18 | if (outImageCoord.x < width && outImageCoord.y < height) | ||
43 | 19 | { | ||
44 | 20 | int weight = 0; | ||
45 | 21 | float4 outColor = (float4)(0.0f, 0.0f, 0.0f, 0.0f); | ||
46 | 22 | for( int y = startImageCoord.y; y <= endImageCoord.y; y++) | ||
47 | 23 | { | ||
48 | 24 | for( int x = startImageCoord.x; x <= endImageCoord.x; x++) | ||
49 | 25 | { | ||
50 | 26 | outColor += (read_imagef(srcImg, sampler, (int2)(x, y)) * (kernelWeights[weight] / 16.0f)); | ||
51 | 27 | weight += 1; | ||
52 | 28 | } | ||
53 | 29 | } | ||
54 | 30 | |||
55 | 31 | // Write the output value to image | ||
56 | 32 | write_imagef(dstImg, outImageCoord, outColor); | ||
57 | 33 | } | ||
58 | 34 | } | ||
59 | 0 | 35 | ||
60 | === modified file 'examples/opencl-book-samples/Makefile.am' | |||
61 | --- examples/opencl-book-samples/Makefile.am 2012-05-21 08:51:32 +0000 | |||
62 | +++ examples/opencl-book-samples/Makefile.am 2012-05-30 08:34:23 +0000 | |||
63 | @@ -39,6 +39,7 @@ | |||
64 | 39 | sed -i 's/size_t localWorkSize = maxWorkGroupSize/size_t localWorkSize = 2/g' src/Chapter_16/Dijkstra/oclDijkstraKernel.cpp; \ | 39 | sed -i 's/size_t localWorkSize = maxWorkGroupSize/size_t localWorkSize = 2/g' src/Chapter_16/Dijkstra/oclDijkstraKernel.cpp; \ |
65 | 40 | sed -i 's/device.j..type == CL_DEVICE_TYPE_GPU/device[j].type \& CL_DEVICE_TYPE_GPU/g' src/Chapter_22/spmv.c; \ | 40 | sed -i 's/device.j..type == CL_DEVICE_TYPE_GPU/device[j].type \& CL_DEVICE_TYPE_GPU/g' src/Chapter_22/spmv.c; \ |
66 | 41 | sed -i 's/context.CL_DEVICE_TYPE_GPU/context\(CL_DEVICE_TYPE_CPU/g' src/Chapter_12/VectorAdd/vecadd.cpp; \ | 41 | sed -i 's/context.CL_DEVICE_TYPE_GPU/context\(CL_DEVICE_TYPE_CPU/g' src/Chapter_12/VectorAdd/vecadd.cpp; \ |
67 | 42 | rm src/Chapter_12/Sinewave/CMakeLists.txt; \ | ||
68 | 42 | mkdir -p build; cd build; \ | 43 | mkdir -p build; cd build; \ |
69 | 43 | cmake \ | 44 | cmake \ |
70 | 44 | -D CMAKE_CXX_FLAGS:STRING='-g -O3 -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -lGL -lglut ${PTHREAD_CFLAGS}' \ | 45 | -D CMAKE_CXX_FLAGS:STRING='-g -O3 -DCL_USE_DEPRECATED_OPENCL_1_1_APIS -lGL -lglut ${PTHREAD_CFLAGS}' \ |
71 | 45 | 46 | ||
72 | === modified file 'include/_kernel.h' | |||
73 | --- include/_kernel.h 2012-03-06 23:17:03 +0000 | |||
74 | +++ include/_kernel.h 2012-05-30 08:34:23 +0000 | |||
75 | @@ -1772,3 +1772,39 @@ | |||
76 | 1772 | _CL_DECLARE_ASYNC_COPY_FUNCS(float); | 1772 | _CL_DECLARE_ASYNC_COPY_FUNCS(float); |
77 | 1773 | __IF_FP64(_CL_DECLARE_ASYNC_COPY_FUNCS(double)); | 1773 | __IF_FP64(_CL_DECLARE_ASYNC_COPY_FUNCS(double)); |
78 | 1774 | 1774 | ||
79 | 1775 | // Image support | ||
80 | 1776 | |||
81 | 1777 | typedef int sampler_t; | ||
82 | 1778 | |||
83 | 1779 | #define CLK_ADDRESS_NONE 0x00 | ||
84 | 1780 | #define CLK_ADDRESS_MIRRORED_REPEAT 0x01 | ||
85 | 1781 | #define CLK_ADDRESS_REPEAT 0x02 | ||
86 | 1782 | #define CLK_ADDRESS_CLAMP_TO_EDGE 0x03 | ||
87 | 1783 | #define CLK_ADDRESS_CLAMP 0x04 | ||
88 | 1784 | |||
89 | 1785 | #define CLK_NORMALIZED_COORDS_FALSE 0x00 | ||
90 | 1786 | #define CLK_NORMALIZED_COORDS_TRUE 0x08 | ||
91 | 1787 | |||
92 | 1788 | #define CLK_FILTER_NEAREST 0x00 | ||
93 | 1789 | #define CLK_FILTER_LINEAR 0x10 | ||
94 | 1790 | |||
95 | 1791 | typedef struct image2d_t_* image2d_t; | ||
96 | 1792 | |||
97 | 1793 | float4 _cl_overloadable read_imagef( image2d_t image, | ||
98 | 1794 | sampler_t sampler, | ||
99 | 1795 | int2 coord); | ||
100 | 1796 | |||
101 | 1797 | float4 _cl_overloadable read_imagef( image2d_t image, | ||
102 | 1798 | sampler_t sampler, | ||
103 | 1799 | float2 coord); | ||
104 | 1800 | |||
105 | 1801 | void _cl_overloadable write_imagef( image2d_t image, | ||
106 | 1802 | int2 coord, | ||
107 | 1803 | float4 color); | ||
108 | 1804 | |||
109 | 1805 | void _cl_overloadable write_imagei( image2d_t image, | ||
110 | 1806 | int2 coord, | ||
111 | 1807 | int4 color); | ||
112 | 1808 | |||
113 | 1809 | int get_image_width (image2d_t image); | ||
114 | 1810 | int get_image_height (image2d_t image); | ||
115 | 1775 | 1811 | ||
116 | === modified file 'include/pocl_device.h' | |||
117 | --- include/pocl_device.h 2012-05-08 17:37:35 +0000 | |||
118 | +++ include/pocl_device.h 2012-05-30 08:34:23 +0000 | |||
119 | @@ -51,6 +51,8 @@ | |||
120 | 51 | unsigned short alocal_sizes[MAX_KERNEL_ARGS]; | 51 | unsigned short alocal_sizes[MAX_KERNEL_ARGS]; |
121 | 52 | char arg_is_local[MAX_KERNEL_ARGS]; | 52 | char arg_is_local[MAX_KERNEL_ARGS]; |
122 | 53 | char arg_is_pointer[MAX_KERNEL_ARGS]; | 53 | char arg_is_pointer[MAX_KERNEL_ARGS]; |
123 | 54 | char arg_is_image[MAX_KERNEL_ARGS]; | ||
124 | 55 | char arg_is_sampler[MAX_KERNEL_ARGS]; | ||
125 | 54 | pocl_workgroup work_group_func; | 56 | pocl_workgroup work_group_func; |
126 | 55 | } __kernel_metadata; | 57 | } __kernel_metadata; |
127 | 56 | 58 | ||
128 | 57 | 59 | ||
129 | === modified file 'lib/CL/Makefile.am' | |||
130 | --- lib/CL/Makefile.am 2012-05-28 12:03:51 +0000 | |||
131 | +++ lib/CL/Makefile.am 2012-05-30 08:34:23 +0000 | |||
132 | @@ -38,9 +38,9 @@ | |||
133 | 38 | clGetCommandQueueInfo.c \ | 38 | clGetCommandQueueInfo.c \ |
134 | 39 | clCreateBuffer.c \ | 39 | clCreateBuffer.c \ |
135 | 40 | clCreateSubBuffer.c \ | 40 | clCreateSubBuffer.c \ |
139 | 41 | clEnqueueReadBuffer.c \ | 41 | clEnqueueReadBuffer.c \ |
140 | 42 | clEnqueueReadBufferRect.c \ | 42 | clEnqueueReadBufferRect.c \ |
141 | 43 | clEnqueueMapBuffer.c \ | 43 | clEnqueueMapBuffer.c \ |
142 | 44 | clEnqueueUnmapMemObject.c \ | 44 | clEnqueueUnmapMemObject.c \ |
143 | 45 | clReleaseMemObject.c \ | 45 | clReleaseMemObject.c \ |
144 | 46 | clRetainMemObject.c \ | 46 | clRetainMemObject.c \ |
145 | @@ -89,23 +89,24 @@ | |||
146 | 89 | clEnqueueTask.c \ | 89 | clEnqueueTask.c \ |
147 | 90 | clCreateImage2D.c \ | 90 | clCreateImage2D.c \ |
148 | 91 | clCreateImage3D.c \ | 91 | clCreateImage3D.c \ |
151 | 92 | clEnqueueReadImage.c \ | 92 | clEnqueueReadImage.c \ |
152 | 93 | clEnqueueWriteImage.c \ | 93 | clEnqueueWriteImage.c \ |
153 | 94 | clCreateSampler.c \ | ||
154 | 95 | clReleaseSampler.c \ | ||
155 | 96 | clRetainSampler.c \ | ||
156 | 97 | clGetSamplerInfo.c \ | ||
157 | 94 | clEnqueueCopyImage.c \ | 98 | clEnqueueCopyImage.c \ |
160 | 95 | clEnqueueMapImage.c \ | 99 | clEnqueueMapImage.c \ \ |
159 | 96 | clGetSupportedImageFormats.c \ | ||
161 | 97 | clGetImageInfo.c \ | 100 | clGetImageInfo.c \ |
162 | 98 | clCreateFromGLTexture2D.c \ | 101 | clCreateFromGLTexture2D.c \ |
163 | 99 | clCreateFromGLTexture3D.c \ | 102 | clCreateFromGLTexture3D.c \ |
164 | 100 | clUnloadCompiler.c \ | 103 | clUnloadCompiler.c \ |
165 | 104 | clGetSupportedImageFormats.c \ | ||
166 | 101 | clGetExtensionFunctionAddress.c \ | 105 | clGetExtensionFunctionAddress.c \ |
167 | 102 | clIcdGetPlatformIDsKHR.c \ | 106 | clIcdGetPlatformIDsKHR.c \ |
174 | 103 | clCreateSampler.c \ | 107 | pocl_cl.h \ |
175 | 104 | clReleaseSampler.c \ | 108 | pocl_util.c pocl_util.h \ |
176 | 105 | clRetainSampler.c \ | 109 | pocl_image_util.c pocl_image_util.h |
171 | 106 | clGetSamplerInfo.c \ | ||
172 | 107 | pocl_cl.h \ | ||
173 | 108 | pocl_util.c pocl_util.h | ||
177 | 109 | 110 | ||
178 | 110 | 111 | ||
179 | 111 | 112 | ||
180 | 112 | 113 | ||
181 | === modified file 'lib/CL/clCreateBuffer.c' | |||
182 | --- lib/CL/clCreateBuffer.c 2012-05-24 20:46:33 +0000 | |||
183 | +++ lib/CL/clCreateBuffer.c 2012-05-30 08:34:23 +0000 | |||
184 | @@ -49,6 +49,7 @@ | |||
185 | 49 | mem->map_count = 0; | 49 | mem->map_count = 0; |
186 | 50 | mem->mappings = NULL; | 50 | mem->mappings = NULL; |
187 | 51 | mem->flags = flags; | 51 | mem->flags = flags; |
188 | 52 | mem->is_image = CL_FALSE; | ||
189 | 52 | POCL_INIT_ICD_OBJECT(mem); | 53 | POCL_INIT_ICD_OBJECT(mem); |
190 | 53 | 54 | ||
191 | 54 | /* Store the per device buffer pointers always to a known | 55 | /* Store the per device buffer pointers always to a known |
192 | 55 | 56 | ||
193 | === modified file 'lib/CL/clCreateImage2D.c' | |||
194 | --- lib/CL/clCreateImage2D.c 2012-01-30 12:21:12 +0000 | |||
195 | +++ lib/CL/clCreateImage2D.c 2012-05-30 08:34:23 +0000 | |||
196 | @@ -1,17 +1,121 @@ | |||
197 | 1 | #include "pocl_cl.h" | 1 | #include "pocl_cl.h" |
198 | 2 | #include "assert.h" | ||
199 | 3 | #include "pocl_image_util.h" | ||
200 | 4 | |||
201 | 2 | CL_API_ENTRY cl_mem CL_API_CALL | 5 | CL_API_ENTRY cl_mem CL_API_CALL |
202 | 3 | clCreateImage2D(cl_context context, | 6 | clCreateImage2D(cl_context context, |
203 | 4 | cl_mem_flags flags, | 7 | cl_mem_flags flags, |
204 | 5 | const cl_image_format * image_format, | 8 | const cl_image_format * image_format, |
207 | 6 | size_t image_width, | 9 | size_t width, |
208 | 7 | size_t image_height, | 10 | size_t height, |
209 | 8 | size_t image_row_pitch, | 11 | size_t image_row_pitch, |
210 | 9 | void * host_ptr, | 12 | void * host_ptr, |
211 | 10 | cl_int * errcode_ret) | 13 | cl_int * errcode_ret) |
212 | 11 | CL_API_SUFFIX__VERSION_1_0 | 14 | CL_API_SUFFIX__VERSION_1_0 |
213 | 12 | { | 15 | { |
216 | 13 | POCL_ABORT_UNIMPLEMENTED(); | 16 | cl_mem mem; |
217 | 14 | return CL_SUCCESS; | 17 | cl_device_id device_id; |
218 | 18 | void *device_ptr; | ||
219 | 19 | unsigned i, j; | ||
220 | 20 | int size; | ||
221 | 21 | |||
222 | 22 | if (context == NULL) | ||
223 | 23 | POCL_ERROR(CL_INVALID_CONTEXT); | ||
224 | 24 | |||
225 | 25 | mem = (cl_mem) malloc(sizeof(struct _cl_mem)); | ||
226 | 26 | if (mem == NULL) | ||
227 | 27 | POCL_ERROR(CL_OUT_OF_HOST_MEMORY); | ||
228 | 28 | |||
229 | 29 | POCL_INIT_OBJECT(mem); | ||
230 | 30 | mem->parent = NULL; | ||
231 | 31 | mem->map_count = 0; | ||
232 | 32 | mem->mappings = NULL; | ||
233 | 33 | mem->flags = flags; | ||
234 | 34 | mem->is_image = CL_TRUE; | ||
235 | 35 | |||
236 | 36 | cl_channel_order order = image_format->image_channel_order; | ||
237 | 37 | cl_channel_type type = image_format->image_channel_data_type; | ||
238 | 38 | |||
239 | 39 | int dev_elem_size = sizeof(cl_float); //TODO | ||
240 | 40 | int dev_channels = 4; | ||
241 | 41 | |||
242 | 42 | if (image_row_pitch == 0) | ||
243 | 43 | image_row_pitch = width; | ||
244 | 44 | |||
245 | 45 | if (image_row_pitch != width) | ||
246 | 46 | POCL_ABORT_UNIMPLEMENTED(); | ||
247 | 47 | |||
248 | 48 | size = width * height * dev_elem_size * dev_channels; | ||
249 | 49 | |||
250 | 50 | mem->device_ptrs = (void **) malloc(context->num_devices * sizeof(void *)); | ||
251 | 51 | if (mem->device_ptrs == NULL) | ||
252 | 52 | { | ||
253 | 53 | free(mem); | ||
254 | 54 | POCL_ERROR(CL_OUT_OF_HOST_MEMORY); | ||
255 | 55 | } | ||
256 | 56 | |||
257 | 57 | int host_channels; | ||
258 | 58 | |||
259 | 59 | if (order == CL_RGBA) | ||
260 | 60 | host_channels=4; | ||
261 | 61 | else if (order == CL_R) | ||
262 | 62 | host_channels=1; | ||
263 | 63 | else | ||
264 | 64 | POCL_ABORT_UNIMPLEMENTED(); | ||
265 | 65 | |||
266 | 66 | mem->size = size; | ||
267 | 67 | mem->context = context; | ||
268 | 68 | |||
269 | 69 | mem->image_width = width; | ||
270 | 70 | mem->image_height = height; | ||
271 | 71 | mem->image_row_pitch = image_row_pitch; | ||
272 | 72 | mem->image_channel_data_type = type; | ||
273 | 73 | mem->image_channel_order = order; | ||
274 | 74 | |||
275 | 75 | for (i = 0; i < context->num_devices; ++i) | ||
276 | 76 | { | ||
277 | 77 | if (i > 0) | ||
278 | 78 | clRetainMemObject (mem); | ||
279 | 79 | device_id = context->devices[i]; | ||
280 | 80 | device_ptr = device_id->malloc(device_id->data, 0, size, NULL); | ||
281 | 81 | |||
282 | 82 | if (device_ptr == NULL) | ||
283 | 83 | { | ||
284 | 84 | for (j = 0; j < i; ++j) | ||
285 | 85 | { | ||
286 | 86 | device_id = context->devices[j]; | ||
287 | 87 | device_id->free(device_id->data, 0, mem->device_ptrs[j]); | ||
288 | 88 | } | ||
289 | 89 | free(mem); | ||
290 | 90 | POCL_ERROR(CL_MEM_OBJECT_ALLOCATION_FAILURE); | ||
291 | 91 | } | ||
292 | 92 | mem->device_ptrs[i] = device_ptr; | ||
293 | 93 | /* The device allocator allocated from a device-host shared memory. */ | ||
294 | 94 | if (flags & CL_MEM_ALLOC_HOST_PTR || | ||
295 | 95 | flags & CL_MEM_USE_HOST_PTR) | ||
296 | 96 | POCL_ABORT_UNIMPLEMENTED(); | ||
297 | 97 | |||
298 | 98 | if (flags & CL_MEM_COPY_HOST_PTR) | ||
299 | 99 | { | ||
300 | 100 | size_t origin[3] = { 0, 0, 0 }; | ||
301 | 101 | size_t region[3] = { width, height, 1 }; | ||
302 | 102 | pocl_write_image( mem, | ||
303 | 103 | context->devices[i], | ||
304 | 104 | origin, | ||
305 | 105 | region, | ||
306 | 106 | 0, | ||
307 | 107 | 1, | ||
308 | 108 | host_ptr ); | ||
309 | 109 | } | ||
310 | 110 | } | ||
311 | 111 | |||
312 | 112 | POCL_RETAIN_OBJECT(context); | ||
313 | 113 | |||
314 | 114 | |||
315 | 115 | if (errcode_ret != NULL) | ||
316 | 116 | *errcode_ret = CL_SUCCESS; | ||
317 | 117 | |||
318 | 118 | return mem; | ||
319 | 15 | } | 119 | } |
320 | 16 | 120 | ||
321 | 17 | 121 | ||
322 | 18 | 122 | ||
323 | === modified file 'lib/CL/clCreateKernel.c' | |||
324 | --- lib/CL/clCreateKernel.c 2012-05-23 08:38:12 +0000 | |||
325 | +++ lib/CL/clCreateKernel.c 2012-05-30 08:34:23 +0000 | |||
326 | @@ -143,6 +143,8 @@ | |||
327 | 143 | kernel->dlhandle = dlhandle; /* TODO: why is this stored? */ | 143 | kernel->dlhandle = dlhandle; /* TODO: why is this stored? */ |
328 | 144 | kernel->arg_is_pointer = lt_dlsym(dlhandle, "_arg_is_pointer"); | 144 | kernel->arg_is_pointer = lt_dlsym(dlhandle, "_arg_is_pointer"); |
329 | 145 | kernel->arg_is_local = lt_dlsym(dlhandle, "_arg_is_local"); | 145 | kernel->arg_is_local = lt_dlsym(dlhandle, "_arg_is_local"); |
330 | 146 | kernel->arg_is_image = lt_dlsym(dlhandle, "_arg_is_image"); | ||
331 | 147 | kernel->arg_is_sampler = lt_dlsym(dlhandle, "_arg_is_sampler"); | ||
332 | 146 | kernel->num_locals = *(cl_uint *) lt_dlsym(dlhandle, "_num_locals"); | 148 | kernel->num_locals = *(cl_uint *) lt_dlsym(dlhandle, "_num_locals"); |
333 | 147 | kernel->arguments = | 149 | kernel->arguments = |
334 | 148 | (struct pocl_argument *) malloc ((kernel->num_args + kernel->num_locals) * | 150 | (struct pocl_argument *) malloc ((kernel->num_args + kernel->num_locals) * |
335 | 149 | 151 | ||
336 | === modified file 'lib/CL/clCreateSampler.c' | |||
337 | --- lib/CL/clCreateSampler.c 2012-05-22 21:26:29 +0000 | |||
338 | +++ lib/CL/clCreateSampler.c 2012-05-30 08:34:23 +0000 | |||
339 | @@ -1,13 +1,33 @@ | |||
340 | 1 | #include "pocl_cl.h" | 1 | #include "pocl_cl.h" |
348 | 2 | 2 | extern CL_API_ENTRY cl_sampler CL_API_CALL | |
349 | 3 | CL_API_ENTRY cl_sampler CL_API_CALL | 3 | clCreateSampler(cl_context context, |
350 | 4 | clCreateSampler(cl_context context , | 4 | cl_bool normalized_coords, |
351 | 5 | cl_bool normalized_coords , | 5 | cl_addressing_mode addressing_mode, |
352 | 6 | cl_addressing_mode addressing_mode , | 6 | cl_filter_mode filter_mode, |
353 | 7 | cl_filter_mode filter_mode , | 7 | cl_int * errcode_ret) |
354 | 8 | cl_int * errcode_ret ) CL_API_SUFFIX__VERSION_1_0 | 8 | CL_API_SUFFIX__VERSION_1_0 |
355 | 9 | { | 9 | { |
358 | 10 | POCL_ABORT_UNIMPLEMENTED(); | 10 | cl_sampler sampler; |
359 | 11 | return CL_SUCCESS; | 11 | |
360 | 12 | if (context == NULL) | ||
361 | 13 | POCL_ERROR(CL_INVALID_CONTEXT); | ||
362 | 14 | |||
363 | 15 | sampler = (cl_sampler) malloc(sizeof(struct _cl_sampler)); | ||
364 | 16 | if (sampler == NULL) | ||
365 | 17 | POCL_ERROR(CL_OUT_OF_HOST_MEMORY); | ||
366 | 18 | |||
367 | 19 | if (normalized_coords == CL_TRUE) | ||
368 | 20 | POCL_ABORT_UNIMPLEMENTED(); | ||
369 | 21 | |||
370 | 22 | if (addressing_mode != CL_ADDRESS_CLAMP_TO_EDGE) | ||
371 | 23 | POCL_ABORT_UNIMPLEMENTED(); | ||
372 | 24 | |||
373 | 25 | if (filter_mode != CL_FILTER_NEAREST) | ||
374 | 26 | POCL_ABORT_UNIMPLEMENTED(); | ||
375 | 27 | |||
376 | 28 | sampler->normalized_coords = normalized_coords; | ||
377 | 29 | sampler->addressing_mode = addressing_mode; | ||
378 | 30 | sampler->filter_mode = filter_mode; | ||
379 | 31 | |||
380 | 32 | return sampler; | ||
381 | 12 | } | 33 | } |
382 | 13 | |||
383 | 14 | 34 | ||
384 | === modified file 'lib/CL/clEnqueueCopyBufferToImage.c' | |||
385 | --- lib/CL/clEnqueueCopyBufferToImage.c 2012-05-22 21:26:29 +0000 | |||
386 | +++ lib/CL/clEnqueueCopyBufferToImage.c 2012-05-30 08:34:23 +0000 | |||
387 | @@ -1,17 +1,50 @@ | |||
388 | 1 | #include "pocl_cl.h" | 1 | #include "pocl_cl.h" |
389 | 2 | 2 | ||
399 | 3 | CL_API_ENTRY cl_int CL_API_CALL | 3 | extern CL_API_ENTRY cl_int CL_API_CALL |
400 | 4 | clEnqueueCopyBufferToImage(cl_command_queue command_queue , | 4 | clEnqueueCopyBufferToImage(cl_command_queue command_queue, |
401 | 5 | cl_mem src_buffer , | 5 | cl_mem buffer, |
402 | 6 | cl_mem dst_image , | 6 | cl_mem image, |
403 | 7 | size_t src_offset , | 7 | size_t src_offset, |
404 | 8 | const size_t * dst_origin , | 8 | const size_t * dst_origin, /*[3]*/ |
405 | 9 | const size_t * region , | 9 | const size_t * region, /*[3]*/ |
406 | 10 | cl_uint num_events_in_wait_list , | 10 | cl_uint num_events_in_wait_list, |
407 | 11 | const cl_event * event_wait_list , | 11 | const cl_event * event_wait_list, |
408 | 12 | cl_event * event ) CL_API_SUFFIX__VERSION_1_0 | 12 | cl_event * event ) CL_API_SUFFIX__VERSION_1_0 |
413 | 13 | { | 13 | { |
414 | 14 | POCL_ABORT_UNIMPLEMENTED(); | 14 | if (region == NULL) |
415 | 15 | return CL_SUCCESS; | 15 | return CL_INVALID_VALUE; |
416 | 16 | } | 16 | |
417 | 17 | if (region[2] != 1) //3D image | ||
418 | 18 | POCL_ABORT_UNIMPLEMENTED(); | ||
419 | 19 | |||
420 | 20 | int dev_elem_size = sizeof(cl_float); | ||
421 | 21 | int dev_channels = 4; | ||
422 | 22 | |||
423 | 23 | int host_elem_size; | ||
424 | 24 | int host_channels; | ||
425 | 25 | pocl_get_image_information (image, &host_channels, &host_elem_size); | ||
426 | 26 | |||
427 | 27 | void* temp = malloc (image->size); | ||
428 | 28 | |||
429 | 29 | cl_device_id device_id = command_queue->device; | ||
430 | 17 | 30 | ||
431 | 31 | device_id->read | ||
432 | 32 | (device_id->data, | ||
433 | 33 | temp, | ||
434 | 34 | image->device_ptrs[device_id->dev_id], | ||
435 | 35 | image->size); | ||
436 | 36 | |||
437 | 37 | cl_int ret_code = pocl_write_image | ||
438 | 38 | (image, | ||
439 | 39 | command_queue->device, | ||
440 | 40 | dst_origin, | ||
441 | 41 | region, | ||
442 | 42 | 0, | ||
443 | 43 | 0, | ||
444 | 44 | temp+src_offset); | ||
445 | 45 | |||
446 | 46 | free(temp); | ||
447 | 47 | |||
448 | 48 | return ret_code; | ||
449 | 49 | } | ||
450 | 50 | |||
451 | 18 | \ No newline at end of file | 51 | \ No newline at end of file |
452 | 19 | 52 | ||
453 | === modified file 'lib/CL/clEnqueueReadBufferRect.c' | |||
454 | --- lib/CL/clEnqueueReadBufferRect.c 2012-05-14 11:45:48 +0000 | |||
455 | +++ lib/CL/clEnqueueReadBufferRect.c 2012-05-30 08:34:23 +0000 | |||
456 | @@ -23,6 +23,7 @@ | |||
457 | 23 | 23 | ||
458 | 24 | #include "pocl_cl.h" | 24 | #include "pocl_cl.h" |
459 | 25 | #include <assert.h> | 25 | #include <assert.h> |
460 | 26 | #include <stdio.h> | ||
461 | 26 | 27 | ||
462 | 27 | CL_API_ENTRY cl_int CL_API_CALL | 28 | CL_API_ENTRY cl_int CL_API_CALL |
463 | 28 | clEnqueueReadBufferRect(cl_command_queue command_queue, | 29 | clEnqueueReadBufferRect(cl_command_queue command_queue, |
464 | @@ -78,6 +79,7 @@ | |||
465 | 78 | buffer_origin, host_origin, region, | 79 | buffer_origin, host_origin, region, |
466 | 79 | buffer_row_pitch, buffer_slice_pitch, | 80 | buffer_row_pitch, buffer_slice_pitch, |
467 | 80 | host_row_pitch, host_slice_pitch); | 81 | host_row_pitch, host_slice_pitch); |
468 | 82 | |||
469 | 81 | 83 | ||
470 | 82 | return CL_SUCCESS; | 84 | return CL_SUCCESS; |
471 | 83 | } | 85 | } |
472 | 84 | 86 | ||
473 | === modified file 'lib/CL/clEnqueueReadImage.c' | |||
474 | --- lib/CL/clEnqueueReadImage.c 2012-05-22 21:26:29 +0000 | |||
475 | +++ lib/CL/clEnqueueReadImage.c 2012-05-30 08:34:23 +0000 | |||
476 | @@ -1,19 +1,26 @@ | |||
477 | 1 | #include "pocl_cl.h" | 1 | #include "pocl_cl.h" |
478 | 2 | #include "assert.h" | ||
479 | 3 | #include "pocl_image_util.h" | ||
480 | 2 | 4 | ||
493 | 3 | CL_API_ENTRY cl_int CL_API_CALL | 5 | extern CL_API_ENTRY cl_int CL_API_CALL |
494 | 4 | clEnqueueReadImage(cl_command_queue command_queue , | 6 | clEnqueueReadImage(cl_command_queue command_queue, |
495 | 5 | cl_mem image , | 7 | cl_mem image, |
496 | 6 | cl_bool blocking_read , | 8 | cl_bool blocking_read, |
497 | 7 | const size_t * origin , | 9 | const size_t * origin, /* [3] */ |
498 | 8 | const size_t * region , | 10 | const size_t * region, /* [3] */ |
499 | 9 | size_t row_pitch , | 11 | size_t host_row_pitch, |
500 | 10 | size_t slice_pitch , | 12 | size_t host_slice_pitch, |
501 | 11 | void * ptr , | 13 | void * ptr, |
502 | 12 | cl_uint num_events_in_wait_list , | 14 | cl_uint num_events_in_wait_list, |
503 | 13 | const cl_event * event_wait_list , | 15 | const cl_event * event_wait_list, |
504 | 14 | cl_event * event ) CL_API_SUFFIX__VERSION_1_0 | 16 | cl_event * event) |
505 | 17 | CL_API_SUFFIX__VERSION_1_0 | ||
506 | 15 | { | 18 | { |
511 | 16 | POCL_ABORT_UNIMPLEMENTED(); | 19 | return pocl_read_image(image, |
512 | 17 | return CL_SUCCESS; | 20 | command_queue->device, |
513 | 18 | } | 21 | origin, |
514 | 19 | 22 | region, | |
515 | 23 | host_row_pitch, | ||
516 | 24 | host_slice_pitch, | ||
517 | 25 | ptr); | ||
518 | 26 | } | ||
519 | 20 | \ No newline at end of file | 27 | \ No newline at end of file |
520 | 21 | 28 | ||
521 | === modified file 'lib/CL/clEnqueueWriteImage.c' | |||
522 | --- lib/CL/clEnqueueWriteImage.c 2012-05-22 21:26:29 +0000 | |||
523 | +++ lib/CL/clEnqueueWriteImage.c 2012-05-30 08:34:23 +0000 | |||
524 | @@ -1,19 +1,25 @@ | |||
525 | 1 | #include "pocl_cl.h" | 1 | #include "pocl_cl.h" |
526 | 2 | #include "pocl_image_util.h" | ||
527 | 2 | 3 | ||
544 | 3 | CL_API_ENTRY cl_int CL_API_CALL | 4 | extern CL_API_ENTRY cl_int CL_API_CALL |
545 | 4 | clEnqueueWriteImage(cl_command_queue command_queue , | 5 | clEnqueueWriteImage(cl_command_queue command_queue, |
546 | 5 | cl_mem image , | 6 | cl_mem image, |
547 | 6 | cl_bool blocking_write , | 7 | cl_bool blocking_write, |
548 | 7 | const size_t * origin, | 8 | const size_t * origin, /*[3]*/ |
549 | 8 | const size_t * region, | 9 | const size_t * region, /*[3]*/ |
550 | 9 | size_t input_row_pitch , | 10 | size_t host_row_pitch, |
551 | 10 | size_t input_slice_pitch , | 11 | size_t host_slice_pitch, |
552 | 11 | const void * ptr , | 12 | const void * ptr, |
553 | 12 | cl_uint num_events_in_wait_list , | 13 | cl_uint num_events_in_wait_list, |
554 | 13 | const cl_event * event_wait_list , | 14 | const cl_event * event_wait_list, |
555 | 14 | cl_event * event ) CL_API_SUFFIX__VERSION_1_0 | 15 | cl_event * event) CL_API_SUFFIX__VERSION_1_0 |
556 | 15 | { | 16 | { |
557 | 16 | POCL_ABORT_UNIMPLEMENTED(); | 17 | return pocl_write_image(image, |
558 | 17 | return CL_SUCCESS; | 18 | command_queue->device, |
559 | 18 | } | 19 | origin, |
560 | 20 | region, | ||
561 | 21 | host_row_pitch, | ||
562 | 22 | host_slice_pitch, | ||
563 | 23 | ptr); | ||
564 | 24 | } | ||
565 | 19 | 25 | ||
566 | 20 | 26 | ||
567 | === modified file 'lib/CL/clGetDeviceInfo.c' | |||
568 | --- lib/CL/clGetDeviceInfo.c 2012-05-15 22:38:06 +0000 | |||
569 | +++ lib/CL/clGetDeviceInfo.c 2012-05-30 08:34:23 +0000 | |||
570 | @@ -62,8 +62,7 @@ | |||
571 | 62 | switch (param_name) | 62 | switch (param_name) |
572 | 63 | { | 63 | { |
573 | 64 | case CL_DEVICE_IMAGE_SUPPORT: | 64 | case CL_DEVICE_IMAGE_SUPPORT: |
576 | 65 | /* Return CL_FALSE until the APIs are implemented. */ | 65 | POCL_RETURN_DEVICE_INFO(cl_bool, CL_TRUE); |
575 | 66 | POCL_RETURN_DEVICE_INFO(cl_bool, CL_FALSE); | ||
577 | 67 | case CL_DEVICE_TYPE: | 66 | case CL_DEVICE_TYPE: |
578 | 68 | POCL_RETURN_DEVICE_INFO(cl_device_type, device->type); | 67 | POCL_RETURN_DEVICE_INFO(cl_device_type, device->type); |
579 | 69 | case CL_DEVICE_VENDOR_ID: | 68 | case CL_DEVICE_VENDOR_ID: |
580 | 70 | 69 | ||
581 | === modified file 'lib/CL/clGetSupportedImageFormats.c' | |||
582 | --- lib/CL/clGetSupportedImageFormats.c 2012-05-22 21:26:29 +0000 | |||
583 | +++ lib/CL/clGetSupportedImageFormats.c 2012-05-30 08:34:23 +0000 | |||
584 | @@ -1,14 +1,53 @@ | |||
585 | 1 | #include "pocl_cl.h" | 1 | #include "pocl_cl.h" |
586 | 2 | 2 | ||
594 | 3 | CL_API_ENTRY cl_int CL_API_CALL | 3 | extern CL_API_ENTRY cl_int CL_API_CALL |
595 | 4 | clGetSupportedImageFormats(cl_context context , | 4 | clGetSupportedImageFormats(cl_context context, |
596 | 5 | cl_mem_flags flags , | 5 | cl_mem_flags flags, |
597 | 6 | cl_mem_object_type image_type , | 6 | cl_mem_object_type image_type, |
598 | 7 | cl_uint num_entries , | 7 | cl_uint num_entries, |
599 | 8 | cl_image_format * image_formats , | 8 | cl_image_format * image_formats, |
600 | 9 | cl_uint * num_image_formats ) CL_API_SUFFIX__VERSION_1_0 | 9 | cl_uint * num_image_formats) CL_API_SUFFIX__VERSION_1_0 |
601 | 10 | { | 10 | { |
604 | 11 | POCL_ABORT_UNIMPLEMENTED(); | 11 | if (context == NULL) |
605 | 12 | return CL_SUCCESS; | 12 | return CL_INVALID_CONTEXT; |
606 | 13 | |||
607 | 14 | if (image_type != CL_MEM_OBJECT_IMAGE2D) | ||
608 | 15 | return CL_INVALID_VALUE; | ||
609 | 16 | |||
610 | 17 | if (num_entries==0 && image_formats!=NULL) | ||
611 | 18 | return CL_INVALID_VALUE; | ||
612 | 19 | |||
613 | 20 | int idx=0; | ||
614 | 21 | |||
615 | 22 | const int supported_order_count = 2; | ||
616 | 23 | cl_channel_order supported_orders[] = | ||
617 | 24 | { | ||
618 | 25 | CL_RGBA, | ||
619 | 26 | CL_R | ||
620 | 27 | }; | ||
621 | 28 | |||
622 | 29 | const int supported_type_count = 2; | ||
623 | 30 | cl_channel_type supported_types[] = | ||
624 | 31 | { | ||
625 | 32 | CL_UNORM_INT8, | ||
626 | 33 | CL_FLOAT | ||
627 | 34 | }; | ||
628 | 35 | |||
629 | 36 | int i, j; | ||
630 | 37 | for (i=0; i<supported_order_count; i++) | ||
631 | 38 | for (j=0; j<supported_type_count; j++) | ||
632 | 39 | { | ||
633 | 40 | if (idx >= num_entries) | ||
634 | 41 | return CL_SUCCESS; | ||
635 | 42 | |||
636 | 43 | image_formats[idx].image_channel_order = supported_orders[i]; | ||
637 | 44 | image_formats[idx].image_channel_data_type = supported_types[j]; | ||
638 | 45 | |||
639 | 46 | idx++; | ||
640 | 47 | } | ||
641 | 48 | |||
642 | 49 | // Add special cases here if a channel order is supported with only some types or vice versa. | ||
643 | 50 | *num_image_formats = idx; | ||
644 | 51 | |||
645 | 52 | return CL_SUCCESS; | ||
646 | 13 | } | 53 | } |
647 | 14 | |||
648 | 15 | 54 | ||
649 | === modified file 'lib/CL/clReleaseSampler.c' | |||
650 | --- lib/CL/clReleaseSampler.c 2012-05-22 21:26:29 +0000 | |||
651 | +++ lib/CL/clReleaseSampler.c 2012-05-30 08:34:23 +0000 | |||
652 | @@ -1,9 +1,7 @@ | |||
653 | 1 | #include "pocl_cl.h" | 1 | #include "pocl_cl.h" |
657 | 2 | 2 | extern CL_API_ENTRY cl_int CL_API_CALL | |
658 | 3 | CL_API_ENTRY cl_int CL_API_CALL | 3 | clReleaseSampler(cl_sampler sampler) |
659 | 4 | clReleaseSampler(cl_sampler sampler) CL_API_SUFFIX__VERSION_1_0 | 4 | CL_API_SUFFIX__VERSION_1_0 |
660 | 5 | { | 5 | { |
661 | 6 | POCL_ABORT_UNIMPLEMENTED(); | ||
662 | 7 | return CL_SUCCESS; | 6 | return CL_SUCCESS; |
663 | 8 | } | 7 | } |
664 | 9 | |||
665 | 10 | 8 | ||
666 | === modified file 'lib/CL/devices/basic/basic.c' | |||
667 | --- lib/CL/devices/basic/basic.c 2012-05-28 16:17:57 +0000 | |||
668 | +++ lib/CL/devices/basic/basic.c 2012-05-30 08:34:23 +0000 | |||
669 | @@ -27,6 +27,7 @@ | |||
670 | 27 | #include <string.h> | 27 | #include <string.h> |
671 | 28 | #include <stdlib.h> | 28 | #include <stdlib.h> |
672 | 29 | #include <unistd.h> | 29 | #include <unistd.h> |
673 | 30 | #include <../dev_image.h> | ||
674 | 30 | #include <sys/time.h> | 31 | #include <sys/time.h> |
675 | 31 | 32 | ||
676 | 32 | #define max(a,b) (((a) > (b)) ? (a) : (b)) | 33 | #define max(a,b) (((a) > (b)) ? (a) : (b)) |
677 | @@ -235,6 +236,30 @@ | |||
678 | 235 | { | 236 | { |
679 | 236 | arguments[i] = &((*(cl_mem *) (p->value))->device_ptrs[device]); | 237 | arguments[i] = &((*(cl_mem *) (p->value))->device_ptrs[device]); |
680 | 237 | } | 238 | } |
681 | 239 | else if (kernel->arg_is_image[i]) | ||
682 | 240 | { | ||
683 | 241 | dev_image2d_t di; | ||
684 | 242 | cl_mem mem = *(cl_mem*)p->value; | ||
685 | 243 | di.data = &((*(cl_mem *) (p->value))->device_ptrs[device]); | ||
686 | 244 | di.data = ((*(cl_mem *) (p->value))->device_ptrs[device]); | ||
687 | 245 | di.width = mem->image_width; | ||
688 | 246 | di.height = mem->image_height; | ||
689 | 247 | di.rowpitch = mem->image_row_pitch; | ||
690 | 248 | di.order = mem->image_channel_order; | ||
691 | 249 | di.data_type = mem->image_channel_data_type; | ||
692 | 250 | void* devptr = pocl_basic_malloc(data, 0, sizeof(dev_image2d_t), NULL); | ||
693 | 251 | arguments[i] = malloc (sizeof (void *)); | ||
694 | 252 | *(void **)(arguments[i]) = devptr; | ||
695 | 253 | pocl_basic_write( data, &di, devptr, sizeof(dev_image2d_t) ); | ||
696 | 254 | } | ||
697 | 255 | else if (kernel->arg_is_sampler[i]) | ||
698 | 256 | { | ||
699 | 257 | dev_sampler_t ds; | ||
700 | 258 | |||
701 | 259 | arguments[i] = malloc (sizeof (void *)); | ||
702 | 260 | *(void **)(arguments[i]) = pocl_basic_malloc(data, 0, sizeof(dev_sampler_t), NULL); | ||
703 | 261 | pocl_basic_write( data, &ds, *(void**)arguments[i], sizeof(dev_sampler_t) ); | ||
704 | 262 | } | ||
705 | 238 | else | 263 | else |
706 | 239 | { | 264 | { |
707 | 240 | arguments[i] = p->value; | 265 | arguments[i] = p->value; |
708 | 241 | 266 | ||
709 | === added file 'lib/CL/devices/dev_image.h' | |||
710 | --- lib/CL/devices/dev_image.h 1970-01-01 00:00:00 +0000 | |||
711 | +++ lib/CL/devices/dev_image.h 2012-05-30 08:34:23 +0000 | |||
712 | @@ -0,0 +1,17 @@ | |||
713 | 1 | #ifndef __X86_IMAGE_H__ | ||
714 | 2 | #define __X86_IMAGE_H__ | ||
715 | 3 | |||
716 | 4 | //Definition of the image datatype used on basic and pthread (and probably tce?) | ||
717 | 5 | |||
718 | 6 | typedef cl_int dev_sampler_t; | ||
719 | 7 | |||
720 | 8 | typedef struct dev_image2d_t { | ||
721 | 9 | void* data; | ||
722 | 10 | cl_int width; | ||
723 | 11 | cl_int height; | ||
724 | 12 | cl_int rowpitch; | ||
725 | 13 | cl_int order; | ||
726 | 14 | cl_int data_type; | ||
727 | 15 | } dev_image2d_t; | ||
728 | 16 | |||
729 | 17 | #endif | ||
730 | 0 | 18 | ||
731 | === modified file 'lib/CL/devices/pthread/pthread.c' | |||
732 | --- lib/CL/devices/pthread/pthread.c 2012-05-29 12:16:56 +0000 | |||
733 | +++ lib/CL/devices/pthread/pthread.c 2012-05-30 08:34:23 +0000 | |||
734 | @@ -36,6 +36,7 @@ | |||
735 | 36 | #ifdef CUSTOM_BUFFER_ALLOCATOR | 36 | #ifdef CUSTOM_BUFFER_ALLOCATOR |
736 | 37 | 37 | ||
737 | 38 | #include "bufalloc.h" | 38 | #include "bufalloc.h" |
738 | 39 | #include <../dev_image.h> | ||
739 | 39 | 40 | ||
740 | 40 | /* Instead of mallocing a buffer size for a region, try to allocate | 41 | /* Instead of mallocing a buffer size for a region, try to allocate |
741 | 41 | this many times the buffer size to hopefully avoid mallocs for | 42 | this many times the buffer size to hopefully avoid mallocs for |
742 | @@ -651,6 +652,15 @@ | |||
743 | 651 | } | 652 | } |
744 | 652 | 653 | ||
745 | 653 | void * | 654 | void * |
746 | 655 | pocl_pthread_map_mem (void *data, void *buf_ptr, | ||
747 | 656 | size_t offset, size_t size, void* host_ptr) | ||
748 | 657 | { | ||
749 | 658 | /* All global pointers of the pthread/CPU device are in | ||
750 | 659 | the host address space already, and up to date. */ | ||
751 | 660 | return buf_ptr + offset; | ||
752 | 661 | } | ||
753 | 662 | |||
754 | 663 | void * | ||
755 | 654 | workgroup_thread (void *p) | 664 | workgroup_thread (void *p) |
756 | 655 | { | 665 | { |
757 | 656 | struct thread_arguments *ta = (struct thread_arguments *) p; | 666 | struct thread_arguments *ta = (struct thread_arguments *) p; |
758 | @@ -669,6 +679,30 @@ | |||
759 | 669 | } | 679 | } |
760 | 670 | else if (kernel->arg_is_pointer[i]) | 680 | else if (kernel->arg_is_pointer[i]) |
761 | 671 | arguments[i] = &((*(cl_mem *) (al->value))->device_ptrs[ta->device]); | 681 | arguments[i] = &((*(cl_mem *) (al->value))->device_ptrs[ta->device]); |
762 | 682 | else if (kernel->arg_is_image[i]) | ||
763 | 683 | { | ||
764 | 684 | dev_image2d_t di; | ||
765 | 685 | cl_mem mem = *(cl_mem*)al->value; | ||
766 | 686 | di.data = &((*(cl_mem *) (al->value))->device_ptrs[ta->device]); | ||
767 | 687 | di.data = ((*(cl_mem *) (al->value))->device_ptrs[ta->device]); | ||
768 | 688 | di.width = mem->image_width; | ||
769 | 689 | di.height = mem->image_height; | ||
770 | 690 | di.rowpitch = mem->image_row_pitch; | ||
771 | 691 | di.order = mem->image_channel_order; | ||
772 | 692 | di.data_type = mem->image_channel_data_type; | ||
773 | 693 | void* devptr = pocl_pthread_malloc(ta->data, 0, sizeof(dev_image2d_t), NULL); | ||
774 | 694 | arguments[i] = malloc (sizeof (void *)); | ||
775 | 695 | *(void **)(arguments[i]) = devptr; | ||
776 | 696 | pocl_pthread_write( ta->data, &di, devptr, sizeof(dev_image2d_t) ); | ||
777 | 697 | } | ||
778 | 698 | else if (kernel->arg_is_sampler[i]) | ||
779 | 699 | { | ||
780 | 700 | dev_sampler_t ds; | ||
781 | 701 | |||
782 | 702 | arguments[i] = malloc (sizeof (void *)); | ||
783 | 703 | *(void **)(arguments[i]) = pocl_pthread_malloc(ta->data, 0, sizeof(dev_sampler_t), NULL); | ||
784 | 704 | pocl_pthread_write( ta->data, &ds, *(void**)arguments[i], sizeof(dev_sampler_t) ); | ||
785 | 705 | } | ||
786 | 672 | else | 706 | else |
787 | 673 | arguments[i] = al->value; | 707 | arguments[i] = al->value; |
788 | 674 | } | 708 | } |
789 | @@ -699,11 +733,15 @@ | |||
790 | 699 | 733 | ||
791 | 700 | for (i = 0; i < kernel->num_args; ++i) | 734 | for (i = 0; i < kernel->num_args; ++i) |
792 | 701 | { | 735 | { |
794 | 702 | if (kernel->arg_is_local[i]) | 736 | if (kernel->arg_is_local[i] ) |
795 | 703 | { | 737 | { |
796 | 704 | pocl_pthread_free(ta->data, 0, *(void **)(arguments[i])); | 738 | pocl_pthread_free(ta->data, 0, *(void **)(arguments[i])); |
797 | 705 | free(arguments[i]); | 739 | free(arguments[i]); |
798 | 706 | } | 740 | } |
799 | 741 | else if( kernel->arg_is_sampler[i] || kernel->arg_is_image[i] ) | ||
800 | 742 | { | ||
801 | 743 | free(arguments[i]); | ||
802 | 744 | } | ||
803 | 707 | } | 745 | } |
804 | 708 | for (i = kernel->num_args; | 746 | for (i = kernel->num_args; |
805 | 709 | i < kernel->num_args + kernel->num_locals; | 747 | i < kernel->num_args + kernel->num_locals; |
806 | 710 | 748 | ||
807 | === modified file 'lib/CL/pocl_cl.h' | |||
808 | --- lib/CL/pocl_cl.h 2012-05-29 12:16:56 +0000 | |||
809 | +++ lib/CL/pocl_cl.h 2012-05-30 08:34:23 +0000 | |||
810 | @@ -303,6 +303,15 @@ | |||
811 | 303 | /* in case this is a sub buffer, this points to the parent | 303 | /* in case this is a sub buffer, this points to the parent |
812 | 304 | buffer */ | 304 | buffer */ |
813 | 305 | cl_mem_t *parent; | 305 | cl_mem_t *parent; |
814 | 306 | /* Image flags */ | ||
815 | 307 | cl_bool is_image; | ||
816 | 308 | cl_channel_order image_channel_order; | ||
817 | 309 | cl_channel_type image_channel_data_type; | ||
818 | 310 | size_t image_width; | ||
819 | 311 | size_t image_height; | ||
820 | 312 | /*size_t image_depth;*/ | ||
821 | 313 | size_t image_row_pitch; | ||
822 | 314 | /*size_t image_slice_pitch;*/ | ||
823 | 306 | }; | 315 | }; |
824 | 307 | 316 | ||
825 | 308 | struct _cl_program { | 317 | struct _cl_program { |
826 | @@ -337,6 +346,8 @@ | |||
827 | 337 | lt_dlhandle dlhandle; | 346 | lt_dlhandle dlhandle; |
828 | 338 | cl_int *arg_is_pointer; | 347 | cl_int *arg_is_pointer; |
829 | 339 | cl_int *arg_is_local; | 348 | cl_int *arg_is_local; |
830 | 349 | cl_int *arg_is_image; | ||
831 | 350 | cl_int *arg_is_sampler; | ||
832 | 340 | cl_uint num_locals; | 351 | cl_uint num_locals; |
833 | 341 | struct pocl_argument *arguments; | 352 | struct pocl_argument *arguments; |
834 | 342 | struct _cl_kernel *next; | 353 | struct _cl_kernel *next; |
835 | @@ -359,8 +370,13 @@ | |||
836 | 359 | 370 | ||
837 | 360 | }; | 371 | }; |
838 | 361 | 372 | ||
839 | 373 | typedef struct _cl_sampler cl_sampler_t; | ||
840 | 374 | |||
841 | 362 | struct _cl_sampler { | 375 | struct _cl_sampler { |
843 | 363 | POCL_ICD_OBJECT | 376 | POCL_ICD_OBJECT; |
844 | 377 | cl_bool normalized_coords; | ||
845 | 378 | cl_addressing_mode addressing_mode; | ||
846 | 379 | cl_filter_mode filter_mode; | ||
847 | 364 | }; | 380 | }; |
848 | 365 | 381 | ||
849 | 366 | #define POCL_PROFILE_QUEUED \ | 382 | #define POCL_PROFILE_QUEUED \ |
850 | 367 | 383 | ||
851 | === added file 'lib/CL/pocl_image_util.c' | |||
852 | --- lib/CL/pocl_image_util.c 1970-01-01 00:00:00 +0000 | |||
853 | +++ lib/CL/pocl_image_util.c 2012-05-30 08:34:23 +0000 | |||
854 | @@ -0,0 +1,228 @@ | |||
855 | 1 | #include "pocl_cl.h" | ||
856 | 2 | #include "pocl_image_util.h" | ||
857 | 3 | #include "assert.h" | ||
858 | 4 | |||
859 | 5 | extern void | ||
860 | 6 | pocl_get_image_information (cl_mem image, | ||
861 | 7 | int* channels_out, | ||
862 | 8 | int* elem_size_out) | ||
863 | 9 | { | ||
864 | 10 | cl_channel_order order = image->image_channel_order; | ||
865 | 11 | cl_channel_type type = image->image_channel_data_type; | ||
866 | 12 | |||
867 | 13 | int host_elem_size; | ||
868 | 14 | if (type == CL_FLOAT) | ||
869 | 15 | host_elem_size=4; | ||
870 | 16 | else if (type==CL_UNORM_INT8) | ||
871 | 17 | host_elem_size=1; | ||
872 | 18 | else | ||
873 | 19 | POCL_ABORT_UNIMPLEMENTED(); | ||
874 | 20 | if (elem_size_out != NULL) | ||
875 | 21 | *elem_size_out = host_elem_size; | ||
876 | 22 | |||
877 | 23 | int host_channels; | ||
878 | 24 | if (order == CL_RGBA) | ||
879 | 25 | host_channels=4; | ||
880 | 26 | else if (order == CL_R) | ||
881 | 27 | host_channels=1; | ||
882 | 28 | else | ||
883 | 29 | POCL_ABORT_UNIMPLEMENTED(); | ||
884 | 30 | if (channels_out != NULL) | ||
885 | 31 | *channels_out = host_channels; | ||
886 | 32 | } | ||
887 | 33 | extern cl_int | ||
888 | 34 | pocl_write_image (cl_mem image, | ||
889 | 35 | cl_device_id device_id, | ||
890 | 36 | const size_t * origin_, /*[3]*/ | ||
891 | 37 | const size_t * region_, /*[3]*/ | ||
892 | 38 | size_t host_row_pitch, | ||
893 | 39 | size_t host_slice_pitch, | ||
894 | 40 | const void * ptr) | ||
895 | 41 | { | ||
896 | 42 | if (image == NULL) | ||
897 | 43 | return CL_INVALID_MEM_OBJECT; | ||
898 | 44 | |||
899 | 45 | if ((ptr == NULL) || | ||
900 | 46 | (region_ == NULL)) | ||
901 | 47 | return CL_INVALID_VALUE; | ||
902 | 48 | |||
903 | 49 | int width = image->image_width; | ||
904 | 50 | int height = image->image_height; | ||
905 | 51 | cl_channel_order order = image->image_channel_order; | ||
906 | 52 | cl_channel_type type = image->image_channel_data_type; | ||
907 | 53 | |||
908 | 54 | size_t dev_elem_size = sizeof(cl_float); | ||
909 | 55 | int dev_channels = 4; | ||
910 | 56 | |||
911 | 57 | int host_elem_size; | ||
912 | 58 | int host_channels; | ||
913 | 59 | pocl_get_image_information (image, &host_channels, &host_elem_size); | ||
914 | 60 | |||
915 | 61 | size_t origin[3] = { origin_[0]*dev_elem_size*dev_channels, origin_[1], origin_[2] }; | ||
916 | 62 | size_t region[3] = { region_[0]*dev_elem_size*dev_channels, region_[1], region_[2] }; | ||
917 | 63 | |||
918 | 64 | size_t image_row_pitch = width*dev_elem_size*dev_channels; | ||
919 | 65 | size_t image_slice_pitch = 0; | ||
920 | 66 | |||
921 | 67 | if ((region[0]*region[1]*region[2] > 0) && | ||
922 | 68 | (region[0]-1 + | ||
923 | 69 | image_row_pitch * (region[1]-1) + | ||
924 | 70 | image_slice_pitch * (region[2]-1) >= image->size)) | ||
925 | 71 | return CL_INVALID_VALUE; | ||
926 | 72 | |||
927 | 73 | cl_float* temp = malloc( width*height*dev_channels*dev_elem_size ); | ||
928 | 74 | |||
929 | 75 | if (temp == NULL) | ||
930 | 76 | return CL_OUT_OF_HOST_MEMORY; | ||
931 | 77 | |||
932 | 78 | int x, y, k; | ||
933 | 79 | |||
934 | 80 | for (y=0; y<height; y++) | ||
935 | 81 | for (x=0; x<width*dev_channels; x++) | ||
936 | 82 | temp[x+y*width*dev_channels] = 0.f; | ||
937 | 83 | |||
938 | 84 | for (y=0; y<height; y++) | ||
939 | 85 | { | ||
940 | 86 | for (x=0; x<width; x++) | ||
941 | 87 | { | ||
942 | 88 | cl_float elem[4]; //TODO 0,0,0,0 for some modes? | ||
943 | 89 | |||
944 | 90 | for (k=0; k<host_channels; k++) | ||
945 | 91 | { | ||
946 | 92 | if (type == CL_FLOAT) | ||
947 | 93 | elem[k] = ((float*)ptr)[k+(x+y*width)*host_channels]; | ||
948 | 94 | else if (type==CL_UNORM_INT8) | ||
949 | 95 | { | ||
950 | 96 | cl_uchar foo = ((cl_uchar*)ptr)[k+(x+y*width)*host_channels]; | ||
951 | 97 | elem[k] = (float)(foo) * (1.f/255.f); | ||
952 | 98 | } | ||
953 | 99 | else | ||
954 | 100 | POCL_ABORT_UNIMPLEMENTED(); | ||
955 | 101 | } | ||
956 | 102 | |||
957 | 103 | if (order == CL_RGBA) | ||
958 | 104 | for (k=0; k<4; k++) | ||
959 | 105 | temp[(x+y*width)*dev_channels+k] = elem[k]; | ||
960 | 106 | else if (order == CL_R) | ||
961 | 107 | { | ||
962 | 108 | temp[(x+y*width)*dev_channels+0] = elem[0]; | ||
963 | 109 | temp[(x+y*width)*dev_channels+1] = 0.f; | ||
964 | 110 | temp[(x+y*width)*dev_channels+2] = 0.f; | ||
965 | 111 | temp[(x+y*width)*dev_channels+3] = 1.f; | ||
966 | 112 | } | ||
967 | 113 | } | ||
968 | 114 | } | ||
969 | 115 | |||
970 | 116 | |||
971 | 117 | device_id->write_rect(device_id->data, temp, | ||
972 | 118 | image->device_ptrs[device_id->dev_id], | ||
973 | 119 | origin, origin, region, | ||
974 | 120 | image_row_pitch, image_slice_pitch, | ||
975 | 121 | image_row_pitch, image_slice_pitch); | ||
976 | 122 | |||
977 | 123 | free (temp); | ||
978 | 124 | return CL_SUCCESS; | ||
979 | 125 | } | ||
980 | 126 | |||
981 | 127 | extern cl_int | ||
982 | 128 | pocl_read_image (cl_mem image, | ||
983 | 129 | cl_device_id device_id, | ||
984 | 130 | const size_t * origin_, /*[3]*/ | ||
985 | 131 | const size_t * region_, /*[3]*/ | ||
986 | 132 | size_t host_row_pitch, | ||
987 | 133 | size_t host_slice_pitch, | ||
988 | 134 | void * ptr) | ||
989 | 135 | { | ||
990 | 136 | |||
991 | 137 | if (image == NULL) | ||
992 | 138 | return CL_INVALID_MEM_OBJECT; | ||
993 | 139 | |||
994 | 140 | if ((ptr == NULL) || | ||
995 | 141 | (region_ == NULL)) | ||
996 | 142 | return CL_INVALID_VALUE; | ||
997 | 143 | |||
998 | 144 | int width = image->image_width; | ||
999 | 145 | int height = image->image_height; | ||
1000 | 146 | int dev_elem_size = sizeof(cl_float); | ||
1001 | 147 | int dev_channels = 4; | ||
1002 | 148 | size_t origin[3] = { origin_[0]*dev_elem_size*dev_channels, origin_[1], origin_[2] }; | ||
1003 | 149 | size_t region[3] = { region_[0]*dev_elem_size*dev_channels, region_[1], region_[2] }; | ||
1004 | 150 | |||
1005 | 151 | size_t image_row_pitch = width*dev_elem_size*dev_channels; | ||
1006 | 152 | size_t image_slice_pitch = 0; | ||
1007 | 153 | |||
1008 | 154 | if ((region[0]*region[1]*region[2] > 0) && | ||
1009 | 155 | (region[0]-1 + | ||
1010 | 156 | image_row_pitch * (region[1]-1) + | ||
1011 | 157 | image_slice_pitch * (region[2]-1) >= image->size)) | ||
1012 | 158 | return CL_INVALID_VALUE; | ||
1013 | 159 | |||
1014 | 160 | |||
1015 | 161 | int i, j, k; | ||
1016 | 162 | |||
1017 | 163 | cl_channel_order order = image->image_channel_order; | ||
1018 | 164 | cl_channel_type type = image->image_channel_data_type; | ||
1019 | 165 | |||
1020 | 166 | cl_float* temp = malloc( width*height*dev_channels*dev_elem_size ); | ||
1021 | 167 | |||
1022 | 168 | if (temp == NULL) | ||
1023 | 169 | return CL_OUT_OF_HOST_MEMORY; | ||
1024 | 170 | |||
1025 | 171 | int host_channels, host_elem_size; | ||
1026 | 172 | |||
1027 | 173 | pocl_get_image_information(image, &host_channels, &host_elem_size); | ||
1028 | 174 | |||
1029 | 175 | if (host_row_pitch == 0) { | ||
1030 | 176 | host_row_pitch = width*host_channels; | ||
1031 | 177 | } | ||
1032 | 178 | |||
1033 | 179 | size_t buffer_origin[3] = { 0, 0, 0 }; | ||
1034 | 180 | |||
1035 | 181 | device_id->read_rect(device_id->data, temp, | ||
1036 | 182 | image->device_ptrs[device_id->dev_id], | ||
1037 | 183 | origin, origin, region, | ||
1038 | 184 | image_row_pitch, image_slice_pitch, | ||
1039 | 185 | image_row_pitch, image_slice_pitch); | ||
1040 | 186 | |||
1041 | 187 | for (j=0; j<height; j++) { | ||
1042 | 188 | for (i=0; i<width; i++) { | ||
1043 | 189 | cl_float elem[4]; | ||
1044 | 190 | |||
1045 | 191 | for (k=0; k<4; k++) | ||
1046 | 192 | elem[k]=0; | ||
1047 | 193 | |||
1048 | 194 | if (order == CL_RGBA) { | ||
1049 | 195 | for (k=0; k<4; k++) | ||
1050 | 196 | elem[k] = temp[i*dev_channels + j*width*dev_channels + k]; | ||
1051 | 197 | } | ||
1052 | 198 | else if (order == CL_R) { // host_channels == 1 | ||
1053 | 199 | elem[0] = temp[i*dev_channels + j*width*dev_channels + 0]; | ||
1054 | 200 | } | ||
1055 | 201 | |||
1056 | 202 | if (type == CL_UNORM_INT8) | ||
1057 | 203 | { // host_channels == 4 | ||
1058 | 204 | for (k=0; k<host_channels; k++) | ||
1059 | 205 | { | ||
1060 | 206 | ((cl_uchar*)ptr)[i*host_channels + j*host_row_pitch + k] | ||
1061 | 207 | = (unsigned char)(255*elem[k]); | ||
1062 | 208 | } | ||
1063 | 209 | } | ||
1064 | 210 | else if (type == CL_FLOAT) | ||
1065 | 211 | { | ||
1066 | 212 | for (k=0; k<host_channels; k++) | ||
1067 | 213 | { | ||
1068 | 214 | POCL_ABORT_UNIMPLEMENTED(); | ||
1069 | 215 | ((cl_float*)ptr)[i*host_channels + j*host_row_pitch + k] | ||
1070 | 216 | = elem[k]; | ||
1071 | 217 | } | ||
1072 | 218 | } | ||
1073 | 219 | else | ||
1074 | 220 | POCL_ABORT_UNIMPLEMENTED(); | ||
1075 | 221 | } | ||
1076 | 222 | } | ||
1077 | 223 | |||
1078 | 224 | free (temp); | ||
1079 | 225 | |||
1080 | 226 | return CL_SUCCESS; | ||
1081 | 227 | } | ||
1082 | 228 | |||
1083 | 0 | \ No newline at end of file | 229 | \ No newline at end of file |
1084 | 1 | 230 | ||
1085 | === added file 'lib/CL/pocl_image_util.h' | |||
1086 | --- lib/CL/pocl_image_util.h 1970-01-01 00:00:00 +0000 | |||
1087 | +++ lib/CL/pocl_image_util.h 2012-05-30 08:34:23 +0000 | |||
1088 | @@ -0,0 +1,50 @@ | |||
1089 | 1 | /* OpenCL runtime library: pocl_image_util image utility functions | ||
1090 | 2 | |||
1091 | 3 | Copyright (c) 2012 Timo Viitanen / Tampere University of Technology | ||
1092 | 4 | |||
1093 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy | ||
1094 | 6 | of this software and associated documentation files (the "Software"), to deal | ||
1095 | 7 | in the Software without restriction, including without limitation the rights | ||
1096 | 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
1097 | 9 | copies of the Software, and to permit persons to whom the Software is | ||
1098 | 10 | furnished to do so, subject to the following conditions: | ||
1099 | 11 | |||
1100 | 12 | The above copyright notice and this permission notice shall be included in | ||
1101 | 13 | all copies or substantial portions of the Software. | ||
1102 | 14 | |||
1103 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
1104 | 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
1105 | 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
1106 | 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
1107 | 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
1108 | 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | ||
1109 | 21 | THE SOFTWARE. | ||
1110 | 22 | */ | ||
1111 | 23 | |||
1112 | 24 | #ifndef POCL_IMAGE_UTIL_H | ||
1113 | 25 | #define POCL_IMAGE_UTIL_H | ||
1114 | 26 | |||
1115 | 27 | extern cl_int | ||
1116 | 28 | pocl_write_image (cl_mem image, | ||
1117 | 29 | cl_device_id device_id, | ||
1118 | 30 | const size_t * origin_, /*[3]*/ | ||
1119 | 31 | const size_t * region_, /*[3]*/ | ||
1120 | 32 | size_t host_row_pitch, | ||
1121 | 33 | size_t host_slice_pitch, | ||
1122 | 34 | const void * ptr); | ||
1123 | 35 | |||
1124 | 36 | extern cl_int | ||
1125 | 37 | pocl_read_image (cl_mem image, | ||
1126 | 38 | cl_device_id device, | ||
1127 | 39 | const size_t * origin, /*[3]*/ | ||
1128 | 40 | const size_t * region, /*[3]*/ | ||
1129 | 41 | size_t host_row_pitch, | ||
1130 | 42 | size_t host_slice_pitch, | ||
1131 | 43 | void * ptr); | ||
1132 | 44 | |||
1133 | 45 | extern void | ||
1134 | 46 | pocl_get_image_information (cl_mem image, | ||
1135 | 47 | int* host_channels, | ||
1136 | 48 | int* host_elem_size); | ||
1137 | 49 | |||
1138 | 50 | #endif | ||
1139 | 0 | 51 | ||
1140 | === added file 'lib/kernel/get_image_height.cl' | |||
1141 | --- lib/kernel/get_image_height.cl 1970-01-01 00:00:00 +0000 | |||
1142 | +++ lib/kernel/get_image_height.cl 2012-05-30 08:34:23 +0000 | |||
1143 | @@ -0,0 +1,7 @@ | |||
1144 | 1 | #include "templates.h" | ||
1145 | 2 | #include "image.h" | ||
1146 | 3 | |||
1147 | 4 | int get_image_width (image2d_t image) | ||
1148 | 5 | { | ||
1149 | 6 | return image->height; | ||
1150 | 7 | } | ||
1151 | 0 | \ No newline at end of file | 8 | \ No newline at end of file |
1152 | 1 | 9 | ||
1153 | === added file 'lib/kernel/get_image_width.cl' | |||
1154 | --- lib/kernel/get_image_width.cl 1970-01-01 00:00:00 +0000 | |||
1155 | +++ lib/kernel/get_image_width.cl 2012-05-30 08:34:23 +0000 | |||
1156 | @@ -0,0 +1,7 @@ | |||
1157 | 1 | #include "templates.h" | ||
1158 | 2 | #include "image.h" | ||
1159 | 3 | |||
1160 | 4 | int get_image_width (image2d_t image) | ||
1161 | 5 | { | ||
1162 | 6 | return image->width; | ||
1163 | 7 | } | ||
1164 | 0 | \ No newline at end of file | 8 | \ No newline at end of file |
1165 | 1 | 9 | ||
1166 | === added file 'lib/kernel/image.h' | |||
1167 | --- lib/kernel/image.h 1970-01-01 00:00:00 +0000 | |||
1168 | +++ lib/kernel/image.h 2012-05-30 08:34:23 +0000 | |||
1169 | @@ -0,0 +1,15 @@ | |||
1170 | 1 | #ifndef __IMAGE_H__ | ||
1171 | 2 | #define __IMAGE_H__ | ||
1172 | 3 | |||
1173 | 4 | #include "templates.h" | ||
1174 | 5 | |||
1175 | 6 | typedef struct image2d_t_ { | ||
1176 | 7 | uchar4* data; | ||
1177 | 8 | int width; | ||
1178 | 9 | int height; | ||
1179 | 10 | int rowpitch; | ||
1180 | 11 | int order; | ||
1181 | 12 | int data_type; | ||
1182 | 13 | } image2d_t_; | ||
1183 | 14 | |||
1184 | 15 | #endif | ||
1185 | 0 | 16 | ||
1186 | === added file 'lib/kernel/read_image.cl' | |||
1187 | --- lib/kernel/read_image.cl 1970-01-01 00:00:00 +0000 | |||
1188 | +++ lib/kernel/read_image.cl 2012-05-30 08:34:23 +0000 | |||
1189 | @@ -0,0 +1,30 @@ | |||
1190 | 1 | #include "templates.h" | ||
1191 | 2 | |||
1192 | 3 | #include "image.h" | ||
1193 | 4 | |||
1194 | 5 | float4 _cl_overloadable read_imagef ( image2d_t image, | ||
1195 | 6 | sampler_t sampler, | ||
1196 | 7 | int2 coord) { | ||
1197 | 8 | //TODO: Sampler options | ||
1198 | 9 | if( coord.x<0 ) | ||
1199 | 10 | coord.x=0; | ||
1200 | 11 | if( coord.y<0 ) | ||
1201 | 12 | coord.y=0; | ||
1202 | 13 | if( coord.x>=image->width ) | ||
1203 | 14 | coord.x=image->width-1; | ||
1204 | 15 | if( coord.y>=image->height ) | ||
1205 | 16 | coord.y=image->height-1; | ||
1206 | 17 | |||
1207 | 18 | float4 color = ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ]; | ||
1208 | 19 | |||
1209 | 20 | return color; | ||
1210 | 21 | } | ||
1211 | 22 | |||
1212 | 23 | float4 _cl_overloadable read_imagef ( image2d_t image, | ||
1213 | 24 | sampler_t sampler, | ||
1214 | 25 | float2 coord) { | ||
1215 | 26 | |||
1216 | 27 | float4 color = ((float4*)image->data)[ (int)coord.x + (int)coord.y*image->rowpitch ]; | ||
1217 | 28 | |||
1218 | 29 | return color; | ||
1219 | 30 | } | ||
1220 | 0 | 31 | ||
1221 | === modified file 'lib/kernel/sources.mk' | |||
1222 | --- lib/kernel/sources.mk 2012-03-05 15:33:38 +0000 | |||
1223 | +++ lib/kernel/sources.mk 2012-05-30 08:34:23 +0000 | |||
1224 | @@ -130,7 +130,11 @@ | |||
1225 | 130 | vload_half.cl \ | 130 | vload_half.cl \ |
1226 | 131 | vstore_half.cl \ | 131 | vstore_half.cl \ |
1227 | 132 | async_work_group_copy.cl \ | 132 | async_work_group_copy.cl \ |
1229 | 133 | wait_group_events.cl | 133 | wait_group_events.cl \ |
1230 | 134 | read_image.cl \ | ||
1231 | 135 | write_image.cl \ | ||
1232 | 136 | get_image_width.cl \ | ||
1233 | 137 | get_image_height.cl | ||
1234 | 134 | 138 | ||
1235 | 135 | 139 | ||
1236 | 136 | 140 | ||
1237 | 137 | 141 | ||
1238 | === added file 'lib/kernel/write_image.cl' | |||
1239 | --- lib/kernel/write_image.cl 1970-01-01 00:00:00 +0000 | |||
1240 | +++ lib/kernel/write_image.cl 2012-05-30 08:34:23 +0000 | |||
1241 | @@ -0,0 +1,17 @@ | |||
1242 | 1 | #include "templates.h" | ||
1243 | 2 | |||
1244 | 3 | #include "image.h" | ||
1245 | 4 | |||
1246 | 5 | //typedef struct image2d_t_* image2d_t; | ||
1247 | 6 | |||
1248 | 7 | void _cl_overloadable write_imagef ( image2d_t image, | ||
1249 | 8 | int2 coord, | ||
1250 | 9 | float4 color) { | ||
1251 | 10 | ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ] = color; | ||
1252 | 11 | } | ||
1253 | 12 | |||
1254 | 13 | void _cl_overloadable write_imagei ( image2d_t image, | ||
1255 | 14 | int2 coord, | ||
1256 | 15 | int4 color) { | ||
1257 | 16 | ((float4*)image->data)[ coord.x + coord.y*image->rowpitch ] = (float4)(color.x,color.y,color.z,color.w); | ||
1258 | 17 | } | ||
1259 | 0 | 18 | ||
1260 | === modified file 'lib/llvmopencl/GenerateHeader.cc' | |||
1261 | --- lib/llvmopencl/GenerateHeader.cc 2012-02-24 17:50:01 +0000 | |||
1262 | +++ lib/llvmopencl/GenerateHeader.cc 2012-05-30 08:34:23 +0000 | |||
1263 | @@ -123,6 +123,8 @@ | |||
1264 | 123 | return changed; | 123 | return changed; |
1265 | 124 | } | 124 | } |
1266 | 125 | 125 | ||
1267 | 126 | #include <iostream> | ||
1268 | 127 | |||
1269 | 126 | 128 | ||
1270 | 127 | 129 | ||
1271 | 128 | void | 130 | void |
1272 | 129 | GenerateHeader::ProcessPointers(Function *F, | 131 | GenerateHeader::ProcessPointers(Function *F, |
1273 | @@ -134,12 +136,17 @@ | |||
1274 | 134 | 136 | ||
1275 | 135 | bool is_pointer[num_args]; | 137 | bool is_pointer[num_args]; |
1276 | 136 | bool is_local[num_args]; | 138 | bool is_local[num_args]; |
1277 | 139 | bool is_image[num_args]; | ||
1278 | 140 | bool is_sampler[num_args]; | ||
1279 | 137 | 141 | ||
1280 | 138 | int i = 0; | 142 | int i = 0; |
1281 | 139 | for (Function::const_arg_iterator ii = F->arg_begin(), | 143 | for (Function::const_arg_iterator ii = F->arg_begin(), |
1282 | 140 | ee = F->arg_end(); | 144 | ee = F->arg_end(); |
1283 | 141 | ii != ee; ++ii) { | 145 | ii != ee; ++ii) { |
1284 | 142 | Type *t = ii->getType(); | 146 | Type *t = ii->getType(); |
1285 | 147 | |||
1286 | 148 | is_image[i] = false; | ||
1287 | 149 | is_sampler[i] = false; | ||
1288 | 143 | 150 | ||
1289 | 144 | if (const PointerType *p = dyn_cast<PointerType> (t)) { | 151 | if (const PointerType *p = dyn_cast<PointerType> (t)) { |
1290 | 145 | is_pointer[i] = true; | 152 | is_pointer[i] = true; |
1291 | @@ -153,6 +160,22 @@ | |||
1292 | 153 | is_pointer[i] = false; | 160 | is_pointer[i] = false; |
1293 | 154 | is_local[i] = false; | 161 | is_local[i] = false; |
1294 | 155 | } | 162 | } |
1295 | 163 | |||
1296 | 164 | if( t->isPointerTy() ) { | ||
1297 | 165 | if( t->getPointerElementType()->isStructTy() ) { | ||
1298 | 166 | string name = t->getPointerElementType()->getStructName().str(); | ||
1299 | 167 | if( name == "struct.image2d_t_" ) { // TODO image3d? | ||
1300 | 168 | is_image[i] = true; | ||
1301 | 169 | is_pointer[i] = false; | ||
1302 | 170 | is_local[i] = false; | ||
1303 | 171 | } | ||
1304 | 172 | if( name == "struct.sampler_t_" ) { | ||
1305 | 173 | is_sampler[i] = true; | ||
1306 | 174 | is_pointer[i] = false; | ||
1307 | 175 | is_local[i] = false; | ||
1308 | 176 | } | ||
1309 | 177 | } | ||
1310 | 178 | } | ||
1311 | 156 | 179 | ||
1312 | 157 | ++i; | 180 | ++i; |
1313 | 158 | } | 181 | } |
1314 | @@ -172,6 +195,22 @@ | |||
1315 | 172 | out << ", " << is_local[i]; | 195 | out << ", " << is_local[i]; |
1316 | 173 | } | 196 | } |
1317 | 174 | out << "}\n"; | 197 | out << "}\n"; |
1318 | 198 | |||
1319 | 199 | out << "#define _" << F->getName() << "_ARG_IS_IMAGE {"; | ||
1320 | 200 | if (num_args != 0) { | ||
1321 | 201 | out << is_image[0]; | ||
1322 | 202 | for (i = 1; i < num_args; ++i) | ||
1323 | 203 | out << ", " << is_image[i]; | ||
1324 | 204 | } | ||
1325 | 205 | out << "}\n"; | ||
1326 | 206 | |||
1327 | 207 | out << "#define _" << F->getName() << "_ARG_IS_SAMPLER {"; | ||
1328 | 208 | if (num_args != 0) { | ||
1329 | 209 | out << is_sampler[0]; | ||
1330 | 210 | for (i = 1; i < num_args; ++i) | ||
1331 | 211 | out << ", " << is_sampler[i]; | ||
1332 | 212 | } | ||
1333 | 213 | out << "}\n"; | ||
1334 | 175 | } | 214 | } |
1335 | 176 | 215 | ||
1336 | 177 | 216 | ||
1337 | 178 | 217 | ||
1338 | 179 | 218 | ||
1339 | === modified file 'scripts/pocl-kernel.in' | |||
1340 | --- scripts/pocl-kernel.in 2012-05-14 17:44:09 +0000 | |||
1341 | +++ scripts/pocl-kernel.in 2012-05-30 08:34:23 +0000 | |||
1342 | @@ -60,6 +60,8 @@ | |||
1343 | 60 | unsigned _num_args = _${kernel}_NUM_ARGS; | 60 | unsigned _num_args = _${kernel}_NUM_ARGS; |
1344 | 61 | int _arg_is_pointer[] = _${kernel}_ARG_IS_POINTER; | 61 | int _arg_is_pointer[] = _${kernel}_ARG_IS_POINTER; |
1345 | 62 | int _arg_is_local[] = _${kernel}_ARG_IS_LOCAL; | 62 | int _arg_is_local[] = _${kernel}_ARG_IS_LOCAL; |
1346 | 63 | int _arg_is_image[] = _${kernel}_ARG_IS_IMAGE; | ||
1347 | 64 | int _arg_is_sampler[] = _${kernel}_ARG_IS_SAMPLER; | ||
1348 | 63 | unsigned _num_locals = _${kernel}_NUM_LOCALS; | 65 | unsigned _num_locals = _${kernel}_NUM_LOCALS; |
1349 | 64 | #if _${kernel}_NUM_LOCALS != 0 | 66 | #if _${kernel}_NUM_LOCALS != 0 |
1350 | 65 | unsigned _local_sizes[_${kernel}_NUM_LOCALS] = _${kernel}_LOCAL_SIZE; | 67 | unsigned _local_sizes[_${kernel}_NUM_LOCALS] = _${kernel}_LOCAL_SIZE; |
1351 | @@ -93,6 +95,8 @@ | |||
1352 | 93 | #endif | 95 | #endif |
1353 | 94 | _${kernel}_ARG_IS_LOCAL, | 96 | _${kernel}_ARG_IS_LOCAL, |
1354 | 95 | _${kernel}_ARG_IS_POINTER, | 97 | _${kernel}_ARG_IS_POINTER, |
1355 | 98 | _${kernel}_ARG_IS_IMAGE, | ||
1356 | 99 | _${kernel}_ARG_IS_SAMPLER, | ||
1357 | 96 | _${kernel}_workgroup_fast | 100 | _${kernel}_workgroup_fast |
1358 | 97 | }; | 101 | }; |
1359 | 98 | EOF | 102 | EOF |
1360 | 99 | 103 | ||
1361 | === modified file 'tests/testsuite-samples.at' | |||
1362 | --- tests/testsuite-samples.at 2012-05-24 21:26:09 +0000 | |||
1363 | +++ tests/testsuite-samples.at 2012-05-30 08:34:23 +0000 | |||
1364 | @@ -73,6 +73,32 @@ | |||
1365 | 73 | 73 | ||
1366 | 74 | AT_CLEANUP | 74 | AT_CLEANUP |
1367 | 75 | 75 | ||
1368 | 76 | AT_SETUP([Run Chapter 8: ImageFilter2D]) | ||
1369 | 77 | AT_KEYWORDS([booksamples imagefilter2d]) | ||
1370 | 78 | AT_SKIP_IF([! test -e $abs_top_srcdir/examples/opencl-book-samples/checkout]) | ||
1371 | 79 | AT_CHECK_UNQUOTED([ | ||
1372 | 80 | cd ${abs_top_srcdir}/examples/opencl-book-samples/checkout/src/Chapter_8/ImageFilter2D ; | ||
1373 | 81 | #sed '13c\ ' -i ImageFilter2D.cl ; | ||
1374 | 82 | #sed '14c\ ' -i ImageFilter2D.cl ; | ||
1375 | 83 | #sed '15c\ ' -i ImageFilter2D.cl ; | ||
1376 | 84 | #sed "3c float kernelWeights[9] = { 1.0f, 2.0f, 1.0f, 2.0f, 4.0f, 2.0f,1.0f, 2.0f, 1.0f }; \ " -i ImageFilter2D.cl ; | ||
1377 | 85 | cp ${abs_top_srcdir}/examples/opencl-book-samples/ImageFilter2D.cl ./ | ||
1378 | 86 | sed '418cclFinish(commandQueue);' -i ImageFilter2D.cpp ; | ||
1379 | 87 | cd ${abs_top_srcdir}/examples/opencl-book-samples/checkout/build/src/Chapter_8/ImageFilter2D ; | ||
1380 | 88 | make] | ||
1381 | 89 | , 0, [ignore], [ignore]) | ||
1382 | 90 | |||
1383 | 91 | AT_CHECK_UNQUOTED([ | ||
1384 | 92 | cd $abs_top_srcdir/examples/opencl-book-samples/checkout/build/src/Chapter_8/ImageFilter2D | ||
1385 | 93 | ./ImageFilter2D ../../../../src/Chapter_19/oclFlow/data/minicooper/frame10.png output.png | ||
1386 | 94 | ], 0, | ||
1387 | 95 | [Could not create GPU context, trying CPU... | ||
1388 | 96 | |||
1389 | 97 | Executed program succesfully. | ||
1390 | 98 | ], [ignore]) | ||
1391 | 99 | |||
1392 | 100 | AT_CLEANUP | ||
1393 | 101 | |||
1394 | 76 | AT_SETUP([Run Chapter 12: VectorAdd (C++ bindings)]) | 102 | AT_SETUP([Run Chapter 12: VectorAdd (C++ bindings)]) |
1395 | 77 | AT_KEYWORDS([booksamples]) | 103 | AT_KEYWORDS([booksamples]) |
1396 | 78 | AT_SKIP_IF([! test -e $abs_top_srcdir/examples/opencl-book-samples/checkout]) | 104 | AT_SKIP_IF([! test -e $abs_top_srcdir/examples/opencl-book-samples/checkout]) |