Android AARCH64 平台的 OpenCL 配置
开发环境
IDE: Android Studio 3.4.1
Android: 7.1
minSdkVersion: 25
targetSdkVersion: 26
JNI CMake: 3.4.1
ABI: arm64-v8a
OpenCL: 1.2
配置 OpenCL 使用项目中的 so 库
这里以编译 openclTest.cpp
为 libopenclTest.so
并导入 OpenCL 的动态库为例
注1: 下列 so 库需要从开发板上 pull 到项目中,其中
libGLES_mali.so
用于驱动 OpenCL,其他库为依赖库
注2: 不同平台的驱动库位于不同位置,可以下载 OpenCL-Z 查看
注3:set_target_properties
中第一项参数设置了生成库的名称,例如设置为openclTest
,则生成libopenclTest.so
库,这里使用了lib_*
作为前缀,则生成liblib_*.so
库,实际开发中要避免这种情况
查看依赖库
objdump -x libGLES_mali.so | grep NEEDED
目录结构
opencltest
├─ app
│ ├─ build
│ ├─ libs
│ └─ src
│ ├─ androidTest
│ ├─ main
│ │ ├─ java
│ │ │ └─ com
│ │ │ └─ example
│ │ │ └─ opencltest
│ │ │ MainActivity.java
│ │ ├─ jni # C/C++ 源码目录
│ │ │ └─ openclTest.cpp
│ │ ├─ jniLibs # JNI 需要调用的运行库
│ │ │ └─ arm64-v8a # 对应 ABI 版本建立文件夹
│ │ │ ├─ libbinder.so
│ │ │ ├─ libc++.so
│ │ │ ├─ libc.so
│ │ │ ├─ libcrypto.so
│ │ │ ├─ libcutils.so
│ │ │ ├─ libdl.so
│ │ │ ├─ libGLES_mali.so
│ │ │ ├─ libhardware.so
│ │ │ ├─ liblog.so
│ │ │ ├─ libm.so
│ │ │ ├─ libui.so
│ │ │ ├─ libutils.so
│ │ │ └─ libz.so
│ │ └─ res
│ └─ test
└─ gradle
CMakeLists.txt
增加配置
add_library(openclTest
SHARED
src/main/jni/openclTest.cpp )
add_library(lib_opencl SHARED IMPORTED)
set_target_properties(lib_opencl
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libGLES_mali.so )
add_library(lib_z SHARED IMPORTED)
set_target_properties(lib_z
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libz.so )
add_library(lib_log SHARED IMPORTED)
set_target_properties(lib_log
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/liblog.so )
add_library(lib_utils SHARED IMPORTED)
set_target_properties(lib_utils
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libutils.so )
add_library(lib_ui SHARED IMPORTED)
set_target_properties(lib_ui
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libui.so )
add_library(lib_cutils SHARED IMPORTED)
set_target_properties(lib_cutils
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libcutils.so )
add_library(lib_binder SHARED IMPORTED)
set_target_properties(lib_binder
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libbinder.so )
add_library(lib_crypto SHARED IMPORTED)
set_target_properties(lib_crypto
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libcrypto.so )
add_library(lib_dl SHARED IMPORTED)
set_target_properties(lib_dl
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libdl.so )
add_library(lib_hardware SHARED IMPORTED)
set_target_properties(lib_hardware
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libhardware.so )
add_library(lib_c++ SHARED IMPORTED)
set_target_properties(lib_c++
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libc++.so )
add_library(lib_c SHARED IMPORTED)
set_target_properties(lib_c
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libc.so )
add_library(lib_m SHARED IMPORTED)
set_target_properties(lib_m
PROPERTIES
IMPORTED_LOCATION ${PROJECT_SOURCE_DIR}/src/main/jniLibs/${ANDROID_ABI}/libm.so )
target_link_libraries(openclTest
${log-lib}
lib_opencl )
配置 OpenCL 使用 dlopen 打开开发板上的运行库
这种配置方法需要从 Github - KhronosGroup/OpenCL-Headers 仓库下载 OpenCL 的头文件,并且需要自行实现 OpenCL 的函数
注: 需要在源码中定义
CL_TARGET_OPENCL_VERSION 120
,否则会报找不到版本号的错误
目录结构
opencltest
├─ app
│ ├─ build
│ ├─ libs
│ └─ src
│ ├─ androidTest
│ ├─ main
│ │ ├─ java
│ │ │ └─ com
│ │ │ └─ example
│ │ │ └─ opencltest
│ │ │ MainActivity.java
│ │ ├─ jni # C/C++ 源码目录
│ │ │ ├─ dlopencl.cpp # dlopen 打开运行库,实现 OpenCL 中的函数
│ │ │ ├─ openclTest.cpp
│ │ │ └─ include # JNI 调用的头文件
│ │ │ ├─ dlopencl.h # 定义 OpenCL 中的函数
│ │ │ └─ CL # OpenCL 需要使用的头文件
│ │ │ ├─ cl.h
│ │ │ ├─ cl_ext.h
│ │ │ ├─ cl_gl.h
│ │ │ ├─ cl_gl_ext.h
│ │ │ ├─ cl_platform.h
│ │ │ ├─ cl_version.h
│ │ │ └─ opencl.h
│ │ └─ res
│ └─ test
└─ gradle
CMakeLists.txt
增加配置
include_directories(${PROJECT_SOURCE_DIR}/src/main/jni/include)
add_library(openclTest
SHARED
src/main/jni/openclTest.cpp )
add_library(lib_dlopencl
SHARED
src/main/jni/dlopencl.cpp )
target_link_libraries(openclTest
${log-lib}
lib_dlopencl )
需要导入的 dlopencl.h
和 dlopencl.cpp
写在文末
OpenCL 使用
以下均以使用 dlopen 导入运行库的方式为例
-
定义 OpenCL 版本
#define CL_TARGET_OPENCL_VERSION 120
-
包含头文件
#include <CL/cl.h> #include "dlopencl.h"
-
使用 Logcat 打印日志
由于 C 语言工作在 JNI 层,无法获取控制台,导致了
printf()
函数失效,这里使用__android_log_print
方法打印日志到 Logcat#include <android/log.h> #define DEBUG #ifdef DEBUG #define LOG "LOG-TAG" #define LOGD(...) __android_log_print(ANDROID_LOG_DEBUG, LOG, __VA_ARGS__) #define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG, __VA_ARGS__) #define LOGW(...) __android_log_print(ANDROID_LOG_WARN, LOG, __VA_ARGS__) #define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG, __VA_ARGS__) #define LOGF(...) __android_log_print(ANDROID_LOG_FATAL, LOG, __VA_ARGS__) #else #define LOG #define LOGD(...) #define LOGI(...) #define LOGW(...) #define LOGE(...) #define LOGF(...) #endif
使用方法同
printf()
,例:LOGI("Device ID: %d", device_id);
-
创建用于初始化 OpenCL 的 JNI 接口
建议在同一个文件中编写多个操作 OpenCL 的函数,这里的初始化实际上是将操作 OpenCL 的变量建立为全局变量,通过 JNI 一次调用后其他函数再使用这些被初始化过的变量
定义变量
cl_uint num_device; cl_uint num_platform; cl_platform_id *platform; cl_device_id *devices; cl_int err; cl_context context;
extern "C" JNIEXPORT jint JNICALL Java_com_example_opencltest_MainActivity_initOpencl(JNIEnv *env, jobject instance) { initFns(); LOGI("getPlatformNum"); // 获取可用平台数量 err = clGetPlatformIDs(0, 0, &num_platform); platform = (cl_platform_id*)malloc(sizeof(cl_platform_id)*num_platform); LOGI("getPlatformIDs"); // 获取平台 ID err = clGetPlatformIDs(num_platform, platform, nullptr); if(err < 0) { LOGE("clGetPlatformIDs failed"); return -1; } LOGI("getDeviceNum"); // 获取可用设备数量 err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, 0, nullptr, &num_device); devices = (cl_device_id*)malloc(sizeof(cl_device_id)*num_device); LOGI("getDeviceIDs"); // 获取设备 ID err = clGetDeviceIDs(platform[0], CL_DEVICE_TYPE_GPU, num_device, devices, nullptr); if (err < 0) { LOGE("clGetDeviceIDs failed"); return -1; } return 0; }
使用 OpenCL 执行 Kernel 函数
通常编写 OpenCL 中的 Kernel 函数要求单独写在 *.cl
文件中,调用时使用 fopen()
打开,将里面的代码作为字符串读入之后再进行 runtime 编译,比较麻烦,这里直接将整个 Kernel 函数写成字符串
本例中因为测试原因没有释放掉创建的资源
可以使用允许正则替换的编辑器例如 VSCode,先写好 Kernel 函数,然后使用正则表达式
(.*)
选择所有行并将每一行作为一个参数($1
),替换为"$1\\n"
,例如替换前:int a = 0;
,替换后:"int a = 0;\n"
注1: 经测试发现 OpenCL 在 PC 端允许每次调用任意数量的矢量数据,而在开发板上只允许一次调用 1.2.3.4.8.16 个
PC 端允许的矢量调用:
Integralgraph.s0
Integralgraph.s01
Integralgraph.s012
Integralgraph.s0123
Integralgraph.s01234
Integralgraph.s0123456
Integralgraph.s01234567
Integralgraph.s012345678
Integralgraph.s0123456789
Integralgraph.s0123456789a
Integralgraph.s0123456789ab
Integralgraph.s0123456789abc
Integralgraph.s0123456789abcd
Integralgraph.s0123456789abcde
Integralgraph.s0123456789abcdef
开发板允许的矢量调用:
Integralgraph.s0
Integralgraph.s01
Integralgraph.s012
Integralgraph.s0123
Integralgraph.s01234567
Integralgraph.s0123456789abcdef
所以在编写运行于 AARCH64 架构的 OpenCL Kernel 函数时需要将被调用的矢量数组分组相加
例如在 PC 端为
TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s0123456789abcde);
在开发板中需要改为
TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);
注2: 测试中发现 PC 端的 OpenCL 遇到数组越界问题会直接跳过不予处理,但在 AARCH64 中会导致执行失败
注3: AARCH64 平台上 OpenCL 能申请到的可调用内存远小于 PC 端,如果出现输入或输出的数据超出 OpenCL 申请到的内存可能会导致数据输出不完整,读取数据时错误代码返回 -14 等问题
先在 Activity 中调用 initOpencl(),初始化 OpenCL 及其平台和设备
// 定义积分图宽度
#define CLL_IMAGE_W (400)
// 定义积分图高度
#define CLL_IMAGE_H (80)
// 构建 Kernel 函数字符串
const char *clkernel[] = {
"__kernel void kernel_Integralgraph_45int(__global int16 * grayImage,\n"
" __global int16 * Integralgraph,\n"
" __global unsigned * const p_height) {\n"
" int x = get_global_id(0);\n"
" int height = *p_height;\n"
" int width = get_global_size(0);\n"
" __local int index, index1, index2, index3, index4, index5;\n"
" __local bool flagx0, flagxw, flagy1, flagy2;\n"
" __local int16 TableInteg1, TableInteg2, TableInteg3;\n"
" flagx0 = min(0, -x);\n"
" flagxw = min(0, x - width + 1);\n"
" for (int j = 0; j < height; j++) {\n"
" flagy1 = min(0, -j);\n"
" flagy2 = min(0, 1 - j);\n"
" index = j * width + x;\n"
" if(j==0) {\n"
" Integralgraph[index] = (int16)grayImage[index];\n"
" } else if(j==1) {\n"
" index1 = (j - 1)*width + x - 1;\n"
" index3 = (j - 1)*width + x + 1;\n"
" index4 = (j - 1)*width + x;\n"
" TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);\n"
" TableInteg3 = (int16)(Integralgraph[index4].s12345678, Integralgraph[index4].s9abc, Integralgraph[index4].sdef, Integralgraph[index3].s0*flagxw);\n"
" Integralgraph[index] = (int16)((int16)TableInteg1*flagy1 + (int16)TableInteg3*flagy1 + (int16)grayImage[index4] * flagy1 + (int16)grayImage[index]);\n"
" } else {\n"
" index1 = (j - 1)*width + x - 1;\n"
" index2 = (j - 2)*width + x;\n"
" index3 = (j - 1)*width + x + 1;\n"
" index4 = (j - 1)*width + x;\n"
" TableInteg1 = (int16)(Integralgraph[index1].sf*flagx0, Integralgraph[index4].s01234567, Integralgraph[index4].s89ab, Integralgraph[index4].scde);\n"
" TableInteg2 = (int16)(Integralgraph[index2].s0*flagx0, Integralgraph[index2].s12345678, Integralgraph[index2].s9ab, Integralgraph[index2].scde, Integralgraph[index2].sf*flagxw);\n"
" TableInteg3 = (int16)(Integralgraph[index4].s12345678, Integralgraph[index4].s9abc, Integralgraph[index4].sdef, Integralgraph[index3].s0*flagxw);\n"
" Integralgraph[index] = (int16)((int16)TableInteg1*flagy1 + (int16)TableInteg3*flagy1 - (int16)TableInteg2 * flagy2 + (int16)grayImage[index4] * flagy1 + (int16)grayImage[index]);\n"
" }\n"
" }\n"
"}\n"
};
// 创建 Context
context = clCreateContext(nullptr, 1, devices, nullptr, nullptr, &err);
if(err < 0) {
LOGE("Create context failed, error code: [%d]", err);
return -1;
}
int inputData[CLL_IMAGE_W * CLL_IMAGE_H];
int outputData[CLL_IMAGE_W * CLL_IMAGE_H];
int width = CLL_IMAGE_W;
int height = CLL_IMAGE_H;
// 初始化输入输出数据
for (int i = 0; i < height; i++) {
for (int j = 0; j < width; j++) {
inputData[i * width + j] = 1;
outputData[i * width + j] = 0;
}
}
clock_t time_start;
clock_t time_finish;
double total_time;
char *program_log;
cl_command_queue queue;
cl_kernel kernel;
size_t log_size;
cl_program program;
// 创建命令队列
queue = clCreateCommandQueue(context, devices[0], 0, &err);
if (err < 0) {
LOGE("Create command queue failed, error code: [%d]", err);
}
// 创建程序
program = clCreateProgramWithSource(context, sizeof(clkernel) / sizeof(clkernel[0]), clkernel, nullptr, nullptr);
// 构建/编译程序
err = clBuildProgram(program, 0, nullptr, nullptr, nullptr, nullptr);
if (err < 0) {
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 0, nullptr, &log_size);
program_log = (char*)malloc(sizeof(log_size));
// 查询构建/编译过程中的 log
clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, log_size, program_log, nullptr);
LOGE("program_build_info: \n[%s]\n", program_log);
free(program_log);
}
// 创建 Kernel
kernel = clCreateKernel(program, "kernel_Integralgraph_45int", &err);
if (err < 0) {
LOGE("Create kernel failed, error code: [%d]", err);
}
// 创建用于输入参数、输出数据的内存空间
cl_mem meminput_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int) * height * width, inputData, &err);
cl_mem memoutput_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(int) * height * width, nullptr, &err);
cl_mem memHeight_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int), &height, &err);
// 向 Kernel 传递参数
err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &meminput_buffer);
err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memoutput_buffer);
err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &memHeight_buffer);
size_t global_work_offset[2] = {0, 0};
size_t localThreads[2] = {1, 1};
size_t globalThreads[2] = {(size_t)(width / 16), 1};
// 获得程序开始执行的时间戳
time_start = clock();
// 排布工作组与工作项并执行
err = clEnqueueNDRangeKernel(queue, kernel, 2, global_work_offset, globalThreads, localThreads, 0, nullptr, nullptr);
if (err < 0) {
LOGE("Run Kernel failed, error code: [%d]", err);
}
// 获得程序执行完成的时间戳
time_finish = clock();
// 计算运行时间
total_time = (double)(time_finish - time_start) / CLOCKS_PER_SEC;
LOGI("Total time: [%f]s", total_time);
// 从输出的内存空间中读取数据
err = clEnqueueReadBuffer(queue, memoutput_buffer, CL_TRUE, 0, sizeof(int) * height * width, outputData, 0, nullptr, nullptr);
if (err < 0) {
LOGE("Read buffer failed, error code: [%d]", err);
}
// 输出数据,积分图中的每一行拼接为一条 LOG
char outputDataTemp[2048];
for (int i = 0; i < height ; i++) {
// 清空字符串,拼接下一行
memset(outputDataTemp, 0x00, 2048);
for (int j = 0; j < width ; j++) {
// 将一行数据拼接在一个字符串中
sprintf(outputDataTemp, "%s %d", outputDataTemp, outputData[i * width + j]);
}
// LOG打印
LOGI("line [%d]\n%s", i, outputDataTemp);
}
附加
dlopencl.h
#ifndef __AOPENCL_CL_H
#define __AOPENCL_CL_H
#ifdef __APPLE__
#include <OpenCL/cl_platform.h>
#else
#include <CL/cl_platform.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#define IAH()
//#define IAH() printf("File:%s, Line:%d\n",__FILE__, __LINE__);
void initFns();
/* Platform API */
#define clGetPlatformIDs aclGetPlatformIDs
cl_int
(*aclGetPlatformIDs)(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
#define clGetPlatformInfo aclGetPlatformInfo
cl_int
(*aclGetPlatformInfo)(cl_platform_id /* platform */,
cl_platform_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Device APIs */
#define clGetDeviceIDs aclGetDeviceIDs
cl_int
(*aclGetDeviceIDs)(cl_platform_id /* platform */,
cl_device_type /* device_type */,
cl_uint /* num_entries */,
cl_device_id * /* devices */,
cl_uint * /* num_devices */);
#define clGetDeviceInfo aclGetDeviceInfo
cl_int
(*aclGetDeviceInfo)(cl_device_id /* device */,
cl_device_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Context APIs */
#define clCreateContext aclCreateContext
cl_context
(*aclCreateContext)(const cl_context_properties * /* properties */,
cl_uint /* num_devices */,
const cl_device_id * /* devices */,
void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */);
#define clCreateContextFromType aclCreateContextFromType
cl_context
(*aclCreateContextFromType)(const cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */);
#define clRetainContext aclRetainContext
cl_int
(*aclRetainContext)(cl_context /* context */);
#define clReleaseContext aclReleaseContext
cl_int
(*aclReleaseContext)(cl_context /* context */);
#define clGetContextInfo aclGetContextInfo
cl_int
(*aclGetContextInfo)(cl_context /* context */,
cl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Command Queue APIs */
#define clCreateCommandQueue aclCreateCommandQueue
cl_command_queue
(*aclCreateCommandQueue)(cl_context /* context */,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * /* errcode_ret */);
#define clRetainCommandQueue aclRetainCommandQueue
cl_int
(*aclRetainCommandQueue)(cl_command_queue /* command_queue */);
#define clReleaseCommandQueue aclReleaseCommandQueue
cl_int
(*aclReleaseCommandQueue)(cl_command_queue /* command_queue */);
#define clGetCommandQueueInfo aclGetCommandQueueInfo
cl_int
(*aclGetCommandQueueInfo)(cl_command_queue /* command_queue */,
cl_command_queue_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Memory Object APIs */
#define clCreateBuffer aclCreateBuffer
cl_mem
(*aclCreateBuffer)(cl_context /* context */,
cl_mem_flags /* flags */,
size_t /* size */,
void * /* host_ptr */,
cl_int * /* errcode_ret */);
#define clCreateSubBuffer aclCreateSubBuffer
cl_mem
(*aclCreateSubBuffer)(cl_mem /* buffer */,
cl_mem_flags /* flags */,
cl_buffer_create_type /* buffer_create_type */,
const void * /* buffer_create_info */,
cl_int * /* errcode_ret */);
#define clRetainMemObject aclRetainMemObject
cl_int
(*aclRetainMemObject)(cl_mem /* memobj */);
#define clReleaseMemObject aclReleaseMemObject
cl_int
(*aclReleaseMemObject)(cl_mem /* memobj */);
#define clGetSupportedImageFormats aclGetSupportedImageFormats
cl_int
(*aclGetSupportedImageFormats)(cl_context /* context */,
cl_mem_flags /* flags */,
cl_mem_object_type /* image_type */,
cl_uint /* num_entries */,
cl_image_format * /* image_formats */,
cl_uint * /* num_image_formats */);
#define clGetMemObjectInfo aclGetMemObjectInfo
cl_int
(*aclGetMemObjectInfo)(cl_mem /* memobj */,
cl_mem_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clGetImageInfo aclGetImageInfo
cl_int
(*aclGetImageInfo)(cl_mem /* image */,
cl_image_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clSetMemObjectDestructorCallback aclSetMemObjectDestructorCallback
cl_int
(*aclSetMemObjectDestructorCallback)( cl_mem /* memobj */,
void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ );
/* Sampler APIs */
#define clCreateSampler aclCreateSampler
cl_sampler
(*aclCreateSampler)(cl_context /* context */,
cl_bool /* normalized_coords */,
cl_addressing_mode /* addressing_mode */,
cl_filter_mode /* filter_mode */,
cl_int * /* errcode_ret */);
#define clRetainSampler aclRetainSampler
cl_int
(*aclRetainSampler)(cl_sampler /* sampler */);
#define clReleaseSampler aclReleaseSampler
cl_int
(*aclReleaseSampler)(cl_sampler /* sampler */);
#define clGetSamplerInfo aclGetSamplerInfo
cl_int
(*aclGetSamplerInfo)(cl_sampler /* sampler */,
cl_sampler_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Program Object APIs */
#define clCreateProgramWithSource aclCreateProgramWithSource
cl_program
(*aclCreateProgramWithSource)(cl_context /* context */,
cl_uint /* count */,
const char ** /* strings */,
const size_t * /* lengths */,
cl_int * /* errcode_ret */);
#define clCreateProgramWithBinary aclCreateProgramWithBinary
cl_program
(*aclCreateProgramWithBinary)(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */);
#define clRetainProgram aclRetainProgram
cl_int
(*aclRetainProgram)(cl_program /* program */);
#define clReleaseProgram aclReleaseProgram
cl_int
(*aclReleaseProgram)(cl_program /* program */);
#define clBuildProgram aclBuildProgram
cl_int
(*aclBuildProgram)(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */);
#define clGetProgramInfo aclGetProgramInfo
cl_int
(*aclGetProgramInfo)(cl_program /* program */,
cl_program_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clGetProgramBuildInfo aclGetProgramBuildInfo
cl_int
(*aclGetProgramBuildInfo)(cl_program /* program */,
cl_device_id /* device */,
cl_program_build_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Kernel Object APIs */
#define clCreateKernel aclCreateKernel
cl_kernel
(*aclCreateKernel)(cl_program /* program */,
const char * /* kernel_name */,
cl_int * /* errcode_ret */);
#define clCreateKernelsInProgram aclCreateKernelsInProgram
cl_int
(*aclCreateKernelsInProgram)(cl_program /* program */,
cl_uint /* num_kernels */,
cl_kernel * /* kernels */,
cl_uint * /* num_kernels_ret */);
#define clRetainKernel aclRetainKernel
cl_int
(*aclRetainKernel)(cl_kernel /* kernel */);
#define clReleaseKernel aclReleaseKernel
cl_int
(*aclReleaseKernel)(cl_kernel /* kernel */);
#define clSetKernelArg aclSetKernelArg
cl_int
(*aclSetKernelArg)(cl_kernel /* kernel */,
cl_uint /* arg_index */,
size_t /* arg_size */,
const void * /* arg_value */);
#define clGetKernelInfo aclGetKernelInfo
cl_int
(*aclGetKernelInfo)(cl_kernel /* kernel */,
cl_kernel_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clGetKernelWorkGroupInfo aclGetKernelWorkGroupInfo
cl_int
(*aclGetKernelWorkGroupInfo)(cl_kernel /* kernel */,
cl_device_id /* device */,
cl_kernel_work_group_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Event Object APIs */
#define clWaitForEvents aclWaitForEvents
cl_int
(*aclWaitForEvents)(cl_uint /* num_events */,
const cl_event * /* event_list */);
#define clGetEventInfo aclGetEventInfo
cl_int
(*aclGetEventInfo)(cl_event /* event */,
cl_event_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
#define clCreateUserEvent aclCreateUserEvent
cl_event
(*aclCreateUserEvent)(cl_context /* context */,
cl_int * /* errcode_ret */);
#define clRetainEvent aclRetainEvent
cl_int
(*aclRetainEvent)(cl_event /* event */);
#define clReleaseEvent aclReleaseEvent
cl_int
(*aclReleaseEvent)(cl_event /* event */);
#define clSetUserEventStatus aclSetUserEventStatus
cl_int
(*aclSetUserEventStatus)(cl_event /* event */,
cl_int /* execution_status */);
#define clSetEventCallback aclSetEventCallback
cl_int
(*aclSetEventCallback)( cl_event /* event */,
cl_int /* command_exec_callback_type */,
void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
void * /* user_data */);
/* Profiling APIs */
#define clGetEventProfilingInfo aclGetEventProfilingInfo
cl_int
(*aclGetEventProfilingInfo)(cl_event /* event */,
cl_profiling_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */);
/* Flush and Finish APIs */
#define clFlush aclFlush
cl_int
(*aclFlush)(cl_command_queue /* command_queue */);
#define clFinish aclFinish
cl_int
(*aclFinish)(cl_command_queue /* command_queue */);
/* Enqueued Commands APIs */
#define clEnqueueReadBuffer aclEnqueueReadBuffer
cl_int
(*aclEnqueueReadBuffer)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
size_t /* offset */,
size_t /* size */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueReadBufferRect aclEnqueueReadBufferRect
cl_int
(*aclEnqueueReadBufferRect)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueWriteBuffer aclEnqueueWriteBuffer
cl_int
(*aclEnqueueWriteBuffer)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
size_t /* offset */,
size_t /* size */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueWriteBufferRect aclEnqueueWriteBufferRect
cl_int
(*aclEnqueueWriteBufferRect)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyBuffer aclEnqueueCopyBuffer
cl_int
(*aclEnqueueCopyBuffer)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
size_t /* src_offset */,
size_t /* dst_offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyBufferRect aclEnqueueCopyBufferRect
cl_int
(*aclEnqueueCopyBufferRect)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin */,
const size_t * /* dst_origin */,
const size_t * /* region */,
size_t /* src_row_pitch */,
size_t /* src_slice_pitch */,
size_t /* dst_row_pitch */,
size_t /* dst_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueReadImage aclEnqueueReadImage
cl_int
(*aclEnqueueReadImage)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_read */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* row_pitch */,
size_t /* slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueWriteImage aclEnqueueWriteImage
cl_int
(*aclEnqueueWriteImage)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_write */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* input_row_pitch */,
size_t /* input_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyImage aclEnqueueCopyImage
cl_int
(*aclEnqueueCopyImage)(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_image */,
const size_t * /* src_origin[3] */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyImageToBuffer aclEnqueueCopyImageToBuffer
cl_int
(*aclEnqueueCopyImageToBuffer)(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin[3] */,
const size_t * /* region[3] */,
size_t /* dst_offset */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueCopyBufferToImage aclEnqueueCopyBufferToImage
cl_int
(*aclEnqueueCopyBufferToImage)(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_image */,
size_t /* src_offset */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
void *
(*aclEnqueueMapBuffer)(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
size_t /* offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */);
void *
(*aclEnqueueMapImage)(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t * /* image_row_pitch */,
size_t * /* image_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */);
#define clEnqueueUnmapMemObject aclEnqueueUnmapMemObject
cl_int
(*aclEnqueueUnmapMemObject)(cl_command_queue /* command_queue */,
cl_mem /* memobj */,
void * /* mapped_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueNDRangeKernel aclEnqueueNDRangeKernel
cl_int
(*aclEnqueueNDRangeKernel)(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueTask aclEnqueueTask
cl_int
(*aclEnqueueTask)(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#define clEnqueueNativeKernel aclEnqueueNativeKernel
cl_int
(*aclEnqueueNativeKernel)(cl_command_queue /* command_queue */,
void (CL_CALLBACK * /*user_func*/)(void *),
void * /* args */,
size_t /* cb_args */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_list */,
const void ** /* args_mem_loc */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */);
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
//#warning CL_USE_DEPRECATED_OPENCL_1_0_APIS is defined. These APIs are unsupported and untested in OpenCL 1.1!
/*
* WARNING:
* This API introduces mutable state into the OpenCL implementation. It has been REMOVED
* to better facilitate thread safety. The 1.0 API is not thread safe. It is not tested by the
* OpenCL 1.1 conformance test, and consequently may not work or may not work dependably.
* It is likely to be non-performant. Use of this API is not advised. Use at your own risk.
*
* Software developers previously relying on this API are instructed to set the command queue
* properties when creating the queue, instead.
*/
#define clSetCommandQueueProperty aclSetCommandQueueProperty
cl_int
(*aclSetCommandQueueProperty)(cl_command_queue /* command_queue */,
cl_command_queue_properties /* properties */,
cl_bool /* enable */,
cl_command_queue_properties * /* old_properties */);
#endif /* CL_USE_DEPRECATED_OPENCL_1_0_APIS */
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define clCreateImage2D aclCreateImage2D
cl_mem
(*aclCreateImage2D)(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_row_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */);
#define clCreateImage3D aclCreateImage3D
cl_mem
(*aclCreateImage3D)(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_depth */,
size_t /* image_row_pitch */,
size_t /* image_slice_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */);
#define clEnqueueMarker aclEnqueueMarker
cl_int
(*aclEnqueueMarker)(cl_command_queue /* command_queue */,
cl_event * /* event */);
#define clEnqueueWaitForEvents aclEnqueueWaitForEvents
cl_int
(*aclEnqueueWaitForEvents)(cl_command_queue /* command_queue */,
cl_uint /* num_events */,
const cl_event * /* event_list */);
#define clEnqueueBarrier aclEnqueueBarrier
cl_int
(*aclEnqueueBarrier)(cl_command_queue /* command_queue */);
#define clUnloadCompiler aclUnloadCompiler
cl_int
(*aclUnloadCompiler)(void);
void *
(*aclGetExtensionFunctionAddress)(const char * /* func_name */);
#endif
#ifdef __cplusplus
}
#endif
#endif /* __AOPENCL_CL_H */
dlopencl.cpp
根据实际情况自行修改 so_path 的路径
#define CL_TARGET_OPENCL_VERSION 120
#include "CL/cl.h"
#include <dlfcn.h>
#include <cstdio>
#include <string.h>
#include "dlopencl.h"
int loadedCL;
void *getCLHandle() {
LOGD("get_handle");
void *res = nullptr;
char *so_path = (char*)"/system/vendor/lib64/egl/libGLES_mali.so";
res = dlopen(so_path, RTLD_LAZY);
if (res == nullptr) {
LOGD("Open library failed");
} else {
LOGD("Loaded library name: [%s]", so_path);
}
return res;
}
void initFns() {
loadedCL = 0;
void *handle = getCLHandle();
if (handle == nullptr) return;
/* Platform API */
IAH();
aclGetPlatformIDs = (cl_int (*)(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */)) dlsym(handle, "clGetPlatformIDs");
IAH();
aclGetPlatformInfo = (cl_int (*)(cl_platform_id /* platform */,
cl_platform_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetPlatformInfo");
/* Device APIs */
IAH();
aclGetDeviceIDs = (cl_int (*)(cl_platform_id /* platform */,
cl_device_type /* device_type */,
cl_uint /* num_entries */,
cl_device_id * /* devices */,
cl_uint * /* num_devices */)) dlsym(handle, "clGetDeviceIDs");
IAH();
aclGetDeviceInfo = (cl_int (*)(cl_device_id /* device */,
cl_device_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetDeviceInfo");
/* Context APIs */
IAH();
aclCreateContext = (cl_context (*)(const cl_context_properties * /* properties */,
cl_uint /* num_devices */,
const cl_device_id * /* devices */,
void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateContext");
IAH();
aclCreateContextFromType = (cl_context (*)(const cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateContextFromType");
IAH();
aclRetainContext = (cl_int (*)(cl_context /* context */)) dlsym(handle, "clRetainContext");
IAH();
aclReleaseContext = (cl_int (*)(cl_context /* context */)) dlsym(handle, "clReleaseContext");
IAH();
aclGetContextInfo = (cl_int (*)(cl_context /* context */,
cl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetContextInfo");
/* Command Queue APIs */
IAH();
aclCreateCommandQueue = (cl_command_queue (*)(cl_context /* context */,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateCommandQueue");
IAH();
aclRetainCommandQueue = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clRetainCommandQueue");
IAH();
aclReleaseCommandQueue = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clReleaseCommandQueue");
IAH();
aclGetCommandQueueInfo = (cl_int (*)(cl_command_queue /* command_queue */,
cl_command_queue_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetCommandQueueInfo");
/* Memory Object APIs */
IAH();
aclCreateBuffer = (cl_mem (*)(cl_context /* context */,
cl_mem_flags /* flags */,
size_t /* size */,
void * /* host_ptr */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateBuffer");
IAH();
aclCreateSubBuffer = (cl_mem (*)(cl_mem /* buffer */,
cl_mem_flags /* flags */,
cl_buffer_create_type /* buffer_create_type */,
const void * /* buffer_create_info */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateSubBuffer");
IAH();
aclRetainMemObject = (cl_int (*)(cl_mem /* memobj */)) dlsym(handle, "clRetainMemObject");
IAH();
aclReleaseMemObject = (cl_int (*)(cl_mem /* memobj */)) dlsym(handle, "clReleaseMemObject");
IAH();
aclGetSupportedImageFormats = (cl_int (*)(cl_context /* context */,
cl_mem_flags /* flags */,
cl_mem_object_type /* image_type */,
cl_uint /* num_entries */,
cl_image_format * /* image_formats */,
cl_uint * /* num_image_formats */)) dlsym(handle, "clGetSupportedImageFormats");
IAH();
aclGetMemObjectInfo = (cl_int (*)(cl_mem /* memobj */,
cl_mem_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetMemObjectInfo");
IAH();
aclGetImageInfo = (cl_int (*)(cl_mem /* image */,
cl_image_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetImageInfo");
IAH();
aclSetMemObjectDestructorCallback = (cl_int (*)( cl_mem /* memobj */,
void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ )) dlsym(handle, "clSetMemObjectDestructorCallback");
/* Sampler APIs */
IAH();
aclCreateSampler = (cl_sampler (*)(cl_context /* context */,
cl_bool /* normalized_coords */,
cl_addressing_mode /* addressing_mode */,
cl_filter_mode /* filter_mode */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateSampler");
IAH();
aclRetainSampler = (cl_int (*)(cl_sampler /* sampler */)) dlsym(handle, "clRetainSampler");
IAH();
aclReleaseSampler = (cl_int (*)(cl_sampler /* sampler */)) dlsym(handle, "clReleaseSampler");
IAH();
aclGetSamplerInfo = (cl_int (*)(cl_sampler /* sampler */,
cl_sampler_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetSamplerInfo");
/* Program Object APIs */
IAH();
aclCreateProgramWithSource = (cl_program (*)(cl_context /* context */,
cl_uint /* count */,
const char ** /* strings */,
const size_t * /* lengths */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateProgramWithSource");
IAH();
aclCreateProgramWithBinary = (cl_program (*)(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateProgramWithBinary");
IAH();
aclRetainProgram = (cl_int (*)(cl_program /* program */)) dlsym(handle, "clRetainProgram");
IAH();
aclReleaseProgram = (cl_int (*)(cl_program /* program */)) dlsym(handle, "clReleaseProgram");
IAH();
aclBuildProgram = (cl_int (*)(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */)) dlsym(handle, "clBuildProgram");
IAH();
aclGetProgramInfo = (cl_int (*)(cl_program /* program */,
cl_program_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetProgramInfo");
IAH();
aclGetProgramBuildInfo = (cl_int (*)(cl_program /* program */,
cl_device_id /* device */,
cl_program_build_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetProgramBuildInfo");
/* Kernel Object APIs */
IAH();
aclCreateKernel = (cl_kernel (*)(cl_program /* program */,
const char * /* kernel_name */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateKernel");
IAH();
aclCreateKernelsInProgram = (cl_int (*)(cl_program /* program */,
cl_uint /* num_kernels */,
cl_kernel * /* kernels */,
cl_uint * /* num_kernels_ret */)) dlsym(handle, "clCreateKernelsInProgram");
IAH();
aclRetainKernel = (cl_int (*)(cl_kernel /* kernel */)) dlsym(handle, "clRetainKernel");
IAH();
aclReleaseKernel = (cl_int (*)(cl_kernel /* kernel */)) dlsym(handle, "clReleaseKernel");
IAH();
aclSetKernelArg = (cl_int (*)(cl_kernel /* kernel */,
cl_uint /* arg_index */,
size_t /* arg_size */,
const void * /* arg_value */)) dlsym(handle, "clSetKernelArg");
IAH();
aclGetKernelInfo = (cl_int (*)(cl_kernel /* kernel */,
cl_kernel_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetKernelInfo");
IAH();
aclGetKernelWorkGroupInfo = (cl_int (*)(cl_kernel /* kernel */,
cl_device_id /* device */,
cl_kernel_work_group_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetKernelWorkGroupInfo");
/* Event Object APIs */
IAH();
aclWaitForEvents = (cl_int (*)(cl_uint /* num_events */,
const cl_event * /* event_list */)) dlsym(handle, "clWaitForEvents");
IAH();
aclGetEventInfo = (cl_int (*)(cl_event /* event */,
cl_event_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetEventInfo");
IAH();
aclCreateUserEvent = (cl_event (*)(cl_context /* context */,
cl_int * /* errcode_ret */)) dlsym(handle, "clCreateUserEvent");
IAH();
aclRetainEvent = (cl_int (*)(cl_event /* event */)) dlsym(handle, "clRetainEvent");
IAH();
aclReleaseEvent = (cl_int (*)(cl_event /* event */)) dlsym(handle, "clReleaseEvent");
IAH();
aclSetUserEventStatus = (cl_int (*)(cl_event /* event */,
cl_int /* execution_status */)) dlsym(handle, "clSetUserEventStatus");
IAH();
aclSetEventCallback = (cl_int (*)( cl_event /* event */,
cl_int /* command_exec_callback_type */,
void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
void * /* user_data */)) dlsym(handle, "clSetEventCallback");
/* Profiling APIs */
IAH();
aclGetEventProfilingInfo = (cl_int (*)(cl_event /* event */,
cl_profiling_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */)) dlsym(handle, "clGetEventProfilingInfo");
/* Flush and Finish APIs */
IAH();
aclFlush = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clFlush");
IAH();
aclFinish = (cl_int (*)(cl_command_queue /* command_queue */)) dlsym(handle, "clFinish");
/* Enqueued Commands APIs */
IAH();
aclEnqueueReadBuffer = (cl_int (*)(cl_command_queue /* command_queue */,