From e6428b55b5760ce696da994f49472a8a5b01a7cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Tue, 21 Mar 2017 11:17:07 +0100 Subject: [PATCH] Upgrade cl.hpp to v2.0.10 --- libethash-cl/CL/cl.hpp | 13054 ++++++++++++------------------- libethash-cl/ethash_cl_miner.h | 17 +- 2 files changed, 4872 insertions(+), 8199 deletions(-) diff --git a/libethash-cl/CL/cl.hpp b/libethash-cl/CL/cl.hpp index 5c9be5c5e..0d6e805a0 100644 --- a/libethash-cl/CL/cl.hpp +++ b/libethash-cl/CL/cl.hpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2008-2015 The Khronos Group Inc. + * Copyright (c) 2008-2016 The Khronos Group Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and/or associated documentation files (the @@ -12,6 +12,11 @@ * The above copyright notice and this permission notice shall be included * in all copies or substantial portions of the Materials. * + * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS + * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS + * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT + * https://www.khronos.org/registry/ + * * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. @@ -23,24 +28,42 @@ /*! \file * - * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and - * OpenCL 1.2 (rev 15) - * \author Benedict R. Gaster, Laurent Morichetti and Lee Howes - * - * Additions and fixes from: - * Brian Cole, March 3rd 2010 and April 2012 + * \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33), + * OpenCL 1.2 (rev 15) and OpenCL 2.0 (rev 29) + * \author Lee Howes and Bruce Merry + * + * Derived from the OpenCL 1.x C++ bindings written by + * Benedict R. Gaster, Laurent Morichetti and Lee Howes + * With additions and fixes from: + * Brian Cole, March 3rd 2010 and April 2012 * Matt Gruenke, April 2012. * Bruce Merry, February 2013. * Tom Deakin and Simon McIntosh-Smith, July 2013 - * - * \version 1.2.7 - * \date January 2015 + * James Price, 2015- + * + * \version 2.0.10 + * \date 2016-07-20 * * Optional extension support * - * cl * cl_ext_device_fission - * #define USE_CL_DEVICE_FISSION + * #define CL_HPP_USE_CL_DEVICE_FISSION + * cl_khr_d3d10_sharing + * #define CL_HPP_USE_DX_INTEROP + * cl_khr_sub_groups + * #define CL_HPP_USE_CL_SUB_GROUPS_KHR + * + * Doxygen documentation for this header is available here: + * + * http://khronosgroup.github.io/OpenCL-CLHPP/ + * + * The latest version of this header can be found on the GitHub releases page: + * + * https://github.com/KhronosGroup/OpenCL-CLHPP/releases + * + * Bugs and patches can be submitted to the GitHub repository: + * + * https://github.com/KhronosGroup/OpenCL-CLHPP */ /*! \mainpage @@ -48,20 +71,140 @@ * For many large applications C++ is the language of choice and so it seems * reasonable to define C++ bindings for OpenCL. * - * - * The interface is contained with a single C++ header file \em cl.hpp and all + * The interface is contained with a single C++ header file \em cl2.hpp and all * definitions are contained within the namespace \em cl. There is no additional * requirement to include \em cl.h and to use either the C++ or original C - * bindings it is enough to simply include \em cl.hpp. + * bindings; it is enough to simply include \em cl2.hpp. * * The bindings themselves are lightweight and correspond closely to the * underlying C API. Using the C++ bindings introduces no additional execution * overhead. * - * For detail documentation on the bindings see: + * There are numerous compatibility, portability and memory management + * fixes in the new header as well as additional OpenCL 2.0 features. + * As a result the header is not directly backward compatible and for this + * reason we release it as cl2.hpp rather than a new version of cl.hpp. + * + * + * \section compatibility Compatibility + * Due to the evolution of the underlying OpenCL API the 2.0 C++ bindings + * include an updated approach to defining supported feature versions + * and the range of valid underlying OpenCL runtime versions supported. + * + * The combination of preprocessor macros CL_HPP_TARGET_OPENCL_VERSION and + * CL_HPP_MINIMUM_OPENCL_VERSION control this range. These are three digit + * decimal values representing OpenCL runime versions. The default for + * the target is 200, representing OpenCL 2.0 and the minimum is also + * defined as 200. These settings would use 2.0 API calls only. + * If backward compatibility with a 1.2 runtime is required, the minimum + * version may be set to 120. + * + * Note that this is a compile-time setting, and so affects linking against + * a particular SDK version rather than the versioning of the loaded runtime. + * + * The earlier versions of the header included basic vector and string + * classes based loosely on STL versions. These were difficult to + * maintain and very rarely used. For the 2.0 header we now assume + * the presence of the standard library unless requested otherwise. + * We use std::array, std::vector, std::shared_ptr and std::string + * throughout to safely manage memory and reduce the chance of a + * recurrance of earlier memory management bugs. + * + * These classes are used through typedefs in the cl namespace: + * cl::array, cl::vector, cl::pointer and cl::string. + * In addition cl::allocate_pointer forwards to std::allocate_shared + * by default. + * In all cases these standard library classes can be replaced with + * custom interface-compatible versions using the CL_HPP_NO_STD_ARRAY, + * CL_HPP_NO_STD_VECTOR, CL_HPP_NO_STD_UNIQUE_PTR and + * CL_HPP_NO_STD_STRING macros. + * + * The OpenCL 1.x versions of the C++ bindings included a size_t wrapper + * class to interface with kernel enqueue. This caused unpleasant interactions + * with the standard size_t declaration and led to namespacing bugs. + * In the 2.0 version we have replaced this with a std::array-based interface. + * However, the old behaviour can be regained for backward compatibility + * using the CL_HPP_ENABLE_SIZE_T_COMPATIBILITY macro. + * + * Finally, the program construction interface used a clumsy vector-of-pairs + * design in the earlier versions. We have replaced that with a cleaner + * vector-of-vectors and vector-of-strings design. However, for backward + * compatibility old behaviour can be regained with the + * CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY macro. + * + * In OpenCL 2.0 OpenCL C is not entirely backward compatibility with + * earlier versions. As a result a flag must be passed to the OpenCL C + * compiled to request OpenCL 2.0 compilation of kernels with 1.2 as + * the default in the absence of the flag. + * In some cases the C++ bindings automatically compile code for ease. + * For those cases the compilation defaults to OpenCL C 2.0. + * If this is not wanted, the CL_HPP_CL_1_2_DEFAULT_BUILD macro may + * be specified to assume 1.2 compilation. + * If more fine-grained decisions on a per-kernel bases are required + * then explicit build operations that take the flag should be used. + * + * + * \section parameterization Parameters + * This header may be parameterized by a set of preprocessor macros. + * + * - CL_HPP_TARGET_OPENCL_VERSION + * + * Defines the target OpenCL runtime version to build the header + * against. Defaults to 200, representing OpenCL 2.0. + * + * - CL_HPP_NO_STD_STRING + * + * Do not use the standard library string class. cl::string is not + * defined and may be defined by the user before cl2.hpp is + * included. + * + * - CL_HPP_NO_STD_VECTOR + * + * Do not use the standard library vector class. cl::vector is not + * defined and may be defined by the user before cl2.hpp is + * included. + * + * - CL_HPP_NO_STD_ARRAY + * + * Do not use the standard library array class. cl::array is not + * defined and may be defined by the user before cl2.hpp is + * included. + * + * - CL_HPP_NO_STD_UNIQUE_PTR + * + * Do not use the standard library unique_ptr class. cl::pointer and + * the cl::allocate_pointer functions are not defined and may be + * defined by the user before cl2.hpp is included. + * + * - CL_HPP_ENABLE_DEVICE_FISSION + * + * Enables device fission for OpenCL 1.2 platforms. + * + * - CL_HPP_ENABLE_EXCEPTIONS + * + * Enable exceptions for use in the C++ bindings header. This is the + * preferred error handling mechanism but is not required. + * + * - CL_HPP_ENABLE_SIZE_T_COMPATIBILITY + * + * Backward compatibility option to support cl.hpp-style size_t + * class. Replaces the updated std::array derived version and + * removal of size_t from the namespace. Note that in this case the + * new size_t class is placed in the cl::compatibility namespace and + * thus requires an additional using declaration for direct backward + * compatibility. + * + * - CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY + * + * Enable older vector of pairs interface for construction of + * programs. + * + * - CL_HPP_CL_1_2_DEFAULT_BUILD + * + * Default to OpenCL C 1.2 compilation rather than OpenCL C 2.0 + * applies to use of cl::Program construction and other program + * build variants. * - * The OpenCL C++ Wrapper API 1.2 (revision 09) - * http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf * * \section example Example * @@ -71,85 +214,268 @@ * decriptions of these features. * * \code - * #define __CL_ENABLE_EXCEPTIONS - * - * #if defined(__APPLE__) || defined(__MACOSX) - * #include - * #else - * #include - * #endif - * #include - * #include - * #include - * - * const char * helloStr = "__kernel void " - * "hello(void) " - * "{ " - * " " - * "} "; - * - * int - * main(void) - * { - * cl_int err = CL_SUCCESS; - * try { - * - * std::vector platforms; - * cl::Platform::get(&platforms); - * if (platforms.size() == 0) { - * std::cout << "Platform size 0\n"; - * return -1; - * } + #define CL_HPP_ENABLE_EXCEPTIONS + #define CL_HPP_TARGET_OPENCL_VERSION 200 + + #include + #include + #include + #include + #include + + const int numElements = 32; + + int main(void) + { + // Filter for a 2.0 platform and set it as the default + std::vector platforms; + cl::Platform::get(&platforms); + cl::Platform plat; + for (auto &p : platforms) { + std::string platver = p.getInfo(); + if (platver.find("OpenCL 2.") != std::string::npos) { + plat = p; + } + } + if (plat() == 0) { + std::cout << "No OpenCL 2.0 platform found."; + return -1; + } + + cl::Platform newP = cl::Platform::setDefault(plat); + if (newP != plat) { + std::cout << "Error setting default platform."; + return -1; + } + + // Use C++11 raw string literals for kernel source code + std::string kernel1{R"CLC( + global int globalA; + kernel void updateGlobal() + { + globalA = 75; + } + )CLC"}; + std::string kernel2{R"CLC( + typedef struct { global int *bar; } Foo; + kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, + global int *output, int val, write_only pipe int outPipe, queue_t childQueue) + { + output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar); + write_pipe(outPipe, &val); + queue_t default_queue = get_default_queue(); + ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); + + // Have a child kernel write into third quarter of output + enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*2 + get_global_id(0)] = + inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA; + }); + + // Have a child kernel write into last quarter of output + enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*3 + get_global_id(0)] = + inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2; + }); + } + )CLC"}; + + // New simpler string interface style + std::vector programStrings {kernel1, kernel2}; + + cl::Program vectorAddProgram(programStrings); + try { + vectorAddProgram.build("-cl-std=CL2.0"); + } + catch (...) { + // Print build info for all devices + cl_int buildErr = CL_SUCCESS; + auto buildInfo = vectorAddProgram.getBuildInfo(&buildErr); + for (auto &pair : buildInfo) { + std::cerr << pair.second << std::endl << std::endl; + } + + return 1; + } + + typedef struct { int *bar; } Foo; + + // Get and run kernel that initializes the program-scope global + // A test for kernels that take no arguments + auto program2Kernel = + cl::KernelFunctor<>(vectorAddProgram, "updateGlobal"); + program2Kernel( + cl::EnqueueArgs( + cl::NDRange(1))); + + ////////////////// + // SVM allocations + + auto anSVMInt = cl::allocate_svm>(); + *anSVMInt = 5; + cl::SVMAllocator>> svmAllocReadOnly; + auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); + fooPointer->bar = anSVMInt.get(); + cl::SVMAllocator> svmAlloc; + std::vector>> inputA(numElements, 1, svmAlloc); + cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); + + // + ////////////// + + // Traditional cl_mem allocations + std::vector output(numElements, 0xdeadbeef); + cl::Buffer outputBuffer(begin(output), end(output), false); + cl::Pipe aPipe(sizeof(cl_int), numElements / 2); + + // Default command queue, also passed in as a parameter + cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( + cl::Context::getDefault(), cl::Device::getDefault()); + + auto vectorAddKernel = + cl::KernelFunctor< + decltype(fooPointer)&, + int*, + cl::coarse_svm_vector&, + cl::Buffer, + int, + cl::Pipe&, + cl::DeviceCommandQueue + >(vectorAddProgram, "vectorAdd"); + + // Ensure that the additional SVM pointer is available to the kernel + // This one was not passed as a parameter + vectorAddKernel.setSVMPointers(anSVMInt); + + // Hand control of coarse allocations to runtime + cl::enqueueUnmapSVM(anSVMInt); + cl::enqueueUnmapSVM(fooPointer); + cl::unmapSVM(inputB); + cl::unmapSVM(output2); + + cl_int error; + vectorAddKernel( + cl::EnqueueArgs( + cl::NDRange(numElements/2), + cl::NDRange(numElements/2)), + fooPointer, + inputA.data(), + inputB, + outputBuffer, + 3, + aPipe, + defaultDeviceQueue, + error + ); + + cl::copy(outputBuffer, begin(output), end(output)); + // Grab the SVM output vector using a map + cl::mapSVM(output2); + + cl::Device d = cl::Device::getDefault(); + + std::cout << "Output:\n"; + for (int i = 1; i < numElements; ++i) { + std::cout << "\t" << output[i] << "\n"; + } + std::cout << "\n\n"; + + return 0; + } * - * cl_context_properties properties[] = - * { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0}; - * cl::Context context(CL_DEVICE_TYPE_CPU, properties); - * - * std::vector devices = context.getInfo(); - * - * cl::Program::Sources source(1, - * std::make_pair(helloStr,strlen(helloStr))); - * cl::Program program_ = cl::Program(context, source); - * program_.build(devices); - * - * cl::Kernel kernel(program_, "hello", &err); - * - * cl::Event event; - * cl::CommandQueue queue(context, devices[0], 0, &err); - * queue.enqueueNDRangeKernel( - * kernel, - * cl::NullRange, - * cl::NDRange(4,4), - * cl::NullRange, - * NULL, - * &event); - * - * event.wait(); - * } - * catch (cl::Error err) { - * std::cerr - * << "ERROR: " - * << err.what() - * << "(" - * << err.err() - * << ")" - * << std::endl; - * } - * - * return EXIT_SUCCESS; - * } - * * \endcode * */ #ifndef CL_HPP_ #define CL_HPP_ +/* Handle deprecated preprocessor definitions. In each case, we only check for + * the old name if the new name is not defined, so that user code can define + * both and hence work with either version of the bindings. + */ +#if !defined(CL_HPP_USE_DX_INTEROP) && defined(USE_DX_INTEROP) +# pragma message("cl2.hpp: USE_DX_INTEROP is deprecated. Define CL_HPP_USE_DX_INTEROP instead") +# define CL_HPP_USE_DX_INTEROP +#endif +#if !defined(CL_HPP_USE_CL_DEVICE_FISSION) && defined(USE_CL_DEVICE_FISSION) +# pragma message("cl2.hpp: USE_CL_DEVICE_FISSION is deprecated. Define CL_HPP_USE_CL_DEVICE_FISSION instead") +# define CL_HPP_USE_CL_DEVICE_FISSION +#endif +#if !defined(CL_HPP_ENABLE_EXCEPTIONS) && defined(__CL_ENABLE_EXCEPTIONS) +# pragma message("cl2.hpp: __CL_ENABLE_EXCEPTIONS is deprecated. Define CL_HPP_ENABLE_EXCEPTIONS instead") +# define CL_HPP_ENABLE_EXCEPTIONS +#endif +#if !defined(CL_HPP_NO_STD_VECTOR) && defined(__NO_STD_VECTOR) +# pragma message("cl2.hpp: __NO_STD_VECTOR is deprecated. Define CL_HPP_NO_STD_VECTOR instead") +# define CL_HPP_NO_STD_VECTOR +#endif +#if !defined(CL_HPP_NO_STD_STRING) && defined(__NO_STD_STRING) +# pragma message("cl2.hpp: __NO_STD_STRING is deprecated. Define CL_HPP_NO_STD_STRING instead") +# define CL_HPP_NO_STD_STRING +#endif +#if defined(VECTOR_CLASS) +# pragma message("cl2.hpp: VECTOR_CLASS is deprecated. Alias cl::vector instead") +#endif +#if defined(STRING_CLASS) +# pragma message("cl2.hpp: STRING_CLASS is deprecated. Alias cl::string instead.") +#endif +#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) && defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +# pragma message("cl2.hpp: __CL_USER_OVERRIDE_ERROR_STRINGS is deprecated. Define CL_HPP_USER_OVERRIDE_ERROR_STRINGS instead") +# define CL_HPP_USER_OVERRIDE_ERROR_STRINGS +#endif + +/* Warn about features that are no longer supported + */ +#if defined(__USE_DEV_VECTOR) +# pragma message("cl2.hpp: __USE_DEV_VECTOR is no longer supported. Expect compilation errors") +#endif +#if defined(__USE_DEV_STRING) +# pragma message("cl2.hpp: __USE_DEV_STRING is no longer supported. Expect compilation errors") +#endif + +/* Detect which version to target */ +#if !defined(CL_HPP_TARGET_OPENCL_VERSION) +# pragma message("cl2.hpp: CL_HPP_TARGET_OPENCL_VERSION is not defined. It will default to 200 (OpenCL 2.0)") +# define CL_HPP_TARGET_OPENCL_VERSION 200 +#endif +#if CL_HPP_TARGET_OPENCL_VERSION != 100 && CL_HPP_TARGET_OPENCL_VERSION != 110 && CL_HPP_TARGET_OPENCL_VERSION != 120 && CL_HPP_TARGET_OPENCL_VERSION != 200 +# pragma message("cl2.hpp: CL_HPP_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120 or 200). It will be set to 200") +# undef CL_HPP_TARGET_OPENCL_VERSION +# define CL_HPP_TARGET_OPENCL_VERSION 200 +#endif + +#if !defined(CL_HPP_MINIMUM_OPENCL_VERSION) +# define CL_HPP_MINIMUM_OPENCL_VERSION 200 +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION != 100 && CL_HPP_MINIMUM_OPENCL_VERSION != 110 && CL_HPP_MINIMUM_OPENCL_VERSION != 120 && CL_HPP_MINIMUM_OPENCL_VERSION != 200 +# pragma message("cl2.hpp: CL_HPP_MINIMUM_OPENCL_VERSION is not a valid value (100, 110, 120 or 200). It will be set to 100") +# undef CL_HPP_MINIMUM_OPENCL_VERSION +# define CL_HPP_MINIMUM_OPENCL_VERSION 100 +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION > CL_HPP_TARGET_OPENCL_VERSION +# error "CL_HPP_MINIMUM_OPENCL_VERSION must not be greater than CL_HPP_TARGET_OPENCL_VERSION" +#endif + +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_0_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_1_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +# define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#endif +#if CL_HPP_MINIMUM_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) +# define CL_USE_DEPRECATED_OPENCL_2_0_APIS +#endif + #ifdef _WIN32 #include -#if defined(USE_DX_INTEROP) +#if defined(CL_HPP_USE_DX_INTEROP) #include #include #endif @@ -157,10 +483,18 @@ #if defined(_MSC_VER) #include -#endif // _MSC_VER +#endif // _MSC_VER + + // Check for a valid C++ version + +// Need to do both tests here because for some reason __cplusplus is not +// updated in visual studio +#if (!defined(_MSC_VER) && __cplusplus < 201103L) || (defined(_MSC_VER) && _MSC_VER < 1700) +#error Visual studio 2013 or another C++11-supporting compiler required +#endif // -#if defined(USE_CL_DEVICE_FISSION) +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) || defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) #include #endif @@ -170,27 +504,17 @@ #include #endif // !__APPLE__ -#if (_MSC_VER >= 1700) || (__cplusplus >= 201103L) -#define CL_HPP_RVALUE_REFERENCES_SUPPORTED -#define CL_HPP_CPP11_ATOMICS_SUPPORTED -#include -#endif - #if (__cplusplus >= 201103L) -#define CL_HPP_NOEXCEPT noexcept +#define CL_HPP_NOEXCEPT_ noexcept #else -#define CL_HPP_NOEXCEPT +#define CL_HPP_NOEXCEPT_ #endif - -// To avoid accidentally taking ownership of core OpenCL types -// such as cl_kernel constructors are made explicit -// under OpenCL 1.2 -#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS explicit -#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __CL_EXPLICIT_CONSTRUCTORS -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#if defined(_MSC_VER) +# define CL_HPP_DEFINE_STATIC_MEMBER_ __declspec(selectany) +#else +# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((weak)) +#endif // !_MSC_VER // Define deprecated prefixes and suffixes to ensure compilation // in case they are not pre-defined @@ -201,6 +525,13 @@ #define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED #endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED) +#if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) +#define CL_EXT_PREFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) +#if !defined(CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED) +#define CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED +#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_2_DEPRECATED) + #if !defined(CL_CALLBACK) #define CL_CALLBACK #endif //CL_CALLBACK @@ -208,24 +539,132 @@ #include #include #include +#include +#include +#include + + +// Define a size_type to represent a correctly resolved size_t +#if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +namespace cl { + using size_type = ::size_t; +} // namespace cl +#else // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +namespace cl { + using size_type = size_t; +} // namespace cl +#endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) + -#if defined(__CL_ENABLE_EXCEPTIONS) +#if defined(CL_HPP_ENABLE_EXCEPTIONS) #include -#endif // #if defined(__CL_ENABLE_EXCEPTIONS) +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) -#if !defined(__NO_STD_VECTOR) +#if !defined(CL_HPP_NO_STD_VECTOR) #include -#endif +namespace cl { + template < class T, class Alloc = std::allocator > + using vector = std::vector; +} // namespace cl +#endif // #if !defined(CL_HPP_NO_STD_VECTOR) -#if !defined(__NO_STD_STRING) +#if !defined(CL_HPP_NO_STD_STRING) #include +namespace cl { + using string = std::string; +} // namespace cl +#endif // #if !defined(CL_HPP_NO_STD_STRING) + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) +#include +namespace cl { + // Replace unique_ptr and allocate_pointer for internal use + // to allow user to replace them + template + using pointer = std::unique_ptr; +} // namespace cl #endif +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if !defined(CL_HPP_NO_STD_ARRAY) +#include +namespace cl { + template < class T, size_type N > + using array = std::array; +} // namespace cl +#endif // #if !defined(CL_HPP_NO_STD_ARRAY) -#if defined(__ANDROID__) || defined(linux) || defined(__APPLE__) || defined(__MACOSX) -#include -#endif // linux +// Define size_type appropriately to allow backward-compatibility +// use of the old size_t interface class +#if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +namespace cl { + namespace compatibility { + /*! \brief class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, whose + * size is known statically. + */ + template + class size_t + { + private: + size_type data_[N]; -#include + public: + //! \brief Initialize size_t to all 0s + size_t() + { + for (int i = 0; i < N; ++i) { + data_[i] = 0; + } + } + + size_t(const array &rhs) + { + for (int i = 0; i < N; ++i) { + data_[i] = rhs[i]; + } + } + + size_type& operator[](int index) + { + return data_[index]; + } + + const size_type& operator[](int index) const + { + return data_[index]; + } + + //! \brief Conversion operator to T*. + operator size_type* () { return data_; } + + //! \brief Conversion operator to const T*. + operator const size_type* () const { return data_; } + + operator array() const + { + array ret; + + for (int i = 0; i < N; ++i) { + ret[i] = data_[i]; + } + return ret; + } + }; + } // namespace compatibility + + template + using size_t = compatibility::size_t; +} // namespace cl +#endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) + +// Helper alias to avoid confusing the macros +namespace cl { + namespace detail { + using size_t_array = array; + } // namespace detail +} // namespace cl /*! \namespace cl @@ -234,94 +673,87 @@ * */ namespace cl { - -class Memory; - -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __INIT_CL_EXT_FCN_PTR(name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddress(#name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -#if defined(CL_VERSION_1_2) -#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \ - if(!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddressForPlatform(platform, #name); \ - if(!pfn_##name) { \ - } \ - } -#endif // #if defined(CL_VERSION_1_1) - -class Program; -class Device; -class Context; -class CommandQueue; -class Memory; -class Buffer; - -#if defined(__CL_ENABLE_EXCEPTIONS) -/*! \brief Exception class - * - * This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined. - */ -class Error : public std::exception -{ -private: - cl_int err_; - const char * errStr_; -public: - /*! \brief Create a new CL error exception for a given error code - * and corresponding message. + class Memory; + +#define CL_HPP_INIT_CL_EXT_FCN_PTR_(name) \ + if (!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddress(#name); \ + if (!pfn_##name) { \ + } \ + } + +#define CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, name) \ + if (!pfn_##name) { \ + pfn_##name = (PFN_##name) \ + clGetExtensionFunctionAddressForPlatform(platform, #name); \ + if (!pfn_##name) { \ + } \ + } + + class Program; + class Device; + class Context; + class CommandQueue; + class DeviceCommandQueue; + class Memory; + class Buffer; + class Pipe; + +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + /*! \brief Exception class * - * \param err error code value. - * - * \param errStr a descriptive string that must remain in scope until - * handling of the exception has concluded. If set, it - * will be returned by what(). + * This may be thrown by API functions when CL_HPP_ENABLE_EXCEPTIONS is defined. */ - Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) - {} + class Error : public std::exception + { + private: + cl_int err_; + const char * errStr_; + public: + /*! \brief Create a new CL error exception for a given error code + * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). + */ + Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr) + {} - ~Error() throw() {} + ~Error() throw() {} - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - virtual const char * what() const throw () - { - if (errStr_ == NULL) { - return "empty"; - } - else { - return errStr_; + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + virtual const char * what() const throw () + { + if (errStr_ == NULL) { + return "empty"; + } + else { + return errStr_; + } } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - cl_int err(void) const { return err_; } -}; -#define __ERR_STR(x) #x + /*! \brief Get error code associated with exception + * + * \return The error code. + */ + cl_int err(void) const { return err_; } + }; +#define CL_HPP_ERR_STR_(x) #x #else -#define __ERR_STR(x) NULL -#endif // __CL_ENABLE_EXCEPTIONS +#define CL_HPP_ERR_STR_(x) NULL +#endif // CL_HPP_ENABLE_EXCEPTIONS namespace detail { -#if defined(__CL_ENABLE_EXCEPTIONS) +#if defined(CL_HPP_ENABLE_EXCEPTIONS) static inline cl_int errHandler ( cl_int err, const char * errStr = NULL) @@ -337,957 +769,352 @@ static inline cl_int errHandler (cl_int err, const char * errStr = NULL) (void) errStr; // suppress unused variable warning return err; } -#endif // __CL_ENABLE_EXCEPTIONS +#endif // CL_HPP_ENABLE_EXCEPTIONS } //! \cond DOXYGEN_DETAIL -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR __ERR_STR(clGetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR __ERR_STR(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR __ERR_STR(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR __ERR_STR(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR __ERR_STR(clGetContextInfo) -#define __GET_EVENT_INFO_ERR __ERR_STR(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR __ERR_STR(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR __ERR_STR(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR __ERR_STR(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR __ERR_STR(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR __ERR_STR(clGetKernelInfo) -#if defined(CL_VERSION_1_2) -#define __GET_KERNEL_ARG_INFO_ERR __ERR_STR(clGetKernelArgInfo) -#endif // #if defined(CL_VERSION_1_2) -#define __GET_KERNEL_WORK_GROUP_INFO_ERR __ERR_STR(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR __ERR_STR(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR __ERR_STR(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR __ERR_STR(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_ERR __ERR_STR(clCreateContext) -#define __CREATE_CONTEXT_FROM_TYPE_ERR __ERR_STR(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR __ERR_STR(clGetSupportedImageFormats) - -#define __CREATE_BUFFER_ERR __ERR_STR(clCreateBuffer) -#define __COPY_ERR __ERR_STR(cl::copy) -#define __CREATE_SUBBUFFER_ERR __ERR_STR(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __CREATE_GL_RENDER_BUFFER_ERR __ERR_STR(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR __ERR_STR(clGetGLObjectInfo) -#if defined(CL_VERSION_1_2) -#define __CREATE_IMAGE_ERR __ERR_STR(clCreateImage) -#define __CREATE_GL_TEXTURE_ERR __ERR_STR(clCreateFromGLTexture) -#define __IMAGE_DIMENSION_ERR __ERR_STR(Incorrect image dimensions) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_SAMPLER_ERR __ERR_STR(clCreateSampler) -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR __ERR_STR(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR __ERR_STR(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR __ERR_STR(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR __ERR_STR(clWaitForEvents) - -#define __CREATE_KERNEL_ERR __ERR_STR(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR __ERR_STR(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR __ERR_STR(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR __ERR_STR(clCreateProgramWithBinary) -#if defined(CL_VERSION_1_2) -#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR __ERR_STR(clCreateProgramWithBuiltInKernels) -#endif // #if defined(CL_VERSION_1_2) -#define __BUILD_PROGRAM_ERR __ERR_STR(clBuildProgram) -#if defined(CL_VERSION_1_2) -#define __COMPILE_PROGRAM_ERR __ERR_STR(clCompileProgram) -#define __LINK_PROGRAM_ERR __ERR_STR(clLinkProgram) -#endif // #if defined(CL_VERSION_1_2) -#define __CREATE_KERNELS_IN_PROGRAM_ERR __ERR_STR(clCreateKernelsInProgram) - -#define __CREATE_COMMAND_QUEUE_ERR __ERR_STR(clCreateCommandQueue) -#define __SET_COMMAND_QUEUE_PROPERTY_ERR __ERR_STR(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR __ERR_STR(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR __ERR_STR(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR __ERR_STR(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR __ERR_STR(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR __ERR_STR(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR __ERR_STR(clEnqueueCopyBufferRect) -#define __ENQUEUE_FILL_BUFFER_ERR __ERR_STR(clEnqueueFillBuffer) -#define __ENQUEUE_READ_IMAGE_ERR __ERR_STR(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR __ERR_STR(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR __ERR_STR(clEnqueueCopyImage) -#define __ENQUEUE_FILL_IMAGE_ERR __ERR_STR(clEnqueueFillImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR __ERR_STR(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR __ERR_STR(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR __ERR_STR(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_IMAGE_ERR __ERR_STR(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR __ERR_STR(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR __ERR_STR(clEnqueueNDRangeKernel) -#define __ENQUEUE_TASK_ERR __ERR_STR(clEnqueueTask) -#define __ENQUEUE_NATIVE_KERNEL __ERR_STR(clEnqueueNativeKernel) -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR __ERR_STR(clEnqueueMigrateMemObjects) -#endif // #if defined(CL_VERSION_1_2) - -#define __ENQUEUE_ACQUIRE_GL_ERR __ERR_STR(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR __ERR_STR(clEnqueueReleaseGLObjects) - - -#define __RETAIN_ERR __ERR_STR(Retain Object) -#define __RELEASE_ERR __ERR_STR(Release Object) -#define __FLUSH_ERR __ERR_STR(clFlush) -#define __FINISH_ERR __ERR_STR(clFinish) -#define __VECTOR_CAPACITY_ERR __ERR_STR(Vector capacity error) +#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) +#define __GET_DEVICE_INFO_ERR CL_HPP_ERR_STR_(clGetDeviceInfo) +#define __GET_PLATFORM_INFO_ERR CL_HPP_ERR_STR_(clGetPlatformInfo) +#define __GET_DEVICE_IDS_ERR CL_HPP_ERR_STR_(clGetDeviceIDs) +#define __GET_PLATFORM_IDS_ERR CL_HPP_ERR_STR_(clGetPlatformIDs) +#define __GET_CONTEXT_INFO_ERR CL_HPP_ERR_STR_(clGetContextInfo) +#define __GET_EVENT_INFO_ERR CL_HPP_ERR_STR_(clGetEventInfo) +#define __GET_EVENT_PROFILE_INFO_ERR CL_HPP_ERR_STR_(clGetEventProfileInfo) +#define __GET_MEM_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetMemObjectInfo) +#define __GET_IMAGE_INFO_ERR CL_HPP_ERR_STR_(clGetImageInfo) +#define __GET_SAMPLER_INFO_ERR CL_HPP_ERR_STR_(clGetSamplerInfo) +#define __GET_KERNEL_INFO_ERR CL_HPP_ERR_STR_(clGetKernelInfo) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __GET_KERNEL_ARG_INFO_ERR CL_HPP_ERR_STR_(clGetKernelArgInfo) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __GET_KERNEL_WORK_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelWorkGroupInfo) +#define __GET_PROGRAM_INFO_ERR CL_HPP_ERR_STR_(clGetProgramInfo) +#define __GET_PROGRAM_BUILD_INFO_ERR CL_HPP_ERR_STR_(clGetProgramBuildInfo) +#define __GET_COMMAND_QUEUE_INFO_ERR CL_HPP_ERR_STR_(clGetCommandQueueInfo) + +#define __CREATE_CONTEXT_ERR CL_HPP_ERR_STR_(clCreateContext) +#define __CREATE_CONTEXT_FROM_TYPE_ERR CL_HPP_ERR_STR_(clCreateContextFromType) +#define __GET_SUPPORTED_IMAGE_FORMATS_ERR CL_HPP_ERR_STR_(clGetSupportedImageFormats) + +#define __CREATE_BUFFER_ERR CL_HPP_ERR_STR_(clCreateBuffer) +#define __COPY_ERR CL_HPP_ERR_STR_(cl::copy) +#define __CREATE_SUBBUFFER_ERR CL_HPP_ERR_STR_(clCreateSubBuffer) +#define __CREATE_GL_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) +#define __CREATE_GL_RENDER_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) +#define __GET_GL_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetGLObjectInfo) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __CREATE_IMAGE_ERR CL_HPP_ERR_STR_(clCreateImage) +#define __CREATE_GL_TEXTURE_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture) +#define __IMAGE_DIMENSION_ERR CL_HPP_ERR_STR_(Incorrect image dimensions) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR CL_HPP_ERR_STR_(clSetMemObjectDestructorCallback) + +#define __CREATE_USER_EVENT_ERR CL_HPP_ERR_STR_(clCreateUserEvent) +#define __SET_USER_EVENT_STATUS_ERR CL_HPP_ERR_STR_(clSetUserEventStatus) +#define __SET_EVENT_CALLBACK_ERR CL_HPP_ERR_STR_(clSetEventCallback) +#define __WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clWaitForEvents) + +#define __CREATE_KERNEL_ERR CL_HPP_ERR_STR_(clCreateKernel) +#define __SET_KERNEL_ARGS_ERR CL_HPP_ERR_STR_(clSetKernelArg) +#define __CREATE_PROGRAM_WITH_SOURCE_ERR CL_HPP_ERR_STR_(clCreateProgramWithSource) +#define __CREATE_PROGRAM_WITH_BINARY_ERR CL_HPP_ERR_STR_(clCreateProgramWithBinary) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR CL_HPP_ERR_STR_(clCreateProgramWithBuiltInKernels) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __BUILD_PROGRAM_ERR CL_HPP_ERR_STR_(clBuildProgram) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __COMPILE_PROGRAM_ERR CL_HPP_ERR_STR_(clCompileProgram) +#define __LINK_PROGRAM_ERR CL_HPP_ERR_STR_(clLinkProgram) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __CREATE_KERNELS_IN_PROGRAM_ERR CL_HPP_ERR_STR_(clCreateKernelsInProgram) + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#define __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateCommandQueueWithProperties) +#define __CREATE_SAMPLER_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateSamplerWithProperties) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +#define __SET_COMMAND_QUEUE_PROPERTY_ERR CL_HPP_ERR_STR_(clSetCommandQueueProperty) +#define __ENQUEUE_READ_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueReadBuffer) +#define __ENQUEUE_READ_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueReadBufferRect) +#define __ENQUEUE_WRITE_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueWriteBuffer) +#define __ENQUEUE_WRITE_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueWriteBufferRect) +#define __ENQEUE_COPY_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyBuffer) +#define __ENQEUE_COPY_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferRect) +#define __ENQUEUE_FILL_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueFillBuffer) +#define __ENQUEUE_READ_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueReadImage) +#define __ENQUEUE_WRITE_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueWriteImage) +#define __ENQUEUE_COPY_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyImage) +#define __ENQUEUE_FILL_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueFillImage) +#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyImageToBuffer) +#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferToImage) +#define __ENQUEUE_MAP_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueMapBuffer) +#define __ENQUEUE_MAP_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueMapImage) +#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR CL_HPP_ERR_STR_(clEnqueueUnMapMemObject) +#define __ENQUEUE_NDRANGE_KERNEL_ERR CL_HPP_ERR_STR_(clEnqueueNDRangeKernel) +#define __ENQUEUE_NATIVE_KERNEL CL_HPP_ERR_STR_(clEnqueueNativeKernel) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR CL_HPP_ERR_STR_(clEnqueueMigrateMemObjects) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + +#define __ENQUEUE_ACQUIRE_GL_ERR CL_HPP_ERR_STR_(clEnqueueAcquireGLObjects) +#define __ENQUEUE_RELEASE_GL_ERR CL_HPP_ERR_STR_(clEnqueueReleaseGLObjects) + +#define __CREATE_PIPE_ERR CL_HPP_ERR_STR_(clCreatePipe) +#define __GET_PIPE_INFO_ERR CL_HPP_ERR_STR_(clGetPipeInfo) + + +#define __RETAIN_ERR CL_HPP_ERR_STR_(Retain Object) +#define __RELEASE_ERR CL_HPP_ERR_STR_(Release Object) +#define __FLUSH_ERR CL_HPP_ERR_STR_(clFlush) +#define __FINISH_ERR CL_HPP_ERR_STR_(clFinish) +#define __VECTOR_CAPACITY_ERR CL_HPP_ERR_STR_(Vector capacity error) /** * CL 1.2 version that uses device fission. */ -#if defined(CL_VERSION_1_2) -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevices) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevices) #else -#define __CREATE_SUB_DEVICES __ERR_STR(clCreateSubDevicesEXT) -#endif // #if defined(CL_VERSION_1_2) +#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevicesEXT) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * Deprecated APIs for 1.2 */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) -#define __ENQUEUE_MARKER_ERR __ERR_STR(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR __ERR_STR(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR __ERR_STR(clEnqueueBarrier) -#define __UNLOAD_COMPILER_ERR __ERR_STR(clUnloadCompiler) -#define __CREATE_GL_TEXTURE_2D_ERR __ERR_STR(clCreateFromGLTexture2D) -#define __CREATE_GL_TEXTURE_3D_ERR __ERR_STR(clCreateFromGLTexture3D) -#define __CREATE_IMAGE2D_ERR __ERR_STR(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR __ERR_STR(clCreateImage3D) -#endif // #if defined(CL_VERSION_1_1) - -#endif // __CL_USER_OVERRIDE_ERROR_STRINGS -//! \endcond +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#define __ENQUEUE_MARKER_ERR CL_HPP_ERR_STR_(clEnqueueMarker) +#define __ENQUEUE_WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clEnqueueWaitForEvents) +#define __ENQUEUE_BARRIER_ERR CL_HPP_ERR_STR_(clEnqueueBarrier) +#define __UNLOAD_COMPILER_ERR CL_HPP_ERR_STR_(clUnloadCompiler) +#define __CREATE_GL_TEXTURE_2D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture2D) +#define __CREATE_GL_TEXTURE_3D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture3D) +#define __CREATE_IMAGE2D_ERR CL_HPP_ERR_STR_(clCreateImage2D) +#define __CREATE_IMAGE3D_ERR CL_HPP_ERR_STR_(clCreateImage3D) +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) /** - * CL 1.2 marker and barrier commands + * Deprecated APIs for 2.0 */ -#if defined(CL_VERSION_1_2) -#define __ENQUEUE_MARKER_WAIT_LIST_ERR __ERR_STR(clEnqueueMarkerWithWaitList) -#define __ENQUEUE_BARRIER_WAIT_LIST_ERR __ERR_STR(clEnqueueBarrierWithWaitList) -#endif // #if defined(CL_VERSION_1_2) - -#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING) -typedef std::string STRING_CLASS; -#elif !defined(__USE_DEV_STRING) - -/*! \class string - * \brief Simple string class, that provides a limited subset of std::string - * functionality but avoids many of the issues that come with that class. - - * \note Deprecated. Please use std::string as default or - * re-define the string class to match the std::string - * interface by defining STRING_CLASS +#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +#define __CREATE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clCreateCommandQueue) +#define __ENQUEUE_TASK_ERR CL_HPP_ERR_STR_(clEnqueueTask) +#define __CREATE_SAMPLER_ERR CL_HPP_ERR_STR_(clCreateSampler) +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) + +/** + * CL 1.2 marker and barrier commands */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#define __ENQUEUE_MARKER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueMarkerWithWaitList) +#define __ENQUEUE_BARRIER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueBarrierWithWaitList) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + +#endif // CL_HPP_USER_OVERRIDE_ERROR_STRINGS +//! \endcond + + +namespace detail { + +// Generic getInfoHelper. The final parameter is used to guide overload +// resolution: the actual parameter passed is an int, which makes this +// a worse conversion sequence than a specialization that declares the +// parameter as an int. +template +inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) { -private: - ::size_t size_; - char * str_; -public: - //! \brief Constructs an empty string, allocating no memory. - string(void) : size_(0), str_(NULL) - { - } + return f(name, sizeof(T), param, NULL); +} - /*! \brief Constructs a string populated from an arbitrary value of - * specified size. - * - * An extra '\0' is added, in case none was contained in str. - * - * \param str the initial value of the string instance. Note that '\0' - * characters receive no special treatment. If NULL, - * the string is left empty, with a size of 0. - * - * \param size the number of characters to copy from str. - */ - string(const char * str, ::size_t size) : - size_(size), - str_(NULL) - { - if( size > 0 ) { - str_ = new char[size_+1]; - if (str_ != NULL) { - memcpy(str_, str, size_ * sizeof(char)); - str_[size_] = '\0'; - } - else { - size_ = 0; - } - } +// Specialized for getInfo +// Assumes that the output vector was correctly resized on the way in +template +inline cl_int getInfoHelper(Func f, cl_uint name, vector>* param, int) +{ + if (name != CL_PROGRAM_BINARIES) { + return CL_INVALID_VALUE; } + if (param) { + // Create array of pointers, calculate total size and pass pointer array in + size_type numBinaries = param->size(); + vector binariesPointers(numBinaries); - /*! \brief Constructs a string populated from a null-terminated value. - * - * \param str the null-terminated initial value of the string instance. - * If NULL, the string is left empty, with a size of 0. - */ - string(const char * str) : - size_(0), - str_(NULL) - { - if( str ) { - size_= ::strlen(str); - } - if( size_ > 0 ) { - str_ = new char[size_ + 1]; - if (str_ != NULL) { - memcpy(str_, str, (size_ + 1) * sizeof(char)); - } + for (size_type i = 0; i < numBinaries; ++i) + { + binariesPointers[i] = (*param)[i].data(); } - } - void resize( ::size_t n ) - { - if( size_ == n ) { - return; - } - if (n == 0) { - if( str_ ) { - delete [] str_; - } - str_ = NULL; - size_ = 0; - } - else { - char *newString = new char[n + 1]; - ::size_t copySize = n; - if( size_ < n ) { - copySize = size_; - } - size_ = n; - - if(str_) { - memcpy(newString, str_, (copySize + 1) * sizeof(char)); - } - if( copySize < size_ ) { - memset(newString + copySize, 0, size_ - copySize); - } - newString[size_] = '\0'; + cl_int err = f(name, numBinaries * sizeof(unsigned char*), binariesPointers.data(), NULL); - delete [] str_; - str_ = newString; + if (err != CL_SUCCESS) { + return err; } } - const char& operator[] ( ::size_t pos ) const - { - return str_[pos]; - } - char& operator[] ( ::size_t pos ) - { - return str_[pos]; - } + return CL_SUCCESS; +} - /*! \brief Copies the value of another string to this one. - * - * \param rhs the string to copy. - * - * \returns a reference to the modified instance. - */ - string& operator=(const string& rhs) - { - if (this == &rhs) { - return *this; - } - - if( str_ != NULL ) { - delete [] str_; - str_ = NULL; - size_ = 0; - } - - if (rhs.size_ == 0 || rhs.str_ == NULL) { - str_ = NULL; - size_ = 0; - } - else { - str_ = new char[rhs.size_ + 1]; - size_ = rhs.size_; - - if (str_ != NULL) { - memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char)); - } - else { - size_ = 0; - } - } - - return *this; +// Specialized getInfoHelper for vector params +template +inline cl_int getInfoHelper(Func f, cl_uint name, vector* param, long) +{ + size_type required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; } + const size_type elements = required / sizeof(T); - /*! \brief Constructs a string by copying the value of another instance. - * - * \param rhs the string to copy. - */ - string(const string& rhs) : - size_(0), - str_(NULL) - { - *this = rhs; + // Temporary to avoid changing param on an error + vector localData(elements); + err = f(name, required, localData.data(), NULL); + if (err != CL_SUCCESS) { + return err; } - - //! \brief Destructor - frees memory used to hold the current value. - ~string() - { - delete[] str_; - str_ = NULL; + if (param) { + *param = std::move(localData); } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t size(void) const { return size_; } - - //! \brief Queries the length of the string, excluding any added '\0's. - ::size_t length(void) const { return size(); } - - /*! \brief Returns a pointer to the private copy held by this instance, - * or "" if empty/unset. - */ - const char * c_str(void) const { return (str_) ? str_ : "";} -}; -typedef cl::string STRING_CLASS; -#endif // #elif !defined(__USE_DEV_STRING) - -#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) -#define VECTOR_CLASS std::vector -#elif !defined(__USE_DEV_VECTOR) -#define VECTOR_CLASS cl::vector - -#if !defined(__MAX_DEFAULT_VECTOR_SIZE) -#define __MAX_DEFAULT_VECTOR_SIZE 10 -#endif -/*! \class vector - * \brief Fixed sized vector implementation that mirroring - * - * \note Deprecated. Please use std::vector as default or - * re-define the vector class to match the std::vector - * interface by defining VECTOR_CLASS + return CL_SUCCESS; +} - * \note Not recommended for use with custom objects as - * current implementation will construct N elements - * - * std::vector functionality. - * \brief Fixed sized vector compatible with std::vector. - * - * \note - * This differs from std::vector<> not just in memory allocation, - * but also in terms of when members are constructed, destroyed, - * and assigned instead of being copy constructed. - * - * \param T type of element contained in the vector. - * - * \param N maximum size of the vector. +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. */ -template -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector +template +inline cl_int getInfoHelper( + Func f, cl_uint name, vector* param, int, typename T::cl_type = 0) { -private: - T data_[N]; - unsigned int size_; - -public: - //! \brief Constructs an empty vector with no memory allocated. - vector() : - size_(static_cast(0)) - {} - - //! \brief Deallocates the vector's memory and destroys all of its elements. - ~vector() - { - clear(); - } - - //! \brief Returns the number of elements currently contained. - unsigned int size(void) const - { - return size_; - } - - /*! \brief Empties the vector of all elements. - * \note - * This does not deallocate memory but will invoke destructors - * on contained elements. - */ - void clear() - { - while(!empty()) { - pop_back(); - } + size_type required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; } - /*! \brief Appends an element after the last valid element. - * Calling this on a vector that has reached capacity will throw an - * exception if exceptions are enabled. - */ - void push_back (const T& x) - { - if (size() < N) { - new (&data_[size_]) T(x); - size_++; - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - } + const size_type elements = required / sizeof(typename T::cl_type); - /*! \brief Removes the last valid element from the vector. - * Calling this on an empty vector will throw an exception - * if exceptions are enabled. - */ - void pop_back(void) - { - if (size_ != 0) { - --size_; - data_[size_].~T(); - } else { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } + vector value(elements); + err = f(name, required, value.data(), NULL); + if (err != CL_SUCCESS) { + return err; } - /*! \brief Constructs with a value copied from another. - * - * \param vec the vector to copy. - */ - vector(const vector& vec) : - size_(vec.size_) - { - if (size_ != 0) { - assign(vec.begin(), vec.end()); - } - } + if (param) { + // Assign to convert CL type to T for each element + param->resize(elements); - /*! \brief Constructs with a specified number of initial elements. - * - * \param size number of initial elements. - * - * \param val value of initial elements. - */ - vector(unsigned int size, const T& val = T()) : - size_(0) - { - for (unsigned int i = 0; i < size; i++) { - push_back(val); + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < elements; i++) { + (*param)[i] = T(value[i], true); } } + return CL_SUCCESS; +} - /*! \brief Overwrites the current content with that copied from another - * instance. - * - * \param rhs vector to copy. - * - * \returns a reference to this. - */ - vector& operator=(const vector& rhs) - { - if (this == &rhs) { - return *this; - } - - if (rhs.size_ != 0) { - assign(rhs.begin(), rhs.end()); - } else { - clear(); - } - - return *this; +// Specialized GetInfoHelper for string params +template +inline cl_int getInfoHelper(Func f, cl_uint name, string* param, long) +{ + size_type required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; } - /*! \brief Tests equality against another instance. - * - * \param vec the vector against which to compare. - */ - bool operator==(vector &vec) - { - if (size() != vec.size()) { - return false; + // std::string has a constant data member + // a char vector does not + if (required > 0) { + vector value(required); + err = f(name, required, value.data(), NULL); + if (err != CL_SUCCESS) { + return err; } - - for( unsigned int i = 0; i < size(); ++i ) { - if( operator[](i) != vec[i] ) { - return false; - } + if (param) { + param->assign(begin(value), prev(end(value))); } - return true; - } - - //! \brief Conversion operator to T*. - operator T* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const T* () const { return data_; } - - //! \brief Tests whether this instance has any elements. - bool empty (void) const - { - return size_==0; } - - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int max_size (void) const - { - return N; + else if (param) { + param->assign(""); } + return CL_SUCCESS; +} - //! \brief Returns the maximum number of elements this instance can hold. - unsigned int capacity () const - { - return N; +// Specialized GetInfoHelper for clsize_t params +template +inline cl_int getInfoHelper(Func f, cl_uint name, array* param, long) +{ + size_type required; + cl_int err = f(name, 0, NULL, &required); + if (err != CL_SUCCESS) { + return err; } - //! \brief Resizes the vector to the given size - void resize(unsigned int newSize, T fill = T()) - { - if (newSize > N) - { - detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR); - } - else - { - while (size_ < newSize) - { - new (&data_[size_]) T(fill); - size_++; - } - while (size_ > newSize) - { - --size_; - data_[size_].~T(); - } - } - } + size_type elements = required / sizeof(size_type); + vector value(elements, 0); - /*! \brief Returns a reference to a given element. - * - * \param index which element to access. * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - T& operator[](int index) - { - return data_[index]; - } - - /*! \brief Returns a const reference to a given element. - * - * \param index which element to access. - * - * \note - * The caller is responsible for ensuring index is >= 0 and < size(). - */ - const T& operator[](int index) const - { - return data_[index]; - } - - /*! \brief Assigns elements of the vector based on a source iterator range. - * - * \param start Beginning iterator of source range - * \param end Enditerator of source range - * - * \note - * Will throw an exception if exceptions are enabled and size exceeded. - */ - template - void assign(I start, I end) - { - clear(); - while(start != end) { - push_back(*start); - start++; - } + err = f(name, required, value.data(), NULL); + if (err != CL_SUCCESS) { + return err; } - - /*! \class iterator - * \brief Const iterator class for vectors - */ - class iterator - { - private: - const vector *vec_; - int index_; - - /** - * Internal iterator constructor to capture reference - * to the vector it iterates over rather than taking - * the vector by copy. - */ - iterator (const vector &vec, int index) : - vec_(&vec) - { - if( !vec.empty() ) { - index_ = index; - } else { - index_ = -1; - } - } - - public: - iterator(void) : - index_(-1), - vec_(NULL) - { - } - - iterator(const iterator& rhs) : - vec_(rhs.vec_), - index_(rhs.index_) - { - } - - ~iterator(void) {} - - static iterator begin(const cl::vector &vec) - { - iterator i(vec, 0); - - return i; - } - - static iterator end(const cl::vector &vec) - { - iterator i(vec, vec.size()); - - return i; - } - bool operator==(iterator i) - { - return ((vec_ == i.vec_) && - (index_ == i.index_)); - } - - bool operator!=(iterator i) - { - return (!(*this==i)); - } - - iterator& operator++() - { - ++index_; - return *this; - } - - iterator operator++(int) - { - iterator retVal(*this); - ++index_; - return retVal; - } - - iterator& operator--() - { - --index_; - return *this; - } - - iterator operator--(int) - { - iterator retVal(*this); - --index_; - return retVal; - } - - const T& operator *() const - { - return (*vec_)[index_]; - } - }; - - iterator begin(void) - { - return iterator::begin(*this); + // Bound the copy with N to prevent overruns + // if passed N > than the amount copied + if (elements > N) { + elements = N; } - - iterator begin(void) const - { - return iterator::begin(*this); - } - - iterator end(void) - { - return iterator::end(*this); - } - - iterator end(void) const - { - return iterator::end(*this); + for (size_type i = 0; i < elements; ++i) { + (*param)[i] = value[i]; } - T& front(void) - { - return data_[0]; - } + return CL_SUCCESS; +} - T& back(void) - { - return data_[size_]; - } +template struct ReferenceHandler; - const T& front(void) const - { - return data_[0]; +/* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ +template +inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) +{ + typename T::cl_type value; + cl_int err = f(name, sizeof(value), &value, NULL); + if (err != CL_SUCCESS) { + return err; } - - const T& back(void) const + *param = value; + if (value != NULL) { - return data_[size_-1]; + err = param->retain(); + if (err != CL_SUCCESS) { + return err; + } } -} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR) - - - - - -namespace detail { -#define __DEFAULT_NOT_INITIALIZED 1 -#define __DEFAULT_BEING_INITIALIZED 2 -#define __DEFAULT_INITIALIZED 4 - - /* - * Compare and exchange primitives are needed for handling of defaults - */ + return CL_SUCCESS; +} -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - inline int compare_exchange(std::atomic * dest, int exchange, int comparand) -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - inline int compare_exchange(volatile int * dest, int exchange, int comparand) -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - { -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - std::atomic_compare_exchange_strong(dest, &comparand, exchange); - return comparand; -#elif _MSC_VER - return (int)(_InterlockedCompareExchange( - (volatile long*)dest, - (long)exchange, - (long)comparand)); -#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED - return (__sync_val_compare_and_swap( - dest, - comparand, - exchange)); -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - } - - inline void fence() { -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - std::atomic_thread_fence(std::memory_order_seq_cst); -#elif _MSC_VER // !CL_HPP_CPP11_ATOMICS_SUPPORTED - _ReadWriteBarrier(); -#else // !_MSC_VER && !CL_HPP_CPP11_ATOMICS_SUPPORTED - __sync_synchronize(); -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED - } -} // namespace detail - - -/*! \brief class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, whose - * size is known statically. - */ -template -class size_t -{ -private: - ::size_t data_[N]; - -public: - //! \brief Initialize size_t to all 0s - size_t() - { - for( int i = 0; i < N; ++i ) { - data_[i] = 0; - } - } - - ::size_t& operator[](int index) - { - return data_[index]; - } - - const ::size_t& operator[](int index) const - { - return data_[index]; - } - - //! \brief Conversion operator to T*. - operator ::size_t* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const ::size_t* () const { return data_; } -}; - -namespace detail { - -// Generic getInfoHelper. The final parameter is used to guide overload -// resolution: the actual parameter passed is an int, which makes this -// a worse conversion sequence than a specialization that declares the -// parameter as an int. -template -inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) -{ - return f(name, sizeof(T), param, NULL); -} - -// Specialized getInfoHelper for VECTOR_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - T* value = (T*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - param->assign(&value[0], &value[required/sizeof(T)]); - return CL_SUCCESS; -} - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int, typename T::cl_type = 0) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - typename T::cl_type * value = (typename T::cl_type *) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t elements = required / sizeof(typename T::cl_type); - param->assign(&value[0], &value[elements]); - for (::size_t i = 0; i < elements; i++) - { - if (value[i] != NULL) - { - err = (*param)[i].retain(); - if (err != CL_SUCCESS) { - return err; - } - } - } - return CL_SUCCESS; -} - -// Specialized for getInfo -template -inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS* param, int) -{ - cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL); - - if (err != CL_SUCCESS) { - return err; - } - - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for STRING_CLASS params -template -inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - // std::string has a constant data member - // a char vector does not - VECTOR_CLASS value(required); - err = f(name, required, value.data(), NULL); - if (err != CL_SUCCESS) { - return err; - } - if (param) { - param->assign(value.begin(), value.end()); - } - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for cl::size_t params -template -inline cl_int getInfoHelper(Func f, cl_uint name, size_t* param, long) -{ - ::size_t required; - cl_int err = f(name, 0, NULL, &required); - if (err != CL_SUCCESS) { - return err; - } - - ::size_t* value = (::size_t*) alloca(required); - err = f(name, required, value, NULL); - if (err != CL_SUCCESS) { - return err; - } - - for(int i = 0; i < N; ++i) { - (*param)[i] = value[i]; - } - - return CL_SUCCESS; -} - -template struct ReferenceHandler; - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) -{ - typename T::cl_type value; - cl_int err = f(name, sizeof(value), &value, NULL); - if (err != CL_SUCCESS) { - return err; - } - *param = value; - if (value != NULL) - { - err = param->retain(); - if (err != CL_SUCCESS) { - return err; - } - } - return CL_SUCCESS; -} - -#define __PARAM_NAME_INFO_1_0(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \ +#define CL_HPP_PARAM_NAME_INFO_1_0_(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, string) \ + F(cl_platform_info, CL_PLATFORM_VERSION, string) \ + F(cl_platform_info, CL_PLATFORM_NAME, string) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, string) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, string) \ \ F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, cl::vector) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ @@ -1299,13 +1126,13 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, size_type) \ F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, size_type) \ F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ @@ -1319,23 +1146,22 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_type) \ F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \ F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \ - F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \ - F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_NAME, string) \ + F(cl_device_info, CL_DEVICE_VENDOR, string) \ + F(cl_device_info, CL_DRIVER_VERSION, string) \ + F(cl_device_info, CL_DEVICE_PROFILE, string) \ + F(cl_device_info, CL_DEVICE_VERSION, string) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, string) \ \ F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS) \ + F(cl_context_info, CL_CONTEXT_DEVICES, cl::vector) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, cl::vector) \ \ F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ @@ -1349,46 +1175,46 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ \ F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, ::size_t) \ + F(cl_mem_info, CL_MEM_SIZE, size_type) \ F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ \ F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \ - F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \ - F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \ - F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, size_type) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, size_type) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, size_type) \ + F(cl_image_info, CL_IMAGE_WIDTH, size_type) \ + F(cl_image_info, CL_IMAGE_HEIGHT, size_type) \ + F(cl_image_info, CL_IMAGE_DEPTH, size_type) \ \ F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ \ F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS) \ - F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \ - F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS) \ + F(cl_program_info, CL_PROGRAM_DEVICES, cl::vector) \ + F(cl_program_info, CL_PROGRAM_SOURCE, string) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, cl::vector) \ + F(cl_program_info, CL_PROGRAM_BINARIES, cl::vector>) \ \ F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, string) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, string) \ \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, string) \ F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, size_type) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::detail::size_t_array) \ F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ \ F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ @@ -1396,8 +1222,8 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) -#if defined(CL_VERSION_1_1) -#define __PARAM_NAME_INFO_1_1(F) \ + +#define CL_HPP_PARAM_NAME_INFO_1_1_(F) \ F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ @@ -1409,57 +1235,73 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, string) \ \ F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \ + F(cl_mem_info, CL_MEM_OFFSET, size_type) \ \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ \ F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) -#endif // CL_VERSION_1_1 - -#if defined(CL_VERSION_1_2) -#define __PARAM_NAME_INFO_1_2(F) \ - F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \ - \ - F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \ - F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \ +#define CL_HPP_PARAM_NAME_INFO_1_2_(F) \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, size_type) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, string) \ \ F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ \ - F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, string) \ \ F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, size_type) \ F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS) -#endif // #if defined(CL_VERSION_1_2) - -#if defined(USE_CL_DEVICE_FISSION) -#define __PARAM_NAME_DEVICE_FISSION(F) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ + \ + F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ + F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ + F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) + +#define CL_HPP_PARAM_NAME_INFO_2_0_(F) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_QUEUES, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_EVENTS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_PIPE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_PIPE_MAX_PACKET_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities) \ + F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_SIZE, cl_uint) \ + F(cl_mem_info, CL_MEM_USES_SVM_POINTER, cl_bool) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, size_type) \ + F(cl_pipe_info, CL_PIPE_PACKET_SIZE, cl_uint) \ + F(cl_pipe_info, CL_PIPE_MAX_PACKETS, cl_uint) + +#define CL_HPP_PARAM_NAME_DEVICE_FISSION_(F) \ F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, cl::vector) \ + F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, cl::vector) \ F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS) -#endif // USE_CL_DEVICE_FISSION + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, cl::vector) template struct param_traits {}; -#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \ +#define CL_HPP_DECLARE_PARAM_TRAITS_(token, param_name, T) \ struct token; \ template<> \ struct param_traits \ @@ -1468,77 +1310,103 @@ struct param_traits \ typedef T param_type; \ }; -__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS) -#if defined(CL_VERSION_1_1) -__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 -#if defined(CL_VERSION_1_2) -__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS) -#endif // CL_VERSION_1_1 - -#if defined(USE_CL_DEVICE_FISSION) -__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS); -#endif // USE_CL_DEVICE_FISSION +CL_HPP_PARAM_NAME_INFO_1_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 +CL_HPP_PARAM_NAME_INFO_1_1_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +CL_HPP_PARAM_NAME_INFO_1_2_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +CL_HPP_PARAM_NAME_INFO_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 + + +// Flags deprecated in OpenCL 2.0 +#define CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(F) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) + +#define CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(F) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) + +#define CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) + +// Include deprecated query flags based on versions +// Only include deprecated 1.0 flags if 2.0 not active as there is an enum clash +#if CL_HPP_TARGET_OPENCL_VERSION > 100 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 && CL_HPP_TARGET_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 110 +#if CL_HPP_TARGET_OPENCL_VERSION > 110 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 +#if CL_HPP_TARGET_OPENCL_VERSION > 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 +CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 + +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) +CL_HPP_PARAM_NAME_DEVICE_FISSION_(CL_HPP_DECLARE_PARAM_TRAITS_); +#endif // CL_HPP_USE_CL_DEVICE_FISSION #ifdef CL_PLATFORM_ICD_SUFFIX_KHR -__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, string) #endif #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) #endif #ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, vector) #endif #ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) #endif #ifdef CL_DEVICE_SIMD_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) #endif #ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) #endif #ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) #endif #ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) #endif #ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) #endif #ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) #endif #ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) #endif #ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) #endif #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) #endif #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) #endif #ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) #endif #ifdef CL_DEVICE_WARP_SIZE_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) #endif #ifdef CL_DEVICE_GPU_OVERLAP_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) #endif #ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) #endif #ifdef CL_DEVICE_INTEGRATED_MEMORY_NV -__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) +CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) #endif // Convenience functions @@ -1555,7 +1423,7 @@ struct GetInfoFunctor0 { Func f_; const Arg0& arg0_; cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + cl_uint param, size_type size, void* value, size_type* size_ret) { return f_(arg0_, param, size, value, size_ret); } }; @@ -1564,7 +1432,7 @@ struct GetInfoFunctor1 { Func f_; const Arg0& arg0_; const Arg1& arg1_; cl_int operator ()( - cl_uint param, ::size_t size, void* value, ::size_t* size_ret) + cl_uint param, size_type size, void* value, size_type* size_ret) { return f_(arg0_, arg1_, param, size, value, size_ret); } }; @@ -1584,11 +1452,12 @@ getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) return getInfoHelper(f0, name, param, 0); } + template struct ReferenceHandler { }; -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * OpenCL 1.2 devices do have retain/release. */ @@ -1618,7 +1487,7 @@ struct ReferenceHandler static cl_int release(cl_device_id device) { return ::clReleaseDevice(device); } }; -#else // #if defined(CL_VERSION_1_2) +#else // CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * OpenCL 1.1 devices do not have retain/release. */ @@ -1632,7 +1501,7 @@ struct ReferenceHandler static cl_int release(cl_device_id) { return CL_SUCCESS; } }; -#endif // #if defined(CL_VERSION_1_2) +#endif // ! (CL_HPP_TARGET_OPENCL_VERSION >= 120) template <> struct ReferenceHandler @@ -1709,8 +1578,9 @@ struct ReferenceHandler }; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 // Extracts version number with major in the upper 16 bits, minor in the lower 16 -static cl_uint getVersion(const char *versionInfo) +static cl_uint getVersion(const vector &versionInfo) { int highVersion = 0; int lowVersion = 0; @@ -1731,10 +1601,11 @@ static cl_uint getVersion(const char *versionInfo) static cl_uint getPlatformVersion(cl_platform_id platform) { - ::size_t size = 0; + size_type size = 0; clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size); - char *versionInfo = (char *) alloca(size); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size); + + vector versionInfo(size); + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, versionInfo.data(), &size); return getVersion(versionInfo); } @@ -1745,20 +1616,19 @@ static cl_uint getDevicePlatformVersion(cl_device_id device) return getPlatformVersion(platform); } -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) static cl_uint getContextPlatformVersion(cl_context context) { // The platform cannot be queried directly, so we first have to grab a // device and obtain its context - ::size_t size = 0; + size_type size = 0; clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size); if (size == 0) return 0; - cl_device_id *devices = (cl_device_id *) alloca(size); - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL); + vector devices(size/sizeof(cl_device_id)); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices.data(), NULL); return getDevicePlatformVersion(devices[0]); } -#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 template class Wrapper @@ -1771,8 +1641,13 @@ protected: public: Wrapper() : object_(NULL) { } - - Wrapper(const cl_type &obj) : object_(obj) { } + + Wrapper(const cl_type &obj, bool retainObject) : object_(obj) + { + if (retainObject) { + detail::errHandler(retain(), __RETAIN_ERR); + } + } ~Wrapper() { @@ -1782,62 +1657,73 @@ public: Wrapper(const Wrapper& rhs) { object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + detail::errHandler(retain(), __RETAIN_ERR); } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT + Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT_ { object_ = rhs.object_; rhs.object_ = NULL; } -#endif Wrapper& operator = (const Wrapper& rhs) { if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + detail::errHandler(release(), __RELEASE_ERR); object_ = rhs.object_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + detail::errHandler(retain(), __RETAIN_ERR); } return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) Wrapper& operator = (Wrapper&& rhs) { if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + detail::errHandler(release(), __RELEASE_ERR); object_ = rhs.object_; rhs.object_ = NULL; } return *this; } -#endif Wrapper& operator = (const cl_type &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + detail::errHandler(release(), __RELEASE_ERR); object_ = rhs; return *this; } - cl_type operator ()() const { return object_; } + const cl_type& operator ()() const { return object_; } cl_type& operator ()() { return object_; } + const cl_type get() const { return object_; } + + cl_type get() { return object_; } + + protected: template friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); cl_int retain() const { - return ReferenceHandler::retain(object_); + if (object_ != nullptr) { + return ReferenceHandler::retain(object_); + } + else { + return CL_SUCCESS; + } } cl_int release() const { - return ReferenceHandler::release(object_); + if (object_ != nullptr) { + return ReferenceHandler::release(object_); + } + else { + return CL_SUCCESS; + } } }; @@ -1854,12 +1740,18 @@ protected: static bool isReferenceCountable(cl_device_id device) { bool retVal = false; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 if (device != NULL) { int version = getDevicePlatformVersion(device); if(version > ((1 << 16) + 1)) { retVal = true; } } +#else // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + retVal = true; +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 return retVal; } @@ -1868,49 +1760,52 @@ public: { } - Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) + Wrapper(const cl_type &obj, bool retainObject) : + object_(obj), + referenceCountable_(false) { referenceCountable_ = isReferenceCountable(obj); + + if (retainObject) { + detail::errHandler(retain(), __RETAIN_ERR); + } } ~Wrapper() { - if (object_ != NULL) { release(); } + release(); } Wrapper(const Wrapper& rhs) { object_ = rhs.object_; referenceCountable_ = isReferenceCountable(object_); - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + detail::errHandler(retain(), __RETAIN_ERR); } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT + Wrapper(Wrapper&& rhs) CL_HPP_NOEXCEPT_ { object_ = rhs.object_; referenceCountable_ = rhs.referenceCountable_; rhs.object_ = NULL; rhs.referenceCountable_ = false; } -#endif Wrapper& operator = (const Wrapper& rhs) { if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + detail::errHandler(release(), __RELEASE_ERR); object_ = rhs.object_; referenceCountable_ = rhs.referenceCountable_; - if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); } + detail::errHandler(retain(), __RETAIN_ERR); } return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) Wrapper& operator = (Wrapper&& rhs) { if (this != &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + detail::errHandler(release(), __RELEASE_ERR); object_ = rhs.object_; referenceCountable_ = rhs.referenceCountable_; rhs.object_ = NULL; @@ -1918,30 +1813,33 @@ public: } return *this; } -#endif Wrapper& operator = (const cl_type &rhs) { - if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); } + detail::errHandler(release(), __RELEASE_ERR); object_ = rhs; referenceCountable_ = isReferenceCountable(object_); return *this; } - cl_type operator ()() const { return object_; } + const cl_type& operator ()() const { return object_; } cl_type& operator ()() { return object_; } + const cl_type get() const { return object_; } + + cl_type get() { return object_; } + protected: template friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); template - friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS*, int, typename U::cl_type); + friend inline cl_int getInfoHelper(Func, cl_uint, vector*, int, typename U::cl_type); cl_int retain() const { - if( referenceCountable_ ) { + if( object_ != nullptr && referenceCountable_ ) { return ReferenceHandler::retain(object_); } else { @@ -1951,7 +1849,7 @@ protected: cl_int release() const { - if( referenceCountable_ ) { + if (object_ != nullptr && referenceCountable_) { return ReferenceHandler::release(object_); } else { @@ -1960,9 +1858,69 @@ protected: } }; +template +inline bool operator==(const Wrapper &lhs, const Wrapper &rhs) +{ + return lhs() == rhs(); +} + +template +inline bool operator!=(const Wrapper &lhs, const Wrapper &rhs) +{ + return !operator==(lhs, rhs); +} + } // namespace detail //! \endcond + +using BuildLogType = vector::param_type>>; +#if defined(CL_HPP_ENABLE_EXCEPTIONS) +/** +* Exception class for build errors to carry build info +*/ +class BuildError : public Error +{ +private: + BuildLogType buildLogs; +public: + BuildError(cl_int err, const char * errStr, const BuildLogType &vec) : Error(err, errStr), buildLogs(vec) + { + } + + BuildLogType getBuildLog() const + { + return buildLogs; + } +}; +namespace detail { + static inline cl_int buildErrHandler( + cl_int err, + const char * errStr, + const BuildLogType &buildLogs) + { + if (err != CL_SUCCESS) { + throw BuildError(err, errStr, buildLogs); + } + return err; + } +} // namespace detail + +#else +namespace detail { + static inline cl_int buildErrHandler( + cl_int err, + const char * errStr, + const BuildLogType &buildLogs) + { + (void)buildLogs; // suppress unused variable warning + (void)errStr; + return err; + } +} // namespace detail +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + + /*! \stuct ImageFormat * \brief Adds constructors and member functions for cl_image_format. * @@ -2000,7 +1958,40 @@ struct ImageFormat : public cl_image_format */ class Device : public detail::Wrapper { +private: + static std::once_flag default_initialized_; + static Device default_; + static cl_int default_error_; + + /*! \brief Create the default context. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault(); + + /*! \brief Create the default platform from a provided platform. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Device &p) { + default_ = p; + } + public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Device(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + //! \brief Default constructor - initializes to NULL. Device() : detail::Wrapper() { } @@ -2008,13 +1999,37 @@ public: * * This simply copies the device ID value, which is an inexpensive operation. */ - __CL_EXPLICIT_CONSTRUCTORS Device(const cl_device_id &device) : detail::Wrapper(device) { } + explicit Device(const cl_device_id &device, bool retainObject = false) : + detail::Wrapper(device, retainObject) { } /*! \brief Returns the first device on the default context. * * \see Context::getDefault() */ - static Device getDefault(cl_int * err = NULL); + static Device getDefault( + cl_int *errResult = NULL) + { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (errResult != NULL) { + *errResult = default_error_; + } + return default_; + } + + /** + * Modify the default device to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default device. + * Should be compared to the passed value to ensure that it was updated. + */ + static Device setDefault(const Device &default_device) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_device)); + detail::errHandler(default_error_); + return default_; + } /*! \brief Assignment operator from cl_device_id. * @@ -2027,34 +2042,32 @@ public: } /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ + * Required for MSVC. + */ Device(const Device& dev) : detail::Wrapper(dev) {} /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ + * Required for MSVC. + */ Device& operator = (const Device &dev) { detail::Wrapper::operator=(dev); return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - Device(Device&& dev) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(dev)) {} + * Required for MSVC. + */ + Device(Device&& dev) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(dev)) {} /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ + * Required for MSVC. + */ Device& operator = (Device &&dev) { detail::Wrapper::operator=(std::move(dev)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) //! \brief Wrapper for clGetDeviceInfo(). template @@ -2082,37 +2095,48 @@ public: /** * CL 1.2 version */ -#if defined(CL_VERSION_1_2) - //! \brief Wrapper for clCreateSubDevicesEXT(). +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + //! \brief Wrapper for clCreateSubDevices(). cl_int createSubDevices( const cl_device_partition_property * properties, - VECTOR_CLASS* devices) + vector* devices) { cl_uint n = 0; cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n); if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); } - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = clCreateSubDevices(object_, properties, n, ids, NULL); + vector ids(n); + err = clCreateSubDevices(object_, properties, n, ids.data(), NULL); if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); } - devices->assign(&ids[0], &ids[n]); - return CL_SUCCESS; + // Cannot trivially assign because we need to capture intermediates + // with safe construction + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + // We do not need to retain because this device is being created + // by the runtime + (*devices)[i] = Device(ids[i], false); + } + } + + return CL_SUCCESS; } -#endif // #if defined(CL_VERSION_1_2) +#elif defined(CL_HPP_USE_CL_DEVICE_FISSION) /** - * CL 1.1 version that uses device fission. + * CL 1.1 version that uses device fission extension. */ -#if defined(CL_VERSION_1_1) -#if defined(USE_CL_DEVICE_FISSION) cl_int createSubDevices( const cl_device_partition_property_ext * properties, - VECTOR_CLASS* devices) + vector* devices) { typedef CL_API_ENTRY cl_int ( CL_API_CALL * PFN_clCreateSubDevicesEXT)( @@ -2123,27 +2147,41 @@ public: cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1; static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL; - __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSubDevicesEXT); cl_uint n = 0; cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n); if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); } - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL); + vector ids(n); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids.data(), NULL); if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_SUB_DEVICES); + return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); + } + // Cannot trivially assign because we need to capture intermediates + // with safe construction + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + // We do not need to retain because this device is being created + // by the runtime + (*devices)[i] = Device(ids[i], false); + } } - - devices->assign(&ids[0], &ids[n]); return CL_SUCCESS; } -#endif // #if defined(USE_CL_DEVICE_FISSION) -#endif // #if defined(CL_VERSION_1_1) +#endif // defined(CL_HPP_USE_CL_DEVICE_FISSION) }; +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Device::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Device Device::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Device::default_error_ = CL_SUCCESS; + /*! \brief Class interface for cl_platform_id. * * \note Copies of these objects are inexpensive, since they don't 'own' @@ -2153,15 +2191,88 @@ public: */ class Platform : public detail::Wrapper { +private: + static std::once_flag default_initialized_; + static Platform default_; + static cl_int default_error_; + + /*! \brief Create the default context. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + // If default wasn't passed ,generate one + // Otherwise set it + cl_uint n = 0; + + cl_int err = ::clGetPlatformIDs(0, NULL, &n); + if (err != CL_SUCCESS) { + default_error_ = err; + return; + } + if (n == 0) { + default_error_ = CL_INVALID_PLATFORM; + return; + } + + vector ids(n); + err = ::clGetPlatformIDs(n, ids.data(), NULL); + if (err != CL_SUCCESS) { + default_error_ = err; + return; + } + + default_ = Platform(ids[0]); + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + /*! \brief Create the default platform from a provided platform. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Platform &p) { + default_ = p; + } + public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Platform(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + //! \brief Default constructor - initializes to NULL. Platform() : detail::Wrapper() { } /*! \brief Constructor from cl_platform_id. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * This simply copies the platform ID value, which is an inexpensive operation. */ - __CL_EXPLICIT_CONSTRUCTORS Platform(const cl_platform_id &platform) : detail::Wrapper(platform) { } + explicit Platform(const cl_platform_id &platform, bool retainObject = false) : + detail::Wrapper(platform, retainObject) { } /*! \brief Assignment operator from cl_platform_id. * @@ -2173,8 +2284,33 @@ public: return *this; } + static Platform getDefault( + cl_int *errResult = NULL) + { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (errResult != NULL) { + *errResult = default_error_; + } + return default_; + } + + /** + * Modify the default platform to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default platform. + * Should be compared to the passed value to ensure that it was updated. + */ + static Platform setDefault(const Platform &default_platform) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_platform)); + detail::errHandler(default_error_); + return default_; + } + //! \brief Wrapper for clGetPlatformInfo(). - cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const + cl_int getInfo(cl_platform_info name, string* param) const { return detail::errHandler( detail::getInfo(&::clGetPlatformInfo, object_, name, param), @@ -2201,7 +2337,7 @@ public: */ cl_int getDevices( cl_device_type type, - VECTOR_CLASS* devices) const + vector* devices) const { cl_uint n = 0; if( devices == NULL ) { @@ -2212,17 +2348,29 @@ public: return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); - err = ::clGetDeviceIDs(object_, type, n, ids, NULL); + vector ids(n); + err = ::clGetDeviceIDs(object_, type, n, ids.data(), NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } - devices->assign(&ids[0], &ids[n]); + // Cannot trivially assign because we need to capture intermediates + // with safe construction + // We must retain things we obtain from the API to avoid releasing + // API-owned objects. + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + (*devices)[i] = Device(ids[i], true); + } + } return CL_SUCCESS; } -#if defined(USE_DX_INTEROP) +#if defined(CL_HPP_USE_DX_INTEROP) /*! \brief Get the list of available D3D10 devices. * * \param d3d_device_source. @@ -2250,7 +2398,7 @@ public: cl_d3d10_device_source_khr d3d_device_source, void * d3d_object, cl_d3d10_device_set_khr d3d_device_set, - VECTOR_CLASS* devices) const + vector* devices) const { typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( cl_platform_id platform, @@ -2266,7 +2414,7 @@ public: } static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL; - __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(object_, clGetDeviceIDsFromD3D10KHR); cl_uint n = 0; cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( @@ -2281,20 +2429,32 @@ public: return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } - cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id)); + vector ids(n); err = pfn_clGetDeviceIDsFromD3D10KHR( object_, d3d_device_source, d3d_object, d3d_device_set, n, - ids, + ids.data(), NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } - devices->assign(&ids[0], &ids[n]); + // Cannot trivially assign because we need to capture intermediates + // with safe construction + // We must retain things we obtain from the API to avoid releasing + // API-owned objects. + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + (*devices)[i] = Device(ids[i], true); + } + } return CL_SUCCESS; } #endif @@ -2304,7 +2464,7 @@ public: * Wraps clGetPlatformIDs(). */ static cl_int get( - VECTOR_CLASS* platforms) + vector* platforms) { cl_uint n = 0; @@ -2317,14 +2477,20 @@ public: return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); } - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); + vector ids(n); + err = ::clGetPlatformIDs(n, ids.data(), NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); } - platforms->assign(&ids[0], &ids[n]); + if (platforms) { + platforms->resize(ids.size()); + + // Platforms don't reference count + for (size_type i = 0; i < ids.size(); i++) { + (*platforms)[i] = Platform(ids[i]); + } + } return CL_SUCCESS; } @@ -2335,83 +2501,52 @@ public: static cl_int get( Platform * platform) { - cl_uint n = 0; - - if( platform == NULL ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); + cl_int err; + Platform default_platform = Platform::getDefault(&err); + if (platform) { + *platform = default_platform; } - - *platform = ids[0]; - return CL_SUCCESS; + return err; } /*! \brief Gets the first available platform, returning it by value. - * - * Wraps clGetPlatformIDs(), returning the first result. + * + * \return Returns a valid platform if one is available. + * If no platform is available will return a null platform. + * Throws an exception if no platforms are available + * or an error condition occurs. + * Wraps clGetPlatformIDs(), returning the first result. */ static Platform get( cl_int * errResult = NULL) { - Platform platform; - cl_uint n = 0; - cl_int err = ::clGetPlatformIDs(0, NULL, &n); - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) { - *errResult = err; - } - return Platform(); + cl_int err; + Platform default_platform = Platform::getDefault(&err); + if (errResult) { + *errResult = err; } - - cl_platform_id* ids = (cl_platform_id*) alloca( - n * sizeof(cl_platform_id)); - err = ::clGetPlatformIDs(n, ids, NULL); - - if (err != CL_SUCCESS) { - detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - if (errResult != NULL) { - *errResult = err; - } - return Platform(); - } - - - return Platform(ids[0]); - } - - static Platform getDefault( - cl_int *errResult = NULL ) - { - return get(errResult); - } - + return default_platform; + } -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 //! \brief Wrapper for clUnloadCompiler(). cl_int unloadCompiler() { return ::clUnloadPlatformCompiler(object_); } -#endif // #if defined(CL_VERSION_1_2) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 }; // class Platform +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Platform::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Platform Platform::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Platform::default_error_ = CL_SUCCESS; + + /** * Deprecated APIs for 1.2 */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) /** * Unload the OpenCL compiler. * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. @@ -2423,7 +2558,7 @@ UnloadCompiler() { return ::clUnloadCompiler(); } -#endif // #if defined(CL_VERSION_1_1) +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) /*! \brief Class interface for cl_context. * @@ -2437,41 +2572,97 @@ class Context : public detail::Wrapper { private: - -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - static std::atomic default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static volatile int default_initialized_; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED + static std::once_flag default_initialized_; static Context default_; - static volatile cl_int default_error_; + static cl_int default_error_; + + /*! \brief Create the default context from the default device type in the default platform. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { +#if !defined(__APPLE__) && !defined(__MACOS) + const Platform &p = Platform::getDefault(); + cl_platform_id defaultPlatform = p(); + cl_context_properties properties[3] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)defaultPlatform, 0 + }; +#else // #if !defined(__APPLE__) && !defined(__MACOS) + cl_context_properties *properties = nullptr; +#endif // #if !defined(__APPLE__) && !defined(__MACOS) + + default_ = Context( + CL_DEVICE_TYPE_DEFAULT, + properties, + NULL, + NULL, + &default_error_); + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + + /*! \brief Create the default context from a provided Context. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Context &c) { + default_ = c; + } + public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = Context(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Constructs a context including a list of specified devices. * * Wraps clCreateContext(). */ Context( - const VECTOR_CLASS& devices, + const vector& devices, cl_context_properties* properties = NULL, void (CL_CALLBACK * notifyFptr)( const char *, const void *, - ::size_t, + size_type, void *) = NULL, void* data = NULL, cl_int* err = NULL) { cl_int error; - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { deviceIDs[deviceIndex] = (devices[deviceIndex])(); } object_ = ::clCreateContext( properties, (cl_uint) numDevices, - deviceIDs, + deviceIDs.data(), notifyFptr, data, &error); detail::errHandler(error, __CREATE_CONTEXT_ERR); @@ -2486,7 +2677,7 @@ public: void (CL_CALLBACK * notifyFptr)( const char *, const void *, - ::size_t, + size_type, void *) = NULL, void* data = NULL, cl_int* err = NULL) @@ -2505,7 +2696,7 @@ public: *err = error; } } - + /*! \brief Constructs a context including all or a subset of devices of a specified type. * * Wraps clCreateContextFromType(). @@ -2516,7 +2707,7 @@ public: void (CL_CALLBACK * notifyFptr)( const char *, const void *, - ::size_t, + size_type, void *) = NULL, void* data = NULL, cl_int* err = NULL) @@ -2528,7 +2719,7 @@ public: if (properties == NULL) { // Get a valid platform ID as we cannot send in a blank one - VECTOR_CLASS platforms; + vector platforms; error = Platform::get(&platforms); if (error != CL_SUCCESS) { detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); @@ -2542,15 +2733,15 @@ public: cl_context_properties platform_id = 0; for (unsigned int i = 0; i < platforms.size(); i++) { - VECTOR_CLASS devices; + vector devices; -#if defined(__CL_ENABLE_EXCEPTIONS) +#if defined(CL_HPP_ENABLE_EXCEPTIONS) try { #endif error = platforms[i].getDevices(type, &devices); -#if defined(__CL_ENABLE_EXCEPTIONS) +#if defined(CL_HPP_ENABLE_EXCEPTIONS) } catch (Error) {} // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type // We do error checking next anyway, and can throw there if needed @@ -2605,11 +2796,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Context(Context&& ctx) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(ctx)) {} + Context(Context&& ctx) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(ctx)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -2619,7 +2809,7 @@ public: detail::Wrapper::operator=(std::move(ctx)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. * @@ -2627,50 +2817,26 @@ public: */ static Context getDefault(cl_int * err = NULL) { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); - - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; - } - - cl_int error; - default_ = Context( - CL_DEVICE_TYPE_DEFAULT, - NULL, - NULL, - NULL, - &error); - - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); if (err != NULL) { *err = default_error_; } return default_; + } + /** + * Modify the default context to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default context. + * Should be compared to the passed value to ensure that it was updated. + */ + static Context setDefault(const Context &default_context) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_context)); + detail::errHandler(default_error_); + return default_; } //! \brief Default constructor - initializes to NULL. @@ -2681,7 +2847,8 @@ public: * This effectively transfers ownership of a refcount on the cl_context * into the new Context object. */ - __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper(context) { } + explicit Context(const cl_context& context, bool retainObject = false) : + detail::Wrapper(context, retainObject) { } /*! \brief Assignment operator from cl_context - takes ownership. * @@ -2724,9 +2891,14 @@ public: cl_int getSupportedImageFormats( cl_mem_flags flags, cl_mem_object_type type, - VECTOR_CLASS* formats) const + vector* formats) const { cl_uint numEntries; + + if (!formats) { + return CL_SUCCESS; + } + cl_int err = ::clGetSupportedImageFormats( object_, flags, @@ -2738,65 +2910,62 @@ public: return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); } - ImageFormat* value = (ImageFormat*) - alloca(numEntries * sizeof(ImageFormat)); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*) value, - NULL); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + if (numEntries > 0) { + vector value(numEntries); + err = ::clGetSupportedImageFormats( + object_, + flags, + type, + numEntries, + (cl_image_format*)value.data(), + NULL); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(begin(value), end(value)); + } + else { + // If no values are being returned, ensure an empty vector comes back + formats->clear(); } - formats->assign(&value[0], &value[numEntries]); return CL_SUCCESS; } }; -inline Device Device::getDefault(cl_int * err) +inline void Device::makeDefault() { - cl_int error; - Device device; + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + cl_int error = 0; - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; + if (error != CL_SUCCESS) { + default_error_ = error; } - } - else { - device = context.getInfo()[0]; - if (err != NULL) { - *err = CL_SUCCESS; + else { + default_ = context.getInfo()[0]; + default_error_ = CL_SUCCESS; } } - - return device; +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif } - -#ifdef _WIN32 -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) std::atomic Context::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) Context Context::default_; -__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS; -#else // !_WIN32 -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) std::atomic Context::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) Context Context::default_; -__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS; -#endif // !_WIN32 +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Context::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ Context Context::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Context::default_error_ = CL_SUCCESS; /*! \brief Class interface for cl_event. * @@ -2814,10 +2983,14 @@ public: /*! \brief Constructor from cl_event - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * This effectively transfers ownership of a refcount on the cl_event * into the new Event object. */ - __CL_EXPLICIT_CONSTRUCTORS Event(const cl_event& event) : detail::Wrapper(event) { } + explicit Event(const cl_event& event, bool retainObject = false) : + detail::Wrapper(event, retainObject) { } /*! \brief Assignment operator from cl_event - takes ownership. * @@ -2887,7 +3060,7 @@ public: __WAIT_FOR_EVENTS_ERR); } -#if defined(CL_VERSION_1_1) +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 /*! \brief Registers a user callback function for a specific command execution status. * * Wraps clSetEventCallback(). @@ -2905,14 +3078,14 @@ public: user_data), __SET_EVENT_CALLBACK_ERR); } -#endif +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 /*! \brief Blocks the calling thread until every event specified is complete. * * Wraps clWaitForEvents(). */ static cl_int - waitForEvents(const VECTOR_CLASS& events) + waitForEvents(const vector& events) { return detail::errHandler( ::clWaitForEvents( @@ -2921,7 +3094,7 @@ public: } }; -#if defined(CL_VERSION_1_1) +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 /*! \brief Class interface for user events (a subset of cl_event's). * * See Event for details about copy semantics, etc. @@ -2962,14 +3135,14 @@ public: __SET_USER_EVENT_STATUS_ERR); } }; -#endif +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 /*! \brief Blocks the calling thread until every event specified is complete. * * Wraps clWaitForEvents(). */ inline static cl_int -WaitForEvents(const VECTOR_CLASS& events) +WaitForEvents(const vector& events) { return detail::errHandler( ::clWaitForEvents( @@ -2992,11 +3165,18 @@ public: Memory() : detail::Wrapper() { } /*! \brief Constructor from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_mem + * + * Optionally transfer ownership of a refcount on the cl_mem * into the new Memory object. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * + * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper(memory) { } + explicit Memory(const cl_mem& memory, bool retainObject) : + detail::Wrapper(memory, retainObject) { } /*! \brief Assignment operator from cl_mem - takes ownership. * @@ -3023,11 +3203,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Memory(Memory&& mem) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(mem)) {} + Memory(Memory&& mem) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(mem)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -3037,7 +3216,7 @@ public: detail::Wrapper::operator=(std::move(mem)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + //! \brief Wrapper for clGetMemObjectInfo(). template @@ -3062,7 +3241,7 @@ public: return param; } -#if defined(CL_VERSION_1_1) +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 /*! \brief Registers a callback function to be called when the memory object * is no longer needed. * @@ -3087,7 +3266,7 @@ public: user_data), __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); } -#endif +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 }; @@ -3103,57 +3282,417 @@ template< typename IteratorType > cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); -/*! \brief Class interface for Buffer Memory Objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Buffer : public Memory +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +namespace detail { -public: - - /*! \brief Constructs a Buffer in a specified context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context& context, - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) + class SVMTraitNull { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; + public: + static cl_svm_mem_flags getSVMMemFlags() + { + return 0; } + }; +} // namespace detail + +template +class SVMTraitReadWrite +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_READ_WRITE | + Trait::getSVMMemFlags(); } +}; - /*! \brief Constructs a Buffer in the default context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - * - * \see Context::getDefault() - */ - Buffer( - cl_mem_flags flags, - ::size_t size, - void* host_ptr = NULL, - cl_int* err = NULL) +template +class SVMTraitReadOnly +{ +public: + static cl_svm_mem_flags getSVMMemFlags() { - cl_int error; + return CL_MEM_READ_ONLY | + Trait::getSVMMemFlags(); + } +}; - Context context = Context::getDefault(err); +template +class SVMTraitWriteOnly +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_WRITE_ONLY | + Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitCoarse +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitFine +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return CL_MEM_SVM_FINE_GRAIN_BUFFER | + Trait::getSVMMemFlags(); + } +}; + +template> +class SVMTraitAtomic +{ +public: + static cl_svm_mem_flags getSVMMemFlags() + { + return + CL_MEM_SVM_FINE_GRAIN_BUFFER | + CL_MEM_SVM_ATOMICS | + Trait::getSVMMemFlags(); + } +}; + +// Pre-declare SVM map function +template +inline cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL); + +/** + * STL-like allocator class for managing SVM objects provided for convenience. + * + * Note that while this behaves like an allocator for the purposes of constructing vectors and similar objects, + * care must be taken when using with smart pointers. + * The allocator should not be used to construct a unique_ptr if we are using coarse-grained SVM mode because + * the coarse-grained management behaviour would behave incorrectly with respect to reference counting. + * + * Instead the allocator embeds a Deleter which may be used with unique_ptr and is used + * with the allocate_shared and allocate_ptr supplied operations. + */ +template +class SVMAllocator { +private: + Context context_; + +public: + typedef T value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + typedef value_type& reference; + typedef const value_type& const_reference; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + template + struct rebind + { + typedef SVMAllocator other; + }; + + template + friend class SVMAllocator; + + SVMAllocator() : + context_(Context::getDefault()) + { + } + + explicit SVMAllocator(cl::Context context) : + context_(context) + { + } + + + SVMAllocator(const SVMAllocator &other) : + context_(other.context_) + { + } + + template + SVMAllocator(const SVMAllocator &other) : + context_(other.context_) + { + } + + ~SVMAllocator() + { + } + + pointer address(reference r) CL_HPP_NOEXCEPT_ + { + return std::addressof(r); + } + + const_pointer address(const_reference r) CL_HPP_NOEXCEPT_ + { + return std::addressof(r); + } + + /** + * Allocate an SVM pointer. + * + * If the allocator is coarse-grained, this will take ownership to allow + * containers to correctly construct data in place. + */ + pointer allocate( + size_type size, + typename cl::SVMAllocator::const_pointer = 0) + { + // Allocate memory with default alignment matching the size of the type + void* voidPointer = + clSVMAlloc( + context_(), + SVMTrait::getSVMMemFlags(), + size*sizeof(T), + 0); + pointer retValue = reinterpret_cast( + voidPointer); +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + if (!retValue) { + std::bad_alloc excep; + throw excep; + } +#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + + // If allocation was coarse-grained then map it + if (!(SVMTrait::getSVMMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { + cl_int err = enqueueMapSVM(retValue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, size*sizeof(T)); + if (err != CL_SUCCESS) { + std::bad_alloc excep; + throw excep; + } + } + + // If exceptions disabled, return null pointer from allocator + return retValue; + } + + void deallocate(pointer p, size_type) + { + clSVMFree(context_(), p); + } + + /** + * Return the maximum possible allocation size. + * This is the minimum of the maximum sizes of all devices in the context. + */ + size_type max_size() const CL_HPP_NOEXCEPT_ + { + size_type maxSize = std::numeric_limits::max() / sizeof(T); + + for (Device &d : context_.getInfo()) { + maxSize = std::min( + maxSize, + static_cast(d.getInfo())); + } + + return maxSize; + } + + template< class U, class... Args > + void construct(U* p, Args&&... args) + { + new(p)T(args...); + } + + template< class U > + void destroy(U* p) + { + p->~U(); + } + + /** + * Returns true if the contexts match. + */ + inline bool operator==(SVMAllocator const& rhs) + { + return (context_==rhs.context_); + } + + inline bool operator!=(SVMAllocator const& a) + { + return !operator==(a); + } +}; // class SVMAllocator return cl::pointer(tmp, detail::Deleter{alloc, copies}); + + +template +class SVMAllocator { +public: + typedef void value_type; + typedef value_type* pointer; + typedef const value_type* const_pointer; + + template + struct rebind + { + typedef SVMAllocator other; + }; + + template + friend class SVMAllocator; +}; + +#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) +namespace detail +{ + template + class Deleter { + private: + Alloc alloc_; + size_type copies_; + + public: + typedef typename std::allocator_traits::pointer pointer; + + Deleter(const Alloc &alloc, size_type copies) : alloc_{ alloc }, copies_{ copies } + { + } + + void operator()(pointer ptr) const { + Alloc tmpAlloc{ alloc_ }; + std::allocator_traits::destroy(tmpAlloc, std::addressof(*ptr)); + std::allocator_traits::deallocate(tmpAlloc, ptr, copies_); + } + }; +} // namespace detail + +/** + * Allocation operation compatible with std::allocate_ptr. + * Creates a unique_ptr by default. + * This requirement is to ensure that the control block is not + * allocated in memory inaccessible to the host. + */ +template +cl::pointer> allocate_pointer(const Alloc &alloc_, Args&&... args) +{ + Alloc alloc(alloc_); + static const size_type copies = 1; + + // Ensure that creation of the management block and the + // object are dealt with separately such that we only provide a deleter + + T* tmp = std::allocator_traits::allocate(alloc, copies); + if (!tmp) { + std::bad_alloc excep; + throw excep; + } + try { + std::allocator_traits::construct( + alloc, + std::addressof(*tmp), + std::forward(args)...); + + return cl::pointer>(tmp, detail::Deleter{alloc, copies}); + } + catch (std::bad_alloc b) + { + std::allocator_traits::deallocate(alloc, tmp, copies); + throw; + } +} + +template< class T, class SVMTrait, class... Args > +cl::pointer>> allocate_svm(Args... args) +{ + SVMAllocator alloc; + return cl::allocate_pointer(alloc, args...); +} + +template< class T, class SVMTrait, class... Args > +cl::pointer>> allocate_svm(const cl::Context &c, Args... args) +{ + SVMAllocator alloc(c); + return cl::allocate_pointer(alloc, args...); +} +#endif // #if !defined(CL_HPP_NO_STD_UNIQUE_PTR) + +/*! \brief Vector alias to simplify contruction of coarse-grained SVM containers. + * + */ +template < class T > +using coarse_svm_vector = vector>>; + +/*! \brief Vector alias to simplify contruction of fine-grained SVM containers. +* +*/ +template < class T > +using fine_svm_vector = vector>>; + +/*! \brief Vector alias to simplify contruction of fine-grained SVM containers that support platform atomics. +* +*/ +template < class T > +using atomic_svm_vector = vector>>; + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + +/*! \brief Class interface for Buffer Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Buffer : public Memory +{ +public: + + /*! \brief Constructs a Buffer in a specified context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + */ + Buffer( + const Context& context, + cl_mem_flags flags, + size_type size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer( + cl_mem_flags flags, + size_type size, + void* host_ptr = NULL, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); @@ -3190,7 +3729,7 @@ public: flags |= CL_MEM_USE_HOST_PTR; } - ::size_t size = sizeof(DataType)*(endIterator - startIterator); + size_type size = sizeof(DataType)*(endIterator - startIterator); Context context = Context::getDefault(err); @@ -3222,10 +3761,10 @@ public: template< typename IteratorType > Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, bool readOnly, bool useHostPtr = false, cl_int* err = NULL); - + /*! * \brief Construct a Buffer from a host container via iterators using a specified queue. - * If useHostPtr is specified iterators must represent contiguous data. + * If useHostPtr is specified iterators must be random access. */ template< typename IteratorType > Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, @@ -3235,21 +3774,25 @@ public: Buffer() : Memory() { } /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with earlier versions. * * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { } + explicit Buffer(const cl_mem& buffer, bool retainObject = false) : + Memory(buffer, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ + * + * See Memory for further details. + */ Buffer& operator = (const cl_mem& rhs) { Memory::operator=(rhs); return *this; } - + /*! \brief Copy constructor to forward copy to the superclass correctly. * Required for MSVC. */ @@ -3263,12 +3806,11 @@ public: Memory::operator=(buf); return *this; } - -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Buffer(Buffer&& buf) CL_HPP_NOEXCEPT : Memory(std::move(buf)) {} + Buffer(Buffer&& buf) CL_HPP_NOEXCEPT_ : Memory(std::move(buf)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -3278,9 +3820,8 @@ public: Memory::operator=(std::move(buf)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -#if defined(CL_VERSION_1_1) +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 /*! \brief Creates a new buffer object from this. * * Wraps clCreateSubBuffer(). @@ -3307,10 +3848,10 @@ public: return result; } -#endif +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 }; -#if defined (USE_DX_INTEROP) +#if defined (CL_HPP_USE_DX_INTEROP) /*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. * * This is provided to facilitate interoperability with Direct3D. @@ -3322,9 +3863,7 @@ public: class BufferD3D10 : public Buffer { public: - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); + /*! \brief Constructs a BufferD3D10, in a specified context, from a * given ID3D10Buffer. @@ -3335,11 +3874,13 @@ public: const Context& context, cl_mem_flags flags, ID3D10Buffer* bufobj, - cl_int * err = NULL) + cl_int * err = NULL) : pfn_clCreateFromD3D10BufferKHR(nullptr) { - static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL; - -#if defined(CL_VERSION_1_2) + typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, + cl_int* errcode_ret); + PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR; +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 vector props = context.getInfo(); cl_platform platform = -1; for( int i = 0; i < props.size(); ++i ) { @@ -3347,10 +3888,9 @@ public: platform = props[i+1]; } } - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR); -#endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateFromD3D10BufferKHR); +#elif CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateFromD3D10BufferKHR); #endif cl_int error; @@ -3371,9 +3911,13 @@ public: /*! \brief Constructor from cl_mem - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { } + explicit BufferD3D10(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. * @@ -3386,34 +3930,33 @@ public: } /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(const BufferD3D10& buf) : Buffer(buf) {} + * Required for MSVC. + */ + BufferD3D10(const BufferD3D10& buf) : + Buffer(buf) {} /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ + * Required for MSVC. + */ BufferD3D10& operator = (const BufferD3D10 &buf) { Buffer::operator=(buf); return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferD3D10(BufferD3D10&& buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} + * Required for MSVC. + */ + BufferD3D10(BufferD3D10&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ + * Required for MSVC. + */ BufferD3D10& operator = (BufferD3D10 &&buf) { Buffer::operator=(std::move(buf)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) }; #endif @@ -3457,9 +4000,13 @@ public: /*! \brief Constructor from cl_mem - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { } + explicit BufferGL(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. * @@ -3472,34 +4019,32 @@ public: } /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ + * Required for MSVC. + */ BufferGL(const BufferGL& buf) : Buffer(buf) {} /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ + * Required for MSVC. + */ BufferGL& operator = (const BufferGL &buf) { Buffer::operator=(buf); return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferGL(BufferGL&& buf) CL_HPP_NOEXCEPT : Buffer(std::move(buf)) {} + * Required for MSVC. + */ + BufferGL(BufferGL&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ + * Required for MSVC. + */ BufferGL& operator = (BufferGL &&buf) { Buffer::operator=(std::move(buf)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) //! \brief Wrapper for clGetGLObjectInfo(). cl_int getObjectInfo( @@ -3512,30 +4057,131 @@ public: } }; -/*! \brief C++ base class for Image Memory objects. +/*! \brief Class interface for GL Render Buffer Memory Objects. * + * This is provided to facilitate interoperability with OpenGL. + * * See Memory for details about copy semantics, etc. * * \see Memory */ -class Image : public Memory +class BufferRenderGL : public Buffer { -protected: - //! \brief Default constructor - initializes to NULL. - Image() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ - __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. +public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. * - * See Memory for further details. + * Wraps clCreateFromGLRenderbuffer(). */ - Image& operator = (const cl_mem& rhs) - { + BufferRenderGL( + const Context& context, + cl_mem_flags flags, + cl_GLuint bufobj, + cl_int * err = NULL) + { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer( + context(), + flags, + bufobj, + &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + } + + //! \brief Default constructor - initializes to NULL. + BufferRenderGL() : Buffer() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferRenderGL(const cl_mem& buffer, bool retainObject = false) : + Buffer(buffer, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL& operator = (const cl_mem& rhs) + { + Buffer::operator=(rhs); + return *this; + } + + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL(const BufferRenderGL& buf) : Buffer(buf) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL& operator = (const BufferRenderGL &buf) + { + Buffer::operator=(buf); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT_ : Buffer(std::move(buf)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + BufferRenderGL& operator = (BufferRenderGL &&buf) + { + Buffer::operator=(std::move(buf)); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo( + cl_gl_object_type *type, + cl_GLuint * gl_object_name) + { + return detail::errHandler( + ::clGetGLObjectInfo(object_,type,gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } +}; + +/*! \brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ +class Image : public Memory +{ +protected: + //! \brief Default constructor - initializes to NULL. + Image() : Memory() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image(const cl_mem& image, bool retainObject = false) : + Memory(image, retainObject) { } + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image& operator = (const cl_mem& rhs) + { Memory::operator=(rhs); return *this; } @@ -3554,11 +4200,10 @@ protected: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image(Image&& img) CL_HPP_NOEXCEPT : Memory(std::move(img)) {} + Image(Image&& img) CL_HPP_NOEXCEPT_ : Memory(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -3568,7 +4213,7 @@ protected: Memory::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + public: //! \brief Wrapper for clGetImageInfo(). @@ -3595,7 +4240,7 @@ public: } }; -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 /*! \brief Class interface for 1D Image Memory objects. * * See Memory for details about copy semantics, etc. @@ -3613,7 +4258,7 @@ public: const Context& context, cl_mem_flags flags, ImageFormat format, - ::size_t width, + size_type width, void* host_ptr = NULL, cl_int* err = NULL) { @@ -3643,9 +4288,13 @@ public: /*! \brief Constructor from cl_mem - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { } + explicit Image1D(const cl_mem& image1D, bool retainObject = false) : + Image(image1D, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. * @@ -3671,11 +4320,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image1D(Image1D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + Image1D(Image1D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -3685,7 +4333,7 @@ public: Image::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + }; /*! \class Image1DBuffer @@ -3698,7 +4346,7 @@ public: const Context& context, cl_mem_flags flags, ImageFormat format, - ::size_t width, + size_type width, const Buffer &buffer, cl_int* err = NULL) { @@ -3726,14 +4374,22 @@ public: Image1DBuffer() { } - __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1DBuffer(const cl_mem& image1D, bool retainObject = false) : + Image(image1D, retainObject) { } Image1DBuffer& operator = (const cl_mem& rhs) { Image::operator=(rhs); return *this; } - + /*! \brief Copy constructor to forward copy to the superclass correctly. * Required for MSVC. */ @@ -3748,11 +4404,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image1DBuffer(Image1DBuffer&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + Image1DBuffer(Image1DBuffer&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -3762,7 +4417,7 @@ public: Image::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + }; /*! \class Image1DArray @@ -3775,9 +4430,9 @@ public: const Context& context, cl_mem_flags flags, ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t rowPitch, + size_type arraySize, + size_type width, + size_type rowPitch, void* host_ptr = NULL, cl_int* err = NULL) { @@ -3806,15 +4461,24 @@ public: } Image1DArray() { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1DArray(const cl_mem& imageArray, bool retainObject = false) : + Image(imageArray, retainObject) { } - __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { } Image1DArray& operator = (const cl_mem& rhs) { Image::operator=(rhs); return *this; } - + /*! \brief Copy constructor to forward copy to the superclass correctly. * Required for MSVC. */ @@ -3829,11 +4493,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image1DArray(Image1DArray&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + Image1DArray(Image1DArray&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -3843,9 +4506,9 @@ public: Image::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + }; -#endif // #if defined(CL_VERSION_1_2) +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 /*! \brief Class interface for 2D Image Memory objects. @@ -3857,7 +4520,7 @@ public: class Image2D : public Image { public: - /*! \brief Constructs a 1D Image in a specified context. + /*! \brief Constructs a 2D Image in a specified context. * * Wraps clCreateImage(). */ @@ -3865,28 +4528,28 @@ public: const Context& context, cl_mem_flags flags, ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t row_pitch = 0, + size_type width, + size_type height, + size_type row_pitch = 0, void* host_ptr = NULL, cl_int* err = NULL) { cl_int error; bool useCreateImage; -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 // Run-time decision based on the actual platform { cl_uint version = detail::getContextPlatformVersion(context()); useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above } -#elif defined(CL_VERSION_1_2) +#elif CL_HPP_TARGET_OPENCL_VERSION >= 120 useCreateImage = true; #else useCreateImage = false; #endif -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 if (useCreateImage) { cl_image_desc desc = @@ -3911,8 +4574,8 @@ public: *err = error; } } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 if (!useCreateImage) { object_ = ::clCreateImage2D( @@ -3923,17 +4586,130 @@ public: *err = error; } } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief Constructs a 2D Image from a buffer. + * \note This will share storage with the underlying buffer. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + ImageFormat format, + const Buffer &sourceBuffer, + size_type width, + size_type height, + size_type row_pitch = 0, + cl_int* err = nullptr) + { + cl_int error; + + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + width, + height, + 0, 0, // depth, array size (unused) + row_pitch, + 0, 0, 0, + // Use buffer as input to image + sourceBuffer() + }; + object_ = ::clCreateImage( + context(), + 0, // flags inherited from buffer + &format, + &desc, + nullptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { + *err = error; + } + } +#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief Constructs a 2D Image from an image. + * \note This will share storage with the underlying image but may + * reinterpret the channel order and type. + * + * The image will be created matching with a descriptor matching the source. + * + * \param order is the channel order to reinterpret the image data as. + * The channel order may differ as described in the OpenCL + * 2.0 API specification. + * + * Wraps clCreateImage(). + */ + Image2D( + const Context& context, + cl_channel_order order, + const Image &sourceImage, + cl_int* err = nullptr) + { + cl_int error; + + // Descriptor fields have to match source image + size_type sourceWidth = + sourceImage.getImageInfo(); + size_type sourceHeight = + sourceImage.getImageInfo(); + size_type sourceRowPitch = + sourceImage.getImageInfo(); + cl_uint sourceNumMIPLevels = + sourceImage.getImageInfo(); + cl_uint sourceNumSamples = + sourceImage.getImageInfo(); + cl_image_format sourceFormat = + sourceImage.getImageInfo(); + + // Update only the channel order. + // Channel format inherited from source. + sourceFormat.image_channel_order = order; + cl_image_desc desc = + { + CL_MEM_OBJECT_IMAGE2D, + sourceWidth, + sourceHeight, + 0, 0, // depth (unused), array size (unused) + sourceRowPitch, + 0, // slice pitch (unused) + sourceNumMIPLevels, + sourceNumSamples, + // Use buffer as input to image + sourceImage() + }; + object_ = ::clCreateImage( + context(), + 0, // flags should be inherited from mem_object + &sourceFormat, + &desc, + nullptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { + *err = error; + } } +#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 //! \brief Default constructor - initializes to NULL. Image2D() { } /*! \brief Constructor from cl_mem - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { } + explicit Image2D(const cl_mem& image2D, bool retainObject = false) : + Image(image2D, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. * @@ -3959,11 +4735,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image2D(Image2D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + Image2D(Image2D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -3973,11 +4748,11 @@ public: Image::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + }; -#if !defined(CL_VERSION_1_2) +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) /*! \brief Class interface for GL 2D Image Memory objects. * * This is provided to facilitate interoperability with OpenGL. @@ -3987,7 +4762,7 @@ public: * \see Memory * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. */ -class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D +class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D { public: /*! \brief Constructs an Image2DGL in a specified context, from a given @@ -4024,12 +4799,16 @@ public: /*! \brief Constructor from cl_mem - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { } + explicit Image2DGL(const cl_mem& image, bool retainObject = false) : + Image2D(image, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. - * + *c * See Memory for further details. */ Image2DGL& operator = (const cl_mem& rhs) @@ -4052,11 +4831,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image2DGL(Image2DGL&& img) CL_HPP_NOEXCEPT : Image2D(std::move(img)) {} + Image2DGL(Image2DGL&& img) CL_HPP_NOEXCEPT_ : Image2D(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -4066,11 +4844,11 @@ public: Image2D::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) -}; -#endif // #if !defined(CL_VERSION_1_2) -#if defined(CL_VERSION_1_2) +} CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 /*! \class Image2DArray * \brief Image interface for arrays of 2D images. */ @@ -4081,11 +4859,11 @@ public: const Context& context, cl_mem_flags flags, ImageFormat format, - ::size_t arraySize, - ::size_t width, - ::size_t height, - ::size_t rowPitch, - ::size_t slicePitch, + size_type arraySize, + size_type width, + size_type height, + size_type rowPitch, + size_type slicePitch, void* host_ptr = NULL, cl_int* err = NULL) { @@ -4116,15 +4894,22 @@ public: } Image2DArray() { } - - __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2DArray(const cl_mem& imageArray, bool retainObject = false) : Image(imageArray, retainObject) { } Image2DArray& operator = (const cl_mem& rhs) { Image::operator=(rhs); return *this; } - + /*! \brief Copy constructor to forward copy to the superclass correctly. * Required for MSVC. */ @@ -4139,11 +4924,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image2DArray(Image2DArray&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + Image2DArray(Image2DArray&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -4153,9 +4937,8 @@ public: Image::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) }; -#endif // #if defined(CL_VERSION_1_2) +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 /*! \brief Class interface for 3D Image Memory objects. * @@ -4174,30 +4957,30 @@ public: const Context& context, cl_mem_flags flags, ImageFormat format, - ::size_t width, - ::size_t height, - ::size_t depth, - ::size_t row_pitch = 0, - ::size_t slice_pitch = 0, + size_type width, + size_type height, + size_type depth, + size_type row_pitch = 0, + size_type slice_pitch = 0, void* host_ptr = NULL, cl_int* err = NULL) { cl_int error; bool useCreateImage; -#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 // Run-time decision based on the actual platform { cl_uint version = detail::getContextPlatformVersion(context()); useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above } -#elif defined(CL_VERSION_1_2) +#elif CL_HPP_TARGET_OPENCL_VERSION >= 120 useCreateImage = true; #else useCreateImage = false; #endif -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 if (useCreateImage) { cl_image_desc desc = @@ -4224,8 +5007,8 @@ public: *err = error; } } -#endif // #if defined(CL_VERSION_1_2) -#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 if (!useCreateImage) { object_ = ::clCreateImage3D( @@ -4237,7 +5020,7 @@ public: *err = error; } } -#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 } //! \brief Default constructor - initializes to NULL. @@ -4245,9 +5028,13 @@ public: /*! \brief Constructor from cl_mem - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { } + explicit Image3D(const cl_mem& image3D, bool retainObject = false) : + Image(image3D, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. * @@ -4273,11 +5060,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image3D(Image3D&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + Image3D(Image3D&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -4287,10 +5073,9 @@ public: Image::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) }; -#if !defined(CL_VERSION_1_2) +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) /*! \brief Class interface for GL 3D Image Memory objects. * * This is provided to facilitate interoperability with OpenGL. @@ -4335,9 +5120,13 @@ public: /*! \brief Constructor from cl_mem - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * See Memory for further details. */ - __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { } + explicit Image3DGL(const cl_mem& image, bool retainObject = false) : + Image3D(image, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. * @@ -4363,11 +5152,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Image3DGL(Image3DGL&& img) CL_HPP_NOEXCEPT : Image3D(std::move(img)) {} + Image3DGL(Image3DGL&& img) CL_HPP_NOEXCEPT_ : Image3D(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -4377,11 +5165,10 @@ public: Image3D::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) }; -#endif // #if !defined(CL_VERSION_1_2) +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 /*! \class ImageGL * \brief general image interface for GL interop. * We abstract the 2D and 3D GL images into a single instance here @@ -4415,8 +5202,16 @@ public: } ImageGL() : Image() { } - - __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit ImageGL(const cl_mem& image, bool retainObject = false) : + Image(image, retainObject) { } ImageGL& operator = (const cl_mem& rhs) { @@ -4438,11 +5233,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - ImageGL(ImageGL&& img) CL_HPP_NOEXCEPT : Image(std::move(img)) {} + ImageGL(ImageGL&& img) CL_HPP_NOEXCEPT_ : Image(std::move(img)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -4452,141 +5246,150 @@ public: Image::operator=(std::move(img)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) }; -#endif // #if defined(CL_VERSION_1_2) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -/*! \brief Class interface for GL Render Buffer Memory Objects. -* -* This is provided to facilitate interoperability with OpenGL. + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/*! \brief Class interface for Pipe Memory Objects. * * See Memory for details about copy semantics, etc. * * \see Memory */ -class BufferRenderGL : -#if defined(CL_VERSION_1_2) - public ImageGL -#else // #if defined(CL_VERSION_1_2) - public Image2DGL -#endif //#if defined(CL_VERSION_1_2) +class Pipe : public Memory { public: - /*! \brief Constructs a BufferRenderGL in a specified context, from a given - * GL Renderbuffer. - * - * Wraps clCreateFromGLRenderbuffer(). - */ - BufferRenderGL( + + /*! \brief Constructs a Pipe in a specified context. + * + * Wraps clCreatePipe(). + * @param context Context in which to create the pipe. + * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. + * @param packet_size Size in bytes of a single packet of the pipe. + * @param max_packets Number of packets that may be stored in the pipe. + * + */ + Pipe( const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = NULL) + cl_uint packet_size, + cl_uint max_packets, + cl_int* err = NULL) { cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; + object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); + + detail::errHandler(error, __CREATE_PIPE_ERR); + if (err != NULL) { + *err = error; + } + } + + /*! \brief Constructs a Pipe in a the default context. + * + * Wraps clCreatePipe(). + * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. + * @param packet_size Size in bytes of a single packet of the pipe. + * @param max_packets Number of packets that may be stored in the pipe. + * + */ + Pipe( + cl_uint packet_size, + cl_uint max_packets, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(err); + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; + object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); + + detail::errHandler(error, __CREATE_PIPE_ERR); if (err != NULL) { *err = error; } } //! \brief Default constructor - initializes to NULL. -#if defined(CL_VERSION_1_2) - BufferRenderGL() : ImageGL() {}; -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL() : Image2DGL() {}; -#endif //#if defined(CL_VERSION_1_2) + Pipe() : Memory() { } /*! \brief Constructor from cl_mem - takes ownership. - * - * See Memory for further details. - */ -#if defined(CL_VERSION_1_2) - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : ImageGL(buffer) { } -#else // #if defined(CL_VERSION_1_2) - __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Image2DGL(buffer) { } -#endif //#if defined(CL_VERSION_1_2) - + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with earlier versions. + * + * See Memory for further details. + */ + explicit Pipe(const cl_mem& pipe, bool retainObject = false) : + Memory(pipe, retainObject) { } /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const cl_mem& rhs) + * + * See Memory for further details. + */ + Pipe& operator = (const cl_mem& rhs) { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(rhs); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(rhs); -#endif //#if defined(CL_VERSION_1_2) - + Memory::operator=(rhs); return *this; } /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ -#if defined(CL_VERSION_1_2) - BufferRenderGL(const BufferRenderGL& buf) : ImageGL(buf) {} -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL(const BufferRenderGL& buf) : Image2DGL(buf) {} -#endif //#if defined(CL_VERSION_1_2) + * Required for MSVC. + */ + Pipe(const Pipe& pipe) : Memory(pipe) {} /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL& operator = (const BufferRenderGL &rhs) + * Required for MSVC. + */ + Pipe& operator = (const Pipe &pipe) { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(rhs); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(rhs); -#endif //#if defined(CL_VERSION_1_2) + Memory::operator=(pipe); return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ -#if defined(CL_VERSION_1_2) - BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT : ImageGL(std::move(buf)) {} -#else // #if defined(CL_VERSION_1_2) - BufferRenderGL(BufferRenderGL&& buf) CL_HPP_NOEXCEPT : Image2DGL(std::move(buf)) {} -#endif //#if defined(CL_VERSION_1_2) - + * Required for MSVC. + */ + Pipe(Pipe&& pipe) CL_HPP_NOEXCEPT_ : Memory(std::move(pipe)) {} /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. - */ - BufferRenderGL& operator = (BufferRenderGL &&buf) + * Required for MSVC. + */ + Pipe& operator = (Pipe &&pipe) { -#if defined(CL_VERSION_1_2) - ImageGL::operator=(std::move(buf)); -#else // #if defined(CL_VERSION_1_2) - Image2DGL::operator=(std::move(buf)); -#endif //#if defined(CL_VERSION_1_2) - + Memory::operator=(std::move(pipe)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) + //! \brief Wrapper for clGetMemObjectInfo(). + template + cl_int getInfo(cl_pipe_info name, T* param) const { return detail::errHandler( - ::clGetGLObjectInfo(object_, type, gl_object_name), - __GET_GL_OBJECT_INFO_ERR); + detail::getInfo(&::clGetPipeInfo, object_, name, param), + __GET_PIPE_INFO_ERR); } -}; + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_pipe_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +}; // class Pipe +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief Class interface for cl_sampler. * @@ -4614,8 +5417,25 @@ public: cl_int* err = NULL) { cl_int error; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_sampler_properties sampler_properties[] = { + CL_SAMPLER_NORMALIZED_COORDS, normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, addressing_mode, + CL_SAMPLER_FILTER_MODE, filter_mode, + 0 }; + object_ = ::clCreateSamplerWithProperties( + context(), + sampler_properties, + &error); + + detail::errHandler(error, __CREATE_SAMPLER_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else object_ = ::clCreateSampler( - context(), + context(), normalized_coords, addressing_mode, filter_mode, @@ -4625,14 +5445,19 @@ public: if (err != NULL) { *err = error; } +#endif } /*! \brief Constructor from cl_sampler - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * This effectively transfers ownership of a refcount on the cl_sampler * into the new Sampler object. */ - __CL_EXPLICIT_CONSTRUCTORS Sampler(const cl_sampler& sampler) : detail::Wrapper(sampler) { } + explicit Sampler(const cl_sampler& sampler, bool retainObject = false) : + detail::Wrapper(sampler, retainObject) { } /*! \brief Assignment operator from cl_sampler - takes ownership. * @@ -4659,11 +5484,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Sampler(Sampler&& sam) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(sam)) {} + Sampler(Sampler&& sam) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(sam)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -4673,7 +5497,6 @@ public: detail::Wrapper::operator=(std::move(sam)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) //! \brief Wrapper for clGetSamplerInfo(). template @@ -4701,38 +5524,46 @@ public: class Program; class CommandQueue; +class DeviceCommandQueue; class Kernel; //! \brief Class interface for specifying NDRange values. class NDRange { private: - size_t<3> sizes_; + size_type sizes_[3]; cl_uint dimensions_; public: //! \brief Default constructor - resulting range has zero dimensions. NDRange() : dimensions_(0) - { } + { + sizes_[0] = 0; + sizes_[1] = 0; + sizes_[2] = 0; + } //! \brief Constructs one-dimensional range. - NDRange(::size_t size0) + NDRange(size_type size0) : dimensions_(1) { sizes_[0] = size0; + sizes_[1] = 1; + sizes_[2] = 1; } //! \brief Constructs two-dimensional range. - NDRange(::size_t size0, ::size_t size1) + NDRange(size_type size0, size_type size1) : dimensions_(2) { sizes_[0] = size0; sizes_[1] = size1; + sizes_[2] = 1; } //! \brief Constructs three-dimensional range. - NDRange(::size_t size0, ::size_t size1, ::size_t size2) + NDRange(size_type size0, size_type size1, size_type size2) : dimensions_(3) { sizes_[0] = size0; @@ -4740,16 +5571,36 @@ public: sizes_[2] = size2; } - /*! \brief Conversion operator to const ::size_t *. + /*! \brief Conversion operator to const size_type *. * * \returns a pointer to the size of the first dimension. */ - operator const ::size_t*() const { - return (const ::size_t*) sizes_; + operator const size_type*() const { + return sizes_; } //! \brief Queries the number of dimensions in the range. - ::size_t dimensions() const { return dimensions_; } + size_type dimensions() const + { + return dimensions_; + } + + //! \brief Returns the size of the object in bytes based on the + // runtime number of dimensions + size_type size() const + { + return dimensions_*sizeof(size_type); + } + + size_type* get() + { + return sizes_; + } + + const size_type* get() const + { + return sizes_; + } }; //! \brief A zero-dimensional range. @@ -4758,53 +5609,54 @@ static const NDRange NullRange; //! \brief Local address wrapper for use with Kernel::setArg struct LocalSpaceArg { - ::size_t size_; + size_type size_; }; namespace detail { +template +struct KernelArgumentHandler; + +// Enable for objects that are not subclasses of memory +// Pointers, constants etc template -struct KernelArgumentHandler +struct KernelArgumentHandler::value>::type> { - static ::size_t size(const T&) { return sizeof(T); } + static size_type size(const T&) { return sizeof(T); } static const T* ptr(const T& value) { return &value; } }; +// Enable for subclasses of memory where we want to get a reference to the cl_mem out +// and pass that in for safety +template +struct KernelArgumentHandler::value>::type> +{ + static size_type size(const T&) { return sizeof(cl_mem); } + static const cl_mem* ptr(const T& value) { return &(value()); } +}; + +// Specialization for DeviceCommandQueue defined later + template <> -struct KernelArgumentHandler +struct KernelArgumentHandler { - static ::size_t size(const LocalSpaceArg& value) { return value.size_; } + static size_type size(const LocalSpaceArg& value) { return value.size_; } static const void* ptr(const LocalSpaceArg&) { return NULL; } }; } //! \endcond -/*! __local - * \brief Helper function for generating LocalSpaceArg objects. - * Deprecated. Replaced with Local. - */ -inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg -__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED; -inline LocalSpaceArg -__local(::size_t size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - /*! Local * \brief Helper function for generating LocalSpaceArg objects. */ inline LocalSpaceArg -Local(::size_t size) +Local(size_type size) { LocalSpaceArg ret = { size }; return ret; } -//class KernelFunctor; - /*! \brief Class interface for cl_kernel. * * \note Copies of these objects are shallow, meaning that the copy will refer @@ -4823,10 +5675,14 @@ public: /*! \brief Constructor from cl_kernel - takes ownership. * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. * This effectively transfers ownership of a refcount on the cl_kernel * into the new Kernel object. */ - __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper(kernel) { } + explicit Kernel(const cl_kernel& kernel, bool retainObject = false) : + detail::Wrapper(kernel, retainObject) { } /*! \brief Assignment operator from cl_kernel - takes ownership. * @@ -4853,11 +5709,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Kernel(Kernel&& kernel) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(kernel)) {} + Kernel(Kernel&& kernel) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(kernel)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -4867,7 +5722,6 @@ public: detail::Wrapper::operator=(std::move(kernel)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) template cl_int getInfo(cl_kernel_info name, T* param) const @@ -4890,7 +5744,7 @@ public: return param; } -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 template cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const { @@ -4911,7 +5765,7 @@ public: } return param; } -#endif // #if defined(CL_VERSION_1_2) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 template cl_int getWorkGroupInfo( @@ -4935,9 +5789,71 @@ public: } return param; } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) + cl_int getSubGroupInfo(const cl::Device &dev, cl_kernel_sub_group_info name, const cl::NDRange &range, size_type* param) const + { + typedef clGetKernelSubGroupInfoKHR_fn PFN_clGetKernelSubGroupInfoKHR; + static PFN_clGetKernelSubGroupInfoKHR pfn_clGetKernelSubGroupInfoKHR = NULL; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetKernelSubGroupInfoKHR); + + return detail::errHandler( + pfn_clGetKernelSubGroupInfoKHR(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), + __GET_KERNEL_ARG_INFO_ERR); + } + + template + size_type getSubGroupInfo(const cl::Device &dev, const cl::NDRange &range, cl_int* err = NULL) const + { + size_type param; + cl_int result = getSubGroupInfo(dev, name, range, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } +#endif // #if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! \brief setArg overload taking a shared_ptr type + */ + template + cl_int setArg(cl_uint index, const cl::pointer &argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr.get()), + __SET_KERNEL_ARGS_ERR); + } + + /*! \brief setArg overload taking a vector type. + */ + template + cl_int setArg(cl_uint index, const cl::vector &argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr.data()), + __SET_KERNEL_ARGS_ERR); + } + + /*! \brief setArg overload taking a pointer type + */ + template + typename std::enable_if::value, cl_int>::type + setArg(cl_uint index, const T argPtr) + { + return detail::errHandler( + ::clSetKernelArgSVMPointer(object_, index, argPtr), + __SET_KERNEL_ARGS_ERR); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + /*! \brief setArg overload taking a POD type + */ template - cl_int setArg(cl_uint index, const T &value) + typename std::enable_if::value, cl_int>::type + setArg(cl_uint index, const T &value) { return detail::errHandler( ::clSetKernelArg( @@ -4948,12 +5864,109 @@ public: __SET_KERNEL_ARGS_ERR); } - cl_int setArg(cl_uint index, ::size_t size, const void* argPtr) + cl_int setArg(cl_uint index, size_type size, const void* argPtr) { return detail::errHandler( ::clSetKernelArg(object_, index, size, argPtr), __SET_KERNEL_ARGS_ERR); } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /*! + * Specify a vector of SVM pointers that the kernel may access in + * addition to its arguments. + */ + cl_int setSVMPointers(const vector &pointerList) + { + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*pointerList.size(), + pointerList.data())); + } + + /*! + * Specify a std::array of SVM pointers that the kernel may access in + * addition to its arguments. + */ + template + cl_int setSVMPointers(const std::array &pointerList) + { + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*pointerList.size(), + pointerList.data())); + } + + /*! \brief Enable fine-grained system SVM. + * + * \note It is only possible to enable fine-grained system SVM if all devices + * in the context associated with kernel support it. + * + * \param svmEnabled True if fine-grained system SVM is requested. False otherwise. + * \return CL_SUCCESS if the function was executed succesfully. CL_INVALID_OPERATION + * if no devices in the context support fine-grained system SVM. + * + * \see clSetKernelExecInfo + */ + cl_int enableFineGrainedSystemSVM(bool svmEnabled) + { + cl_bool svmEnabled_ = svmEnabled ? CL_TRUE : CL_FALSE; + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, + sizeof(cl_bool), + &svmEnabled_ + ) + ); + } + + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0, Ts... ts) + { + pointerList[index] = static_cast(t0.get()); + setSVMPointersHelper(ts...); + } + + template + typename std::enable_if::value, void>::type + setSVMPointersHelper(std::array &pointerList, T0 t0, Ts... ts) + { + pointerList[index] = static_cast(t0); + setSVMPointersHelper(ts...); + } + + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0) + { + pointerList[index] = static_cast(t0.get()); + } + + template + typename std::enable_if::value, void>::type + setSVMPointersHelper(std::array &pointerList, T0 t0) + { + pointerList[index] = static_cast(t0); + } + + template + cl_int setSVMPointers(const T0 &t0, Ts... ts) + { + std::array pointerList; + + setSVMPointersHelper<0, 1 + sizeof...(Ts)>(pointerList, t0, ts...); + return detail::errHandler( + ::clSetKernelExecInfo( + object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void*)*(1 + sizeof...(Ts)), + pointerList.data())); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 }; /*! \class Program @@ -4962,18 +5975,23 @@ public: class Program : public detail::Wrapper { public: - typedef VECTOR_CLASS > Binaries; - typedef VECTOR_CLASS > Sources; - +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + typedef vector> Binaries; + typedef vector Sources; +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + typedef vector > Binaries; + typedef vector > Sources; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + Program( - const STRING_CLASS& source, + const string& source, bool build = false, cl_int* err = NULL) { cl_int error; const char * strings = source.c_str(); - const ::size_t length = source.size(); + const size_type length = source.size(); Context context = Context::getDefault(err); @@ -4988,11 +6006,15 @@ public: object_, 0, NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) NULL, NULL); - detail::errHandler(error, __BUILD_PROGRAM_ERR); + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); } if (err != NULL) { @@ -5002,14 +6024,14 @@ public: Program( const Context& context, - const STRING_CLASS& source, + const string& source, bool build = false, cl_int* err = NULL) { cl_int error; const char * strings = source.c_str(); - const ::size_t length = source.size(); + const size_type length = source.size(); object_ = ::clCreateProgramWithSource( context(), (cl_uint)1, &strings, &length, &error); @@ -5017,23 +6039,65 @@ public: detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); if (error == CL_SUCCESS && build) { - error = ::clBuildProgram( object_, 0, NULL, +#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +#else "", +#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) NULL, NULL); + + detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != NULL) { + *err = error; + } + } + + /** + * Create a program from a vector of source strings and the default context. + * Does not compile or link the program. + */ + Program( + const Sources& sources, + cl_int* err = NULL) + { + cl_int error; + Context context = Context::getDefault(err); - detail::errHandler(error, __BUILD_PROGRAM_ERR); + const size_type n = (size_type)sources.size(); + + vector lengths(n); + vector strings(n); + + for (size_type i = 0; i < n; ++i) { +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].data(); + lengths[i] = sources[(int)i].length(); +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) } + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings.data(), lengths.data(), &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); if (err != NULL) { *err = error; } } + /** + * Create a program from a vector of source strings and a provided context. + * Does not compile or link the program. + */ Program( const Context& context, const Sources& sources, @@ -5041,17 +6105,23 @@ public: { cl_int error; - const ::size_t n = (::size_t)sources.size(); - ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t)); - const char** strings = (const char**) alloca(n * sizeof(const char*)); + const size_type n = (size_type)sources.size(); + + vector lengths(n); + vector strings(n); - for (::size_t i = 0; i < n; ++i) { + for (size_type i = 0; i < n; ++i) { +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].data(); + lengths[i] = sources[(int)i].length(); +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) strings[i] = sources[(int)i].first; lengths[i] = sources[(int)i].second; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) } object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings, lengths, &error); + context(), (cl_uint)n, strings.data(), lengths.data(), &error); detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); if (err != NULL) { @@ -5080,14 +6150,14 @@ public: */ Program( const Context& context, - const VECTOR_CLASS& devices, + const vector& devices, const Binaries& binaries, - VECTOR_CLASS* binaryStatus = NULL, + vector* binaryStatus = NULL, cl_int* err = NULL) { cl_int error; - const ::size_t numDevices = devices.size(); + const size_type numDevices = devices.size(); // Catch size mismatch early and return if(binaries.size() != numDevices) { @@ -5099,16 +6169,23 @@ public: return; } - ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t)); - const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**)); - for (::size_t i = 0; i < numDevices; ++i) { + vector lengths(numDevices); + vector images(numDevices); +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + for (size_type i = 0; i < numDevices; ++i) { + images[i] = binaries[i].data(); + lengths[i] = binaries[(int)i].size(); + } +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + for (size_type i = 0; i < numDevices; ++i) { images[i] = (const unsigned char*)binaries[i].first; lengths[i] = binaries[(int)i].second; } - - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + + vector deviceIDs(numDevices); + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { deviceIDs[deviceIndex] = (devices[deviceIndex])(); } @@ -5118,8 +6195,8 @@ public: object_ = ::clCreateProgramWithBinary( context(), (cl_uint) devices.size(), - deviceIDs, - lengths, images, (binaryStatus != NULL && numDevices > 0) + deviceIDs.data(), + lengths.data(), images.data(), (binaryStatus != NULL && numDevices > 0) ? &binaryStatus->front() : NULL, &error); @@ -5130,30 +6207,30 @@ public: } -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * Create program using builtin kernels. * \param kernelNames Semi-colon separated list of builtin kernel names */ Program( const Context& context, - const VECTOR_CLASS& devices, - const STRING_CLASS& kernelNames, + const vector& devices, + const string& kernelNames, cl_int* err = NULL) { cl_int error; - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { deviceIDs[deviceIndex] = (devices[deviceIndex])(); } object_ = ::clCreateProgramWithBuiltInKernels( context(), (cl_uint) devices.size(), - deviceIDs, + deviceIDs.data(), kernelNames.c_str(), &error); @@ -5162,11 +6239,19 @@ public: *err = error; } } -#endif // #if defined(CL_VERSION_1_2) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 Program() { } + - __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper(program) { } + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit Program(const cl_program& program, bool retainObject = false) : + detail::Wrapper(program, retainObject) { } Program& operator = (const cl_program& rhs) { @@ -5188,11 +6273,10 @@ public: return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) /*! \brief Move constructor to forward move to the superclass correctly. * Required for MSVC. */ - Program(Program&& program) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(program)) {} + Program(Program&& program) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(program)) {} /*! \brief Move assignment to forward move to the superclass correctly. * Required for MSVC. @@ -5202,30 +6286,30 @@ public: detail::Wrapper::operator=(std::move(program)); return *this; } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) cl_int build( - const VECTOR_CLASS& devices, + const vector& devices, const char* options = NULL, void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, void* data = NULL) const { - ::size_t numDevices = devices.size(); - cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id)); - for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + + for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { deviceIDs[deviceIndex] = (devices[deviceIndex])(); } - return detail::errHandler( - ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - deviceIDs, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); + cl_int buildError = ::clBuildProgram( + object_, + (cl_uint) + devices.size(), + deviceIDs.data(), + options, + notifyFptr, + data); + + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); } cl_int build( @@ -5233,37 +6317,37 @@ public: void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, void* data = NULL) const { - return detail::errHandler( - ::clBuildProgram( - object_, - 0, - NULL, - options, - notifyFptr, - data), - __BUILD_PROGRAM_ERR); + cl_int buildError = ::clBuildProgram( + object_, + 0, + NULL, + options, + notifyFptr, + data); + + + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); } -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 cl_int compile( const char* options = NULL, void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, void* data = NULL) const { - return detail::errHandler( - ::clCompileProgram( - object_, - 0, - NULL, - options, - 0, - NULL, - NULL, - notifyFptr, - data), - __COMPILE_PROGRAM_ERR); + cl_int error = ::clCompileProgram( + object_, + 0, + NULL, + options, + 0, + NULL, + NULL, + notifyFptr, + data); + return detail::buildErrHandler(error, __COMPILE_PROGRAM_ERR, getBuildInfo()); } -#endif +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 template cl_int getInfo(cl_program_info name, T* param) const @@ -5308,8 +6392,52 @@ public: } return param; } + + /** + * Build info function that returns a vector of device/info pairs for the specified + * info type and for all devices in the program. + * On an error reading the info for any device, an empty vector of info will be returned. + */ + template + vector::param_type>> + getBuildInfo(cl_int *err = NULL) const + { + cl_int result = CL_SUCCESS; + + auto devs = getInfo(&result); + vector::param_type>> + devInfo; + + // If there was an initial error from getInfo return the error + if (result != CL_SUCCESS) { + if (err != NULL) { + *err = result; + } + return devInfo; + } + + for (cl::Device d : devs) { + typename detail::param_traits< + detail::cl_program_build_info, name>::param_type param; + result = getBuildInfo(d, name, ¶m); + devInfo.push_back( + std::pair::param_type> + (d, param)); + if (result != CL_SUCCESS) { + // On error, leave the loop and return the error code + break; + } + } + if (err != NULL) { + *err = result; + } + if (result != CL_SUCCESS) { + devInfo.clear(); + } + return devInfo; + } - cl_int createKernels(VECTOR_CLASS* kernels) + cl_int createKernels(vector* kernels) { cl_uint numKernels; cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels); @@ -5317,19 +6445,30 @@ public: return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); } - Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel)); + vector value(numKernels); + err = ::clCreateKernelsInProgram( - object_, numKernels, (cl_kernel*) value, NULL); + object_, numKernels, value.data(), NULL); if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); } - kernels->assign(&value[0], &value[numKernels]); + if (kernels) { + kernels->resize(value.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < value.size(); i++) { + // We do not need to retain because this kernel is being created + // by the runtime + (*kernels)[i] = Kernel(value[i], false); + } + } return CL_SUCCESS; } }; -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 inline Program linkProgram( Program input1, Program input2, @@ -5367,7 +6506,7 @@ inline Program linkProgram( } inline Program linkProgram( - VECTOR_CLASS inputPrograms, + vector inputPrograms, const char* options = NULL, void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL, void* data = NULL, @@ -5375,14 +6514,12 @@ inline Program linkProgram( { cl_int error_local = CL_SUCCESS; - cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program)); - - if (programs != NULL) { - for (unsigned int i = 0; i < inputPrograms.size(); i++) { - programs[i] = inputPrograms[i](); - } - } + vector programs(inputPrograms.size()); + for (unsigned int i = 0; i < inputPrograms.size(); i++) { + programs[i] = inputPrograms[i](); + } + Context ctx; if(inputPrograms.size() > 0) { ctx = inputPrograms[0].getInfo(&error_local); @@ -5396,7 +6533,7 @@ inline Program linkProgram( NULL, options, (cl_uint)inputPrograms.size(), - programs, + programs.data(), notifyFptr, data, &error_local); @@ -5406,28 +6543,48 @@ inline Program linkProgram( *err = error_local; } - return Program(prog); + return Program(prog, false); } -#endif +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -template<> -inline VECTOR_CLASS cl::Program::getInfo(cl_int* err) const +// Template specialization for CL_PROGRAM_BINARIES +template <> +inline cl_int cl::Program::getInfo(cl_program_info name, vector>* param) const { - VECTOR_CLASS< ::size_t> sizes = getInfo(); - VECTOR_CLASS binaries; - for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) - { - char *ptr = NULL; - if (*s != 0) - ptr = new char[*s]; - binaries.push_back(ptr); + if (name != CL_PROGRAM_BINARIES) { + return CL_INVALID_VALUE; } - - cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries); + if (param) { + // Resize the parameter array appropriately for each allocation + // and pass down to the helper + + vector sizes = getInfo(); + size_type numBinaries = sizes.size(); + + // Resize the parameter array and constituent arrays + param->resize(numBinaries); + for (size_type i = 0; i < numBinaries; ++i) { + (*param)[i].resize(sizes[i]); + } + + return detail::errHandler( + detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + return CL_SUCCESS; +} + +template<> +inline vector> cl::Program::getInfo(cl_int* err) const +{ + vector> binariesVectors; + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binariesVectors); if (err != NULL) { *err = result; } - return binaries; + return binariesVectors; } inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) @@ -5443,37 +6600,120 @@ inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) } +enum class QueueProperties : cl_command_queue_properties +{ + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, + OutOfOrder = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, +}; + +inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) +{ + return static_cast(static_cast(lhs) | static_cast(rhs)); +} + /*! \class CommandQueue * \brief CommandQueue interface for cl_command_queue. */ class CommandQueue : public detail::Wrapper { private: -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED - static std::atomic default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED - static volatile int default_initialized_; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED + static std::once_flag default_initialized_; static CommandQueue default_; - static volatile cl_int default_error_; -public: - CommandQueue( - cl_command_queue_properties properties, - cl_int* err = NULL) - { - cl_int error; + static cl_int default_error_; - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); + /*! \brief Create the default command queue returned by @ref getDefault. + * + * It sets default_error_ to indicate success or failure. It does not throw + * @c cl::Error. + */ + static void makeDefault() + { + /* We don't want to throw an error from this function, so we have to + * catch and set the error flag. + */ +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + try +#endif + { + int error; + Context context = Context::getDefault(&error); - if (error != CL_SUCCESS) { - if (err != NULL) { - *err = error; + if (error != CL_SUCCESS) { + default_error_ = error; + } + else { + Device device = Device::getDefault(); + default_ = CommandQueue(context, device, 0, &default_error_); + } + } +#if defined(CL_HPP_ENABLE_EXCEPTIONS) + catch (cl::Error &e) { + default_error_ = e.err(); + } +#endif + } + + /*! \brief Create the default command queue. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const CommandQueue &c) { + default_ = c; + } + +public: +#ifdef CL_HPP_UNIT_TEST_ENABLE + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { + default_ = CommandQueue(); + } +#endif // #ifdef CL_HPP_UNIT_TEST_ENABLE + + + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + CommandQueue( + cl_command_queue_properties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; } } else { Device device = context.getInfo()[0]; +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + if ((properties & CL_QUEUE_ON_DEVICE) == 0) { + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + } + else { + error = CL_INVALID_QUEUE_PROPERTIES; + } + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else object_ = ::clCreateCommandQueue( context(), device(), properties, &error); @@ -5481,18 +6721,66 @@ public: if (err != NULL) { *err = error; } +#endif } } - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context + + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. */ + CommandQueue( + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != NULL) { + *err = error; + } + } + else { + Device device = context.getInfo()[0]; + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else + object_ = ::clCreateCommandQueue( + context(), device(), static_cast(properties), &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; + } +#endif + } + } + + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ explicit CommandQueue( const Context& context, cl_command_queue_properties properties = 0, cl_int* err = NULL) { cl_int error; - VECTOR_CLASS devices; + vector devices; error = context.getInfo(CL_CONTEXT_DEVICES, &devices); detail::errHandler(error, __CREATE_CONTEXT_ERR); @@ -5505,126 +6793,183 @@ public: return; } - object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + if ((properties & CL_QUEUE_ON_DEVICE) == 0) { + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + } + else { + error = CL_INVALID_QUEUE_PROPERTIES; + } - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else + object_ = ::clCreateCommandQueue( + context(), devices[0](), properties, &error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); if (err != NULL) { *err = error; } +#endif } - CommandQueue( + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + explicit CommandQueue( const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, + QueueProperties properties, cl_int* err = NULL) { cl_int error; + vector devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) + { + if (err != NULL) { + *err = error; + } + return; + } + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } +#else object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); + context(), devices[0](), static_cast(properties), &error); detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); if (err != NULL) { *err = error; } - } - - /*! \brief Copy constructor to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(const CommandQueue& queue) : detail::Wrapper(queue) {} +#endif - /*! \brief Copy assignment to forward copy to the superclass correctly. - * Required for MSVC. - */ - CommandQueue& operator = (const CommandQueue &queue) - { - detail::Wrapper::operator=(queue); - return *this; } -#if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) - /*! \brief Move constructor to forward move to the superclass correctly. - * Required for MSVC. - */ - CommandQueue(CommandQueue&& queue) CL_HPP_NOEXCEPT : detail::Wrapper(std::move(queue)) {} - - /*! \brief Move assignment to forward move to the superclass correctly. - * Required for MSVC. + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. */ - CommandQueue& operator = (CommandQueue &&queue) + CommandQueue( + const Context& context, + const Device& device, + cl_command_queue_properties properties = 0, + cl_int* err = NULL) { - detail::Wrapper::operator=(std::move(queue)); - return *this; - } -#endif // #if defined(CL_HPP_RVALUE_REFERENCES_SUPPORTED) + cl_int error; - static CommandQueue getDefault(cl_int * err = NULL) - { - int state = detail::compare_exchange( - &default_initialized_, - __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED); +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); - if (state & __DEFAULT_INITIALIZED) { - if (err != NULL) { - *err = default_error_; - } - return default_; + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; } +#else + object_ = ::clCreateCommandQueue( + context(), device(), properties, &error); - if (state & __DEFAULT_BEING_INITIALIZED) { - // Assume writes will propagate eventually... - while(default_initialized_ != __DEFAULT_INITIALIZED) { - detail::fence(); - } - - if (err != NULL) { - *err = default_error_; - } - return default_; + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != NULL) { + *err = error; } +#endif + } - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + CommandQueue( + const Context& context, + const Device& device, + QueueProperties properties, + cl_int* err = NULL) + { + cl_int error; - if (error != CL_SUCCESS) { +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); if (err != NULL) { *err = error; } - } - else { - Device device = context.getInfo()[0]; - - default_ = CommandQueue(context, device, 0, &error); +#else + object_ = ::clCreateCommandQueue( + context(), device(), static_cast(properties), &error); detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); if (err != NULL) { *err = error; } +#endif } - detail::fence(); - - default_error_ = error; - // Assume writes will propagate eventually... - default_initialized_ = __DEFAULT_INITIALIZED; - - detail::fence(); - + static CommandQueue getDefault(cl_int * err = NULL) + { + std::call_once(default_initialized_, makeDefault); +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); +#else // CL_HPP_TARGET_OPENCL_VERSION >= 200 + detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_ERR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 if (err != NULL) { *err = default_error_; } return default_; + } + /** + * Modify the default command queue to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default command queue. + * Should be compared to the passed value to ensure that it was updated. + */ + static CommandQueue setDefault(const CommandQueue &default_queue) + { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_queue)); + detail::errHandler(default_error_); + return default_; } CommandQueue() { } - __CL_EXPLICIT_CONSTRUCTORS CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper(commandQueue) { } + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit CommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : + detail::Wrapper(commandQueue, retainObject) { } CommandQueue& operator = (const cl_command_queue& rhs) { @@ -5632,6 +6977,34 @@ public: return *this; } + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + CommandQueue(const CommandQueue& queue) : detail::Wrapper(queue) {} + + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + CommandQueue& operator = (const CommandQueue &queue) + { + detail::Wrapper::operator=(queue); + return *this; + } + + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + CommandQueue(CommandQueue&& queue) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(queue)) {} + + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + CommandQueue& operator = (CommandQueue &&queue) + { + detail::Wrapper::operator=(std::move(queue)); + return *this; + } + template cl_int getInfo(cl_command_queue_info name, T* param) const { @@ -5657,10 +7030,10 @@ public: cl_int enqueueReadBuffer( const Buffer& buffer, cl_bool blocking, - ::size_t offset, - ::size_t size, + size_type offset, + size_type size, void* ptr, - const VECTOR_CLASS* events = NULL, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -5682,10 +7055,10 @@ public: cl_int enqueueWriteBuffer( const Buffer& buffer, cl_bool blocking, - ::size_t offset, - ::size_t size, + size_type offset, + size_type size, const void* ptr, - const VECTOR_CLASS* events = NULL, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -5707,10 +7080,10 @@ public: cl_int enqueueCopyBuffer( const Buffer& src, const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, + size_type src_offset, + size_type dst_offset, + size_type size, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -5731,15 +7104,15 @@ public: cl_int enqueueReadBufferRect( const Buffer& buffer, cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, void *ptr, - const VECTOR_CLASS* events = NULL, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -5747,10 +7120,10 @@ public: ::clEnqueueReadBufferRect( object_, buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, + blocking, + buffer_offset.data(), + host_offset.data(), + region.data(), buffer_row_pitch, buffer_slice_pitch, host_row_pitch, @@ -5770,15 +7143,15 @@ public: cl_int enqueueWriteBufferRect( const Buffer& buffer, cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + const void *ptr, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -5786,10 +7159,10 @@ public: ::clEnqueueWriteBufferRect( object_, buffer(), - blocking, - (const ::size_t *)buffer_offset, - (const ::size_t *)host_offset, - (const ::size_t *)region, + blocking, + buffer_offset.data(), + host_offset.data(), + region.data(), buffer_row_pitch, buffer_slice_pitch, host_row_pitch, @@ -5809,14 +7182,14 @@ public: cl_int enqueueCopyBufferRect( const Buffer& src, const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, + const array& src_origin, + const array& dst_origin, + const array& region, + size_type src_row_pitch, + size_type src_slice_pitch, + size_type dst_row_pitch, + size_type dst_slice_pitch, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -5825,9 +7198,9 @@ public: object_, src(), dst(), - (const ::size_t *)src_origin, - (const ::size_t *)dst_origin, - (const ::size_t *)region, + src_origin.data(), + dst_origin.data(), + region.data(), src_row_pitch, src_slice_pitch, dst_row_pitch, @@ -5843,20 +7216,25 @@ public: return err; } -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * Enqueue a command to fill a buffer object with a pattern - * of a given size. The pattern is specified a as vector. + * of a given size. The pattern is specified as a vector type. * \tparam PatternType The datatype of the pattern field. * The pattern type must be an accepted OpenCL data type. + * \tparam offset Is the offset in bytes into the buffer at + * which to start filling. This must be a multiple of + * the pattern size. + * \tparam size Is the size in bytes of the region to fill. + * This must be a multiple of the pattern size. */ template cl_int enqueueFillBuffer( const Buffer& buffer, PatternType pattern, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, + size_type offset, + size_type size, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -5878,24 +7256,30 @@ public: return err; } -#endif // #if defined(CL_VERSION_1_2) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 cl_int enqueueReadImage( const Image& image, cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, void* ptr, - const VECTOR_CLASS* events = NULL, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; cl_int err = detail::errHandler( ::clEnqueueReadImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, + object_, + image(), + blocking, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -5910,19 +7294,25 @@ public: cl_int enqueueWriteImage( const Image& image, cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + const void* ptr, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; cl_int err = detail::errHandler( ::clEnqueueWriteImage( - object_, image(), blocking, (const ::size_t *) origin, - (const ::size_t *) region, row_pitch, slice_pitch, ptr, + object_, + image(), + blocking, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + ptr, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -5937,17 +7327,21 @@ public: cl_int enqueueCopyImage( const Image& src, const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, + const array& src_origin, + const array& dst_origin, + const array& region, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; cl_int err = detail::errHandler( ::clEnqueueCopyImage( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *)dst_origin, (const ::size_t *) region, + object_, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -5959,7 +7353,7 @@ public: return err; } -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * Enqueue a command to fill an image object with a specified color. * \param fillColor is the color to use to fill the image. @@ -5970,9 +7364,9 @@ public: cl_int enqueueFillImage( const Image& image, cl_float4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, + const array& origin, + const array& region, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -5981,8 +7375,8 @@ public: object_, image(), static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, + origin.data(), + region.data(), (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -6004,9 +7398,9 @@ public: cl_int enqueueFillImage( const Image& image, cl_int4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, + const array& origin, + const array& region, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -6015,8 +7409,8 @@ public: object_, image(), static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, + origin.data(), + region.data(), (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -6038,9 +7432,9 @@ public: cl_int enqueueFillImage( const Image& image, cl_uint4 fillColor, - const size_t<3>& origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, + const array& origin, + const array& region, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -6049,8 +7443,8 @@ public: object_, image(), static_cast(&fillColor), - (const ::size_t *) origin, - (const ::size_t *) region, + origin.data(), + region.data(), (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -6061,22 +7455,26 @@ public: return err; } -#endif // #if defined(CL_VERSION_1_2) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 cl_int enqueueCopyImageToBuffer( const Image& src, const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, + const array& src_origin, + const array& region, + size_type dst_offset, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; cl_int err = detail::errHandler( ::clEnqueueCopyImageToBuffer( - object_, src(), dst(), (const ::size_t *) src_origin, - (const ::size_t *) region, dst_offset, + object_, + src(), + dst(), + src_origin.data(), + region.data(), + dst_offset, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -6091,17 +7489,21 @@ public: cl_int enqueueCopyBufferToImage( const Buffer& src, const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, + size_type src_offset, + const array& dst_origin, + const array& region, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; cl_int err = detail::errHandler( ::clEnqueueCopyBufferToImage( - object_, src(), dst(), src_offset, - (const ::size_t *) dst_origin, (const ::size_t *) region, + object_, + src(), + dst(), + src_offset, + dst_origin.data(), + region.data(), (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -6117,9 +7519,9 @@ public: const Buffer& buffer, cl_bool blocking, cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, + size_type offset, + size_type size, + const vector* events = NULL, Event* event = NULL, cl_int* err = NULL) const { @@ -6146,11 +7548,11 @@ public: const Image& buffer, cl_bool blocking, cl_map_flags flags, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t * row_pitch, - ::size_t * slice_pitch, - const VECTOR_CLASS* events = NULL, + const array& origin, + const array& region, + size_type * row_pitch, + size_type * slice_pitch, + const vector* events = NULL, Event* event = NULL, cl_int* err = NULL) const { @@ -6158,7 +7560,8 @@ public: cl_int error; void * result = ::clEnqueueMapImage( object_, buffer(), blocking, flags, - (const ::size_t *) origin, (const ::size_t *) region, + origin.data(), + region.data(), row_pitch, slice_pitch, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, @@ -6174,20 +7577,27 @@ public: return result; } - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. + */ + template + cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(ptr), size, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); if (event != NULL && err == CL_SUCCESS) *event = tmp; @@ -6195,30 +7605,27 @@ public: return err; } -#if defined(CL_VERSION_1_2) + /** - * Enqueues a marker command which waits for either a list of events to complete, - * or all previously enqueued commands to complete. - * - * Enqueues a marker command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command returns an event which can be waited on, - * i.e. this event can be waited on to insure that all events either in the event_wait_list - * or all previously enqueued commands, queued before this command to command_queue, - * have completed. + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. */ - cl_int enqueueMarkerWithWaitList( - const VECTOR_CLASS *events = 0, - Event *event = 0) + template + cl_int enqueueMapSVM( + cl::pointer &ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) const { cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarkerWithWaitList( - object_, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_MARKER_WAIT_LIST_ERR); + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(ptr.get()), size, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); if (event != NULL && err == CL_SUCCESS) *event = tmp; @@ -6227,18 +7634,175 @@ public: } /** - * A synchronization point that enqueues a barrier operation. - * - * Enqueues a barrier command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command blocks command execution, that is, any - * following commands enqueued after it do not execute until it completes. This command + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. + */ + template + cl_int enqueueMapSVM( + cl::vector &container, + cl_bool blocking, + cl_map_flags flags, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler(::clEnqueueSVMMap( + object_, blocking, flags, static_cast(container.data()), container.size(), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MAP_BUFFER_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + + cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, memory(), mapped_ptr, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. + */ + template + cl_int enqueueUnmapSVM( + T* ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(ptr), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ + template + cl_int enqueueUnmapSVM( + cl::pointer &ptr, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(ptr.get()), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. + */ + template + cl_int enqueueUnmapSVM( + cl::vector &container, + const vector* events = NULL, + Event* event = NULL) const + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, static_cast(container.data()), + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. this event can be waited on to insure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList( + const vector *events = 0, + Event *event = 0) + { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != NULL && err == CL_SUCCESS) + *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command * returns an event which can be waited on, i.e. this event can be waited on to insure that * all events either in the event_wait_list or all previously enqueued commands, queued * before this command to command_queue, have completed. */ cl_int enqueueBarrierWithWaitList( - const VECTOR_CLASS *events = 0, + const vector *events = 0, Event *event = 0) { cl_event tmp; @@ -6261,15 +7825,16 @@ public: * should be associated. */ cl_int enqueueMigrateMemObjects( - const VECTOR_CLASS &memObjects, + const vector &memObjects, cl_mem_migration_flags flags, - const VECTOR_CLASS* events = NULL, + const vector* events = NULL, Event* event = NULL ) { cl_event tmp; - cl_mem* localMemObjects = static_cast(alloca(memObjects.size() * sizeof(cl_mem))); + vector localMemObjects(memObjects.size()); + for( int i = 0; i < (int)memObjects.size(); ++i ) { localMemObjects[i] = memObjects[i](); } @@ -6279,7 +7844,7 @@ public: ::clEnqueueMigrateMemObjects( object_, (cl_uint)memObjects.size(), - static_cast(localMemObjects), + localMemObjects.data(), flags, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, @@ -6291,23 +7856,23 @@ public: return err; } -#endif // #if defined(CL_VERSION_1_2) +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 cl_int enqueueNDRangeKernel( const Kernel& kernel, const NDRange& offset, const NDRange& global, const NDRange& local = NullRange, - const VECTOR_CLASS* events = NULL, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; cl_int err = detail::errHandler( ::clEnqueueNDRangeKernel( object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const ::size_t*) offset : NULL, - (const ::size_t*) global, - local.dimensions() != 0 ? (const ::size_t*) local : NULL, + offset.dimensions() != 0 ? (const size_type*) offset : NULL, + (const size_type*) global, + local.dimensions() != 0 ? (const size_type*) local : NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, (event != NULL) ? &tmp : NULL), @@ -6319,10 +7884,11 @@ public: return err; } - cl_int enqueueTask( +#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) + CL_EXT_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( const Kernel& kernel, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) const + const vector* events = NULL, + Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED { cl_event tmp; cl_int err = detail::errHandler( @@ -6338,31 +7904,31 @@ public: return err; } +#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) cl_int enqueueNativeKernel( void (CL_CALLBACK *userFptr)(void *), - std::pair args, - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* mem_locs = NULL, - const VECTOR_CLASS* events = NULL, + std::pair args, + const vector* mem_objects = NULL, + const vector* mem_locs = NULL, + const vector* events = NULL, Event* event = NULL) const { - cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) - ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem)) - : NULL; - - if (mems != NULL) { - for (unsigned int i = 0; i < mem_objects->size(); i++) { - mems[i] = ((*mem_objects)[i])(); - } + size_type elements = 0; + if (mem_objects != NULL) { + elements = mem_objects->size(); } - + vector mems(elements); + for (unsigned int i = 0; i < elements; i++) { + mems[i] = ((*mem_objects)[i])(); + } + cl_event tmp; cl_int err = detail::errHandler( ::clEnqueueNativeKernel( object_, userFptr, args.first, args.second, (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0, - mems, + mems.data(), (mem_locs != NULL && mem_locs->size() > 0) ? (const void **) &mem_locs->front() : NULL, (events != NULL) ? (cl_uint) events->size() : 0, (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, @@ -6378,7 +7944,7 @@ public: /** * Deprecated APIs for 1.2 */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED { @@ -6396,7 +7962,7 @@ public: } CL_EXT_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const VECTOR_CLASS& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const vector& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED { return detail::errHandler( ::clEnqueueWaitForEvents( @@ -6405,11 +7971,11 @@ public: events.size() > 0 ? (const cl_event*) &events.front() : NULL), __ENQUEUE_WAIT_FOR_EVENTS_ERR); } -#endif // #if defined(CL_VERSION_1_1) +#endif // defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) cl_int enqueueAcquireGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, + const vector* mem_objects = NULL, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -6430,8 +7996,8 @@ public: } cl_int enqueueReleaseGLObjects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, + const vector* mem_objects = NULL, + const vector* events = NULL, Event* event = NULL) const { cl_event tmp; @@ -6451,7 +8017,7 @@ public: return err; } -#if defined (USE_DX_INTEROP) +#if defined (CL_HPP_USE_DX_INTEROP) typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( cl_command_queue command_queue, cl_uint num_objects, const cl_mem* mem_objects, cl_uint num_events_in_wait_list, @@ -6462,19 +8028,19 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( const cl_event* event_wait_list, cl_event* event); cl_int enqueueAcquireD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, + const vector* mem_objects = NULL, + const vector* events = NULL, Event* event = NULL) const { static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 cl_context context = getInfo(); cl::Device device(getInfo()); cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueAcquireD3D10ObjectsKHR); #endif -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR); +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueAcquireD3D10ObjectsKHR); #endif cl_event tmp; @@ -6495,20 +8061,20 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( } cl_int enqueueReleaseD3D10Objects( - const VECTOR_CLASS* mem_objects = NULL, - const VECTOR_CLASS* events = NULL, + const vector* mem_objects = NULL, + const vector* events = NULL, Event* event = NULL) const { static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL; -#if defined(CL_VERSION_1_2) +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 cl_context context = getInfo(); cl::Device device(getInfo()); cl_platform_id platform = device.getInfo(); - __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_2) -#if defined(CL_VERSION_1_1) - __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR); -#endif // #if defined(CL_VERSION_1_1) + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueReleaseD3D10ObjectsKHR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueReleaseD3D10ObjectsKHR); +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 cl_event tmp; cl_int err = detail::errHandler( @@ -6531,7 +8097,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( /** * Deprecated APIs for 1.2 */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) +#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED { @@ -6539,7 +8105,7 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( ::clEnqueueBarrier(object_), __ENQUEUE_BARRIER_ERR); } -#endif // #if defined(CL_VERSION_1_1) +#endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS cl_int flush() const { @@ -6550,6268 +8116,1374 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( { return detail::errHandler(::clFinish(object_), __FINISH_ERR); } +}; // CommandQueue + +CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandQueue::default_initialized_; +CL_HPP_DEFINE_STATIC_MEMBER_ CommandQueue CommandQueue::default_; +CL_HPP_DEFINE_STATIC_MEMBER_ cl_int CommandQueue::default_error_ = CL_SUCCESS; + + +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +enum class DeviceQueueProperties : cl_command_queue_properties +{ + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, }; -#ifdef _WIN32 -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) std::atomic CommandQueue::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__declspec(selectany) CommandQueue CommandQueue::default_; -__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; -#else // !_WIN32 -#ifdef CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) std::atomic CommandQueue::default_initialized_; -#else // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED; -#endif // !CL_HPP_CPP11_ATOMICS_SUPPORTED -__attribute__((weak)) CommandQueue CommandQueue::default_; -__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS; -#endif // !_WIN32 +inline DeviceQueueProperties operator|(DeviceQueueProperties lhs, DeviceQueueProperties rhs) +{ + return static_cast(static_cast(lhs) | static_cast(rhs)); +} -template< typename IteratorType > -Buffer::Buffer( - const Context &context, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) +/*! \class DeviceCommandQueue + * \brief DeviceCommandQueue interface for device cl_command_queues. + */ +class DeviceCommandQueue : public detail::Wrapper { - typedef typename std::iterator_traits::value_type DataType; - cl_int error; +public: - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - ::size_t size = sizeof(DataType)*(endIterator - startIterator); + /*! + * Trivial empty constructor to create a null queue. + */ + DeviceCommandQueue() { } - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } + /*! + * Default construct device command queue on default context and device + */ + DeviceCommandQueue(DeviceQueueProperties properties, cl_int* err = NULL) + { + cl_int error; + cl::Context context = cl::Context::getDefault(); + cl::Device device = cl::Device::getDefault(); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); - if( !useHostPtr ) { - CommandQueue queue(context, 0, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); if (err != NULL) { *err = error; } } -} - -template< typename IteratorType > -Buffer::Buffer( - const CommandQueue &queue, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if (readOnly) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if (useHostPtr) { - flags |= CL_MEM_USE_HOST_PTR; - } - ::size_t size = sizeof(DataType)*(endIterator - startIterator); + /*! + * Create a device command queue for a specified device in the passed context. + */ + DeviceCommandQueue( + const Context& context, + const Device& device, + DeviceQueueProperties properties = DeviceQueueProperties::None, + cl_int* err = NULL) + { + cl_int error; - Context context = queue.getInfo(); + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); - if (useHostPtr) { - object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); - } - else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } } - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != NULL) { - *err = error; - } + /*! + * Create a device command queue for a specified device in the passed context. + */ + DeviceCommandQueue( + const Context& context, + const Device& device, + cl_uint queueSize, + DeviceQueueProperties properties = DeviceQueueProperties::None, + cl_int* err = NULL) + { + cl_int error; - if (!useHostPtr) { - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, + CL_QUEUE_SIZE, queueSize, + 0 }; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); if (err != NULL) { *err = error; } } -} -inline cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); + /*! \brief Constructor from cl_command_queue - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit DeviceCommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : + detail::Wrapper(commandQueue, retainObject) { } - if (error != CL_SUCCESS) { - return error; + DeviceCommandQueue& operator = (const cl_command_queue& rhs) + { + detail::Wrapper::operator=(rhs); + return *this; } - return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); -} + /*! \brief Copy constructor to forward copy to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue(const DeviceCommandQueue& queue) : detail::Wrapper(queue) {} -inline cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - ::size_t offset, - ::size_t size, - const void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - ::size_t offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL, - cl_int* err = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - - void * result = ::clEnqueueMapBuffer( - queue(), buffer(), blocking, flags, offset, size, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != NULL) { - *err = error; - } - return result; -} - -inline cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (error != CL_SUCCESS) { - return error; - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != NULL) ? (cl_uint) events->size() : 0, - (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, - (event != NULL) ? &tmp : NULL), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != NULL && err == CL_SUCCESS) - *event = tmp; - - return err; -} - -inline cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - ::size_t src_offset, - ::size_t dst_offset, - ::size_t size, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, startIterator, endIterator, buffer); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, buffer, startIterator, endIterator); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses specified queue. - */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } -#if defined(_MSC_VER) - std::copy( - startIterator, - endIterator, - stdext::checked_array_iterator( - pointer, length)); -#else - std::copy(startIterator, endIterator, pointer); -#endif - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses specified queue. - */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - ::size_t length = endIterator-startIterator; - ::size_t byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } - std::copy(pointer, pointer + length, startIterator); - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -#if defined(CL_VERSION_1_1) -inline cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const size_t<3>& buffer_offset, - const size_t<3>& host_offset, - const size_t<3>& region, - ::size_t buffer_row_pitch, - ::size_t buffer_slice_pitch, - ::size_t host_row_pitch, - ::size_t host_slice_pitch, - void *ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - ::size_t src_row_pitch, - ::size_t src_slice_pitch, - ::size_t dst_row_pitch, - ::size_t dst_slice_pitch, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferRect( - src, - dst, - src_origin, - dst_origin, - region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} -#endif - -inline cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const size_t<3>& origin, - const size_t<3>& region, - ::size_t row_pitch, - ::size_t slice_pitch, - void* ptr, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const size_t<3>& src_origin, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImage( - src, - dst, - src_origin, - dst_origin, - region, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const size_t<3>& src_origin, - const size_t<3>& region, - ::size_t dst_offset, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; + /*! \brief Copy assignment to forward copy to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue& operator = (const DeviceCommandQueue &queue) + { + detail::Wrapper::operator=(queue); + return *this; } - return queue.enqueueCopyImageToBuffer( - src, - dst, - src_origin, - region, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - ::size_t src_offset, - const size_t<3>& dst_origin, - const size_t<3>& region, - const VECTOR_CLASS* events = NULL, - Event* event = NULL) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); + /*! \brief Move constructor to forward move to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue(DeviceCommandQueue&& queue) CL_HPP_NOEXCEPT_ : detail::Wrapper(std::move(queue)) {} - if (error != CL_SUCCESS) { - return error; + /*! \brief Move assignment to forward move to the superclass correctly. + * Required for MSVC. + */ + DeviceCommandQueue& operator = (DeviceCommandQueue &&queue) + { + detail::Wrapper::operator=(std::move(queue)); + return *this; } - return queue.enqueueCopyBufferToImage( - src, - dst, - src_offset, - dst_origin, - region, - events, - event); -} - - -inline cl_int flush(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; + template + cl_int getInfo(cl_command_queue_info name, T* param) const + { + return detail::errHandler( + detail::getInfo( + &::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); } - return queue.flush(); -} - -inline cl_int finish(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - - return queue.finish(); -} - -// Kernel Functor support -// New interface as of September 2011 -// Requires the C++11 std::tr1::function (note do not support TR1) -// Visual Studio 2010 and GCC 4.2 - -struct EnqueueArgs -{ - CommandQueue queue_; - const NDRange offset_; - const NDRange global_; - const NDRange local_; - VECTOR_CLASS events_; + template typename + detail::param_traits::param_type + getInfo(cl_int* err = NULL) const + { + typename detail::param_traits< + detail::cl_command_queue_info, name>::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != NULL) { + *err = result; + } + return param; + } - EnqueueArgs(NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) + /*! + * Create a new default device command queue for the default device, + * in the default context and of the default size. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault( + cl_int *err = nullptr) { + cl_int error; + cl::Context context = cl::Context::getDefault(); + cl::Device device = cl::Device::getDefault(); + + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + return deviceQueue; } - EnqueueArgs(NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) + /*! + * Create a new default device command queue for the specified device + * and of the default size. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault( + const Context &context, const Device &device, cl_int *err = nullptr) { + cl_int error; - } + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); - EnqueueArgs(NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } + return deviceQueue; } - EnqueueArgs(Event e, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) + /*! + * Create a new default device command queue for the specified device + * and of the requested size in bytes. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault( + const Context &context, const Device &device, cl_uint queueSize, cl_int *err = nullptr) { - events_.push_back(e); - } + cl_int error; - EnqueueArgs(Event e, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } + cl_command_queue_properties properties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, + CL_QUEUE_SIZE, queueSize, + 0 }; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != NULL) { + *err = error; + } - EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); + return deviceQueue; } +}; // DeviceCommandQueue - EnqueueArgs(const VECTOR_CLASS &events, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) +namespace detail +{ + // Specialization for device command queue + template <> + struct KernelArgumentHandler { + static size_type size(const cl::DeviceCommandQueue&) { return sizeof(cl_command_queue); } + static const cl_command_queue* ptr(const cl::DeviceCommandQueue& value) { return &(value()); } + }; +} // namespace detail - } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - EnqueueArgs(const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { +template< typename IteratorType > +Buffer::Buffer( + const Context &context, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if( readOnly ) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; } + if( useHostPtr ) { + flags |= CL_MEM_USE_HOST_PTR; + } + + size_type size = sizeof(DataType)*(endIterator - startIterator); - EnqueueArgs(const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local), - events_(events) - { + if( useHostPtr ) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; } - EnqueueArgs(CommandQueue &queue, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { + if( !useHostPtr ) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } } +} - EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { +template< typename IteratorType > +Buffer::Buffer( + const CommandQueue &queue, + IteratorType startIterator, + IteratorType endIterator, + bool readOnly, + bool useHostPtr, + cl_int* err) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + cl_mem_flags flags = 0; + if (readOnly) { + flags |= CL_MEM_READ_ONLY; + } + else { + flags |= CL_MEM_READ_WRITE; + } + if (useHostPtr) { + flags |= CL_MEM_USE_HOST_PTR; } - EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { + size_type size = sizeof(DataType)*(endIterator - startIterator); + Context context = queue.getInfo(); + + if (useHostPtr) { + object_ = ::clCreateBuffer(context(), flags, size, static_cast(&*startIterator), &error); + } + else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); } - EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; } - EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); + if (!useHostPtr) { + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != NULL) { + *err = error; + } } +} - EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); +inline cl_int enqueueReadBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; } - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); +} + +inline cl_int enqueueWriteBuffer( + const Buffer& buffer, + cl_bool blocking, + size_type offset, + size_type size, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; } - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { + return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); +} +inline void* enqueueMapBuffer( + const Buffer& buffer, + cl_bool blocking, + cl_map_flags flags, + size_type offset, + size_type size, + const vector* events = NULL, + Event* event = NULL, + cl_int* err = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; } - EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS &events, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local), - events_(events) - { + void * result = ::clEnqueueMapBuffer( + queue(), buffer(), blocking, flags, offset, size, + (events != NULL) ? (cl_uint) events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL, + (cl_event*) event, + &error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != NULL) { + *err = error; } -}; - -namespace detail { + return result; +} -class NullType {}; -template -struct SetArg +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. + */ +template +inline cl_int enqueueMapSVM( + T* ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events, + Event* event) { - static void set (Kernel kernel, T0 arg) - { - kernel.setArg(index, arg); + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); } -}; -template -struct SetArg -{ - static void set (Kernel, NullType) - { - } -}; + return queue.enqueueMapSVM( + ptr, blocking, flags, size, events, event); +} -template < - typename T0, typename T1, typename T2, typename T3, - typename T4, typename T5, typename T6, typename T7, - typename T8, typename T9, typename T10, typename T11, - typename T12, typename T13, typename T14, typename T15, - typename T16, typename T17, typename T18, typename T19, - typename T20, typename T21, typename T22, typename T23, - typename T24, typename T25, typename T26, typename T27, - typename T28, typename T29, typename T30, typename T31 -> -class KernelFunctorGlobal +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. + */ +template +inline cl_int enqueueMapSVM( + cl::pointer ptr, + cl_bool blocking, + cl_map_flags flags, + size_type size, + const vector* events = NULL, + Event* event = NULL) { -private: - Kernel kernel_; - -public: - KernelFunctorGlobal( - Kernel kernel) : - kernel_(kernel) - {} - - KernelFunctorGlobal( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - kernel_(program, name.c_str(), err) - {} - - Event operator() ( - const EnqueueArgs& args, - T0 t0, - T1 t1 = NullType(), - T2 t2 = NullType(), - T3 t3 = NullType(), - T4 t4 = NullType(), - T5 t5 = NullType(), - T6 t6 = NullType(), - T7 t7 = NullType(), - T8 t8 = NullType(), - T9 t9 = NullType(), - T10 t10 = NullType(), - T11 t11 = NullType(), - T12 t12 = NullType(), - T13 t13 = NullType(), - T14 t14 = NullType(), - T15 t15 = NullType(), - T16 t16 = NullType(), - T17 t17 = NullType(), - T18 t18 = NullType(), - T19 t19 = NullType(), - T20 t20 = NullType(), - T21 t21 = NullType(), - T22 t22 = NullType(), - T23 t23 = NullType(), - T24 t24 = NullType(), - T25 t25 = NullType(), - T26 t26 = NullType(), - T27 t27 = NullType(), - T28 t28 = NullType(), - T29 t29 = NullType(), - T30 t30 = NullType(), - T31 t31 = NullType() - ) - { - Event event; - SetArg<0, T0>::set(kernel_, t0); - SetArg<1, T1>::set(kernel_, t1); - SetArg<2, T2>::set(kernel_, t2); - SetArg<3, T3>::set(kernel_, t3); - SetArg<4, T4>::set(kernel_, t4); - SetArg<5, T5>::set(kernel_, t5); - SetArg<6, T6>::set(kernel_, t6); - SetArg<7, T7>::set(kernel_, t7); - SetArg<8, T8>::set(kernel_, t8); - SetArg<9, T9>::set(kernel_, t9); - SetArg<10, T10>::set(kernel_, t10); - SetArg<11, T11>::set(kernel_, t11); - SetArg<12, T12>::set(kernel_, t12); - SetArg<13, T13>::set(kernel_, t13); - SetArg<14, T14>::set(kernel_, t14); - SetArg<15, T15>::set(kernel_, t15); - SetArg<16, T16>::set(kernel_, t16); - SetArg<17, T17>::set(kernel_, t17); - SetArg<18, T18>::set(kernel_, t18); - SetArg<19, T19>::set(kernel_, t19); - SetArg<20, T20>::set(kernel_, t20); - SetArg<21, T21>::set(kernel_, t21); - SetArg<22, T22>::set(kernel_, t22); - SetArg<23, T23>::set(kernel_, t23); - SetArg<24, T24>::set(kernel_, t24); - SetArg<25, T25>::set(kernel_, t25); - SetArg<26, T26>::set(kernel_, t26); - SetArg<27, T27>::set(kernel_, t27); - SetArg<28, T28>::set(kernel_, t28); - SetArg<29, T29>::set(kernel_, t29); - SetArg<30, T30>::set(kernel_, t30); - SetArg<31, T31>::set(kernel_, t31); - - args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); } -}; + return queue.enqueueMapSVM( + ptr, blocking, flags, size, events, event); +} -//------------------------------------------------------------------------------------------------------ - - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30, - typename T31> -struct functionImplementation_ +/** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. + */ +template +inline cl_int enqueueMapSVM( + cl::vector container, + cl_bool blocking, + cl_map_flags flags, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - T31); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30, - T31 arg31) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30, - arg31); - } - + return queue.enqueueMapSVM( + container, blocking, flags, events, event); +} -}; +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29, - typename T30> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> +inline cl_int enqueueUnmapMemObject( + const Memory& memory, + void* mapped_ptr, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { + return error; } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - T30); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29, - T30 arg30) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29, - arg30); - } + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), memory(), mapped_ptr, + (events != NULL) ? (cl_uint)events->size() : 0, + (events != NULL && events->size() > 0) ? (cl_event*)&events->front() : NULL, + (event != NULL) ? &tmp : NULL), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + if (event != NULL && err == CL_SUCCESS) + *event = tmp; -}; + return err; +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28, - typename T29> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. + */ +template +inline cl_int enqueueUnmapSVM( + T* ptr, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - T29); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28, - T29 arg29) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28, - arg29); - } - + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); -}; +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27, - typename T28> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ +template +inline cl_int enqueueUnmapSVM( + cl::pointer &ptr, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - T28); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27, - T28 arg28) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27, - arg28); - } + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); +} +/** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. + */ +template +inline cl_int enqueueUnmapSVM( + cl::vector &container, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return detail::errHandler(error, __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + } -}; + return detail::errHandler(queue.enqueueUnmapSVM(container, events, event), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); +} + +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26, - typename T27> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> +inline cl_int enqueueCopyBuffer( + const Buffer& src, + const Buffer& dst, + size_type src_offset, + size_type dst_offset, + size_type size, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { + return error; } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - T27); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26, - T27 arg27) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26, - arg27); - } + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; + return cl::copy(queue, startIterator, endIterator, buffer); +} -}; +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. + */ +template< typename IteratorType > +inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) + return error; -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25, - typename T26> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> + return cl::copy(queue, buffer, startIterator, endIterator); +} + +/** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + size_type length = endIterator-startIterator; + size_type byteLength = length*sizeof(DataType); + + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; } +#if defined(_MSC_VER) + std::copy( + startIterator, + endIterator, + stdext::checked_array_iterator( + pointer, length)); +#else + std::copy(startIterator, endIterator, pointer); +#endif + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} + +/** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. + */ +template< typename IteratorType > +inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) +{ + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + size_type length = endIterator-startIterator; + size_type byteLength = length*sizeof(DataType); - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - T26); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25, - T26 arg26) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25, - arg26); - } + DataType *pointer = + static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if( error != CL_SUCCESS ) { + return error; + } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if( error != CL_SUCCESS ) { + return error; + } + endEvent.wait(); + return CL_SUCCESS; +} -}; +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 +/** + * Blocking SVM map operation - performs a blocking map underneath. + */ +template +inline cl_int mapSVM(cl::vector &container) +{ + return enqueueMapSVM(container, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24, - typename T25> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +/** +* Blocking SVM map operation - performs a blocking map underneath. +*/ +template +inline cl_int unmapSVM(cl::vector &container) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } + return enqueueUnmapSVM(container); +} - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - T25); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24, - T25 arg25) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24, - arg25); - } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 +inline cl_int enqueueReadBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + void *ptr, + const vector* events = NULL, + Event* event = NULL) +{ + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.enqueueReadBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23, - typename T24> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int enqueueWriteBufferRect( + const Buffer& buffer, + cl_bool blocking, + const array& buffer_offset, + const array& host_offset, + const array& region, + size_type buffer_row_pitch, + size_type buffer_slice_pitch, + size_type host_row_pitch, + size_type host_slice_pitch, + const void *ptr, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - T24); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23, - T24 arg24) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23, - arg24); - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.enqueueWriteBufferRect( + buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22, - typename T23> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int enqueueCopyBufferRect( + const Buffer& src, + const Buffer& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + size_type src_row_pitch, + size_type src_slice_pitch, + size_type dst_row_pitch, + size_type dst_slice_pitch, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - T23); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22, - T23 arg23) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22, - arg23); - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.enqueueCopyBufferRect( + src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); +} +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21, - typename T22> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int enqueueReadImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + void* ptr, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - T22); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21, - T22 arg22) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21, - arg22); - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.enqueueReadImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20, - typename T21> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int enqueueWriteImage( + const Image& image, + cl_bool blocking, + const array& origin, + const array& region, + size_type row_pitch, + size_type slice_pitch, + const void* ptr, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - T21); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20, - T21 arg21) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20, - arg21); - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.enqueueWriteImage( + image, + blocking, + origin, + region, + row_pitch, + slice_pitch, + ptr, + events, + event); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19, - typename T20> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int enqueueCopyImage( + const Image& src, + const Image& dst, + const array& src_origin, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - T20); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19, - T20 arg20) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19, - arg20); - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.enqueueCopyImage( + src, + dst, + src_origin, + dst_origin, + region, + events, + event); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18, - typename T19> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int enqueueCopyImageToBuffer( + const Image& src, + const Buffer& dst, + const array& src_origin, + const array& region, + size_type dst_offset, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - T19); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18, - T19 arg19) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18, - arg19); - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.enqueueCopyImageToBuffer( + src, + dst, + src_origin, + region, + dst_offset, + events, + event); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17, - typename T18> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int enqueueCopyBufferToImage( + const Buffer& src, + const Image& dst, + size_type src_offset, + const array& dst_origin, + const array& region, + const vector* events = NULL, + Event* event = NULL) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - T18); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17, - T18 arg18) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17, - arg18); - } + if (error != CL_SUCCESS) { + return error; + } + return queue.enqueueCopyBufferToImage( + src, + dst, + src_offset, + dst_origin, + region, + events, + event); +} -}; -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16, - typename T17> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int flush(void) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - T17); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16, - T17 arg17) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16, - arg17); - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.flush(); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15, - typename T16> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +inline cl_int finish(void) { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - T16); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15, - T16 arg16) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15, - arg16); - } + if (error != CL_SUCCESS) { + return error; + } -}; + return queue.finish(); +} -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14, - typename T15> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +class EnqueueArgs { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) - { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } +private: + CommandQueue queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + vector events_; - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - T15); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14, - T15 arg15) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14, - arg15); - } + template + friend class KernelFunctor; +public: + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) + { -}; + } -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13, - typename T14> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - T14); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13, - T14 arg14) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13, - arg14); - } + } + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) + { -}; + } -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12, - typename T13> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + events_.push_back(e); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - T13); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12, - T13 arg13) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12, - arg13); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11, - typename T12> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + events_.push_back(e); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - T12); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11, - T12 arg12) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11, - arg12); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10, - typename T11> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + events_.push_back(e); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - T11); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10, - T11 arg11) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10, - arg11); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9, - typename T10> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(const vector &events, NDRange global) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - T10); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9, - T10 arg10) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9, - arg10); - } + } + EnqueueArgs(const vector &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { -}; + } -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8, - typename T9> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(const vector &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), + offset_(offset), + global_(global), + local_(local), + events_(events) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - T9); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8, - T9 arg9) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8, - arg9); - } + } + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) + { -}; + } -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7, - typename T8> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - T8); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7, - T8 arg8) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7, - arg8); - } + } + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) + { -}; + } -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6, - typename T7> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + events_.push_back(e); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6, - T7); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6, - T7 arg7) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6, - arg7); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5, - typename T6> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - T6, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + events_.push_back(e); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5, - T6); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5, - T6 arg6) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5, - arg6); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4, - typename T5> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - T5, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + events_.push_back(e); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4, - T5); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4, - T5 arg5) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4, - arg5); - } - - -}; - -template< - typename T0, - typename T1, - typename T2, - typename T3, - typename T4> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - T4, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(NullRange), + events_(events) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - - } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3, - T4); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3, - T4 arg4) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3, - arg4); - } + } + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global, NDRange local) : + queue_(queue), + offset_(NullRange), + global_(global), + local_(local), + events_(events) + { -}; + } -template< - typename T0, - typename T1, - typename T2, - typename T3> -struct functionImplementation_ -< T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - T3, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange offset, NDRange global, NDRange local) : + queue_(queue), + offset_(offset), + global_(global), + local_(local), + events_(events) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + } +}; - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2, - T3); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2, - T3 arg3) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2, - arg3); - } +//---------------------------------------------------------------------------------------------- -}; -template< - typename T0, - typename T1, - typename T2> -struct functionImplementation_ -< T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> +/** + * Type safe kernel functor. + * + */ +template +class KernelFunctor { - typedef detail::KernelFunctorGlobal< - T0, - T1, - T2, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) +private: + Kernel kernel_; + + template + void setArgs(T0&& t0, T1s&&... t1s) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + kernel_.setArg(index, t0); + setArgs(std::forward(t1s)...); } - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1, - T2); - - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1, - T2 arg2) - { - return functor_( - enqueueArgs, - arg0, - arg1, - arg2); - } - - -}; - -template< - typename T0, - typename T1> -struct functionImplementation_ -< T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - T1, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + template + void setArgs(T0&& t0) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + kernel_.setArg(index, t0); } - //! \brief Return type of the functor - typedef Event result_type; + template + void setArgs() + { + } - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0, - T1); - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0, - T1 arg1) - { - return functor_( - enqueueArgs, - arg0, - arg1); - } +public: + KernelFunctor(Kernel kernel) : kernel_(kernel) + {} + KernelFunctor( + const Program& program, + const string name, + cl_int * err = NULL) : + kernel_(program, name.c_str(), err) + {} -}; + //! \brief Return type of the functor + typedef Event result_type; -template< - typename T0> -struct functionImplementation_ -< T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> -{ - typedef detail::KernelFunctorGlobal< - T0, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType, - NullType> FunctorType; - - FunctorType functor_; - - functionImplementation_(const FunctorType &functor) : - functor_(functor) + /** + * Enqueue kernel. + * @param args Launch parameters of the kernel. + * @param t0... List of kernel arguments based on the template type of the functor. + */ + Event operator() ( + const EnqueueArgs& args, + Ts... ts) { - - #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1)) - // Fail variadic expansion for dev11 - static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it."); - #endif - + Event event; + setArgs<0>(std::forward(ts)...); + + args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; } - //! \brief Return type of the functor - typedef Event result_type; + /** + * Enqueue kernel with support for error code. + * @param args Launch parameters of the kernel. + * @param t0... List of kernel arguments based on the template type of the functor. + * @param error Out parameter returning the error code from the execution. + */ + Event operator() ( + const EnqueueArgs& args, + Ts... ts, + cl_int &error) + { + Event event; + setArgs<0>(std::forward(ts)...); - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - T0); + error = args.queue_.enqueueNDRangeKernel( + kernel_, + args.offset_, + args.global_, + args.local_, + &args.events_, + &event); + + return event; + } - Event operator()( - const EnqueueArgs& enqueueArgs, - T0 arg0) - { - return functor_( - enqueueArgs, - arg0); - } +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 + cl_int setSVMPointers(const vector &pointerList) + { + return kernel_.setSVMPointers(pointerList); + } + template + cl_int setSVMPointers(const T0 &t0, T1s... ts) + { + return kernel_.setSVMPointers(t0, ts...); + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + Kernel getKernel() + { + return kernel_; + } }; +namespace compatibility { + /** + * Backward compatibility class to ensure that cl.hpp code works with cl2.hpp. + * Please use KernelFunctor directly. + */ + template + struct make_kernel + { + typedef KernelFunctor FunctorType; + FunctorType functor_; + make_kernel( + const Program& program, + const string name, + cl_int * err = NULL) : + functor_(FunctorType(program, name, err)) + {} + make_kernel( + const Kernel kernel) : + functor_(FunctorType(kernel)) + {} -} // namespace detail - -//---------------------------------------------------------------------------------------------- + //! \brief Return type of the functor + typedef Event result_type; -template < - typename T0, typename T1 = detail::NullType, typename T2 = detail::NullType, - typename T3 = detail::NullType, typename T4 = detail::NullType, - typename T5 = detail::NullType, typename T6 = detail::NullType, - typename T7 = detail::NullType, typename T8 = detail::NullType, - typename T9 = detail::NullType, typename T10 = detail::NullType, - typename T11 = detail::NullType, typename T12 = detail::NullType, - typename T13 = detail::NullType, typename T14 = detail::NullType, - typename T15 = detail::NullType, typename T16 = detail::NullType, - typename T17 = detail::NullType, typename T18 = detail::NullType, - typename T19 = detail::NullType, typename T20 = detail::NullType, - typename T21 = detail::NullType, typename T22 = detail::NullType, - typename T23 = detail::NullType, typename T24 = detail::NullType, - typename T25 = detail::NullType, typename T26 = detail::NullType, - typename T27 = detail::NullType, typename T28 = detail::NullType, - typename T29 = detail::NullType, typename T30 = detail::NullType, - typename T31 = detail::NullType -> -struct make_kernel : - public detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - > -{ -public: - typedef detail::KernelFunctorGlobal< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - > FunctorType; - - make_kernel( - const Program& program, - const STRING_CLASS name, - cl_int * err = NULL) : - detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - >( - FunctorType(program, name, err)) - {} + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_( + const EnqueueArgs&, + Ts...); - make_kernel( - const Kernel kernel) : - detail::functionImplementation_< - T0, T1, T2, T3, - T4, T5, T6, T7, - T8, T9, T10, T11, - T12, T13, T14, T15, - T16, T17, T18, T19, - T20, T21, T22, T23, - T24, T25, T26, T27, - T28, T29, T30, T31 - >( - FunctorType(kernel)) - {} -}; + Event operator()( + const EnqueueArgs& enqueueArgs, + Ts... args) + { + return functor_( + enqueueArgs, args...); + } + }; +} // namespace compatibility //---------------------------------------------------------------------------------------------------------------------- -#undef __ERR_STR -#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS) +#undef CL_HPP_ERR_STR_ +#if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) #undef __GET_DEVICE_INFO_ERR #undef __GET_PLATFORM_INFO_ERR #undef __GET_DEVICE_IDS_ERR @@ -12874,32 +9546,24 @@ public: #undef __ENQUEUE_TASK_ERR #undef __ENQUEUE_NATIVE_KERNEL -#undef __CL_EXPLICIT_CONSTRUCTORS - #undef __UNLOAD_COMPILER_ERR -#endif //__CL_USER_OVERRIDE_ERROR_STRINGS +#undef __CREATE_SUB_DEVICES_ERR -#undef __CL_FUNCTION_TYPE +#undef __CREATE_PIPE_ERR +#undef __GET_PIPE_INFO_ERR -// Extensions -/** - * Deprecated APIs for 1.2 - */ -#if defined(CL_VERSION_1_1) -#undef __INIT_CL_EXT_FCN_PTR -#endif // #if defined(CL_VERSION_1_1) -#undef __CREATE_SUB_DEVICES +#endif //CL_HPP_USER_OVERRIDE_ERROR_STRINGS -#if defined(USE_CL_DEVICE_FISSION) -#undef __PARAM_NAME_DEVICE_FISSION -#endif // USE_CL_DEVICE_FISSION +// Extensions +#undef CL_HPP_INIT_CL_EXT_FCN_PTR_ +#undef CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_ -#undef __DEFAULT_NOT_INITIALIZED -#undef __DEFAULT_BEING_INITIALIZED -#undef __DEFAULT_INITIALIZED +#if defined(CL_HPP_USE_CL_DEVICE_FISSION) +#undef CL_HPP_PARAM_NAME_DEVICE_FISSION_ +#endif // CL_HPP_USE_CL_DEVICE_FISSION -#undef CL_HPP_RVALUE_REFERENCES_SUPPORTED -#undef CL_HPP_NOEXCEPT +#undef CL_HPP_NOEXCEPT_ +#undef CL_HPP_DEFINE_STATIC_MEMBER_ } // namespace cl diff --git a/libethash-cl/ethash_cl_miner.h b/libethash-cl/ethash_cl_miner.h index ae249f93d..45c39eee3 100644 --- a/libethash-cl/ethash_cl_miner.h +++ b/libethash-cl/ethash_cl_miner.h @@ -1,15 +1,24 @@ #pragma once -#define __CL_ENABLE_EXCEPTIONS -#define CL_USE_DEPRECATED_OPENCL_1_2_APIS +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_HPP_MINIMUM_OPENCL_VERSION 110 +#define CL_HPP_ENABLE_EXCEPTIONS 1 #if defined(__clang__) #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wunused-parameter" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wignored-qualifiers" +#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif + #include "CL/cl.hpp" + +#if defined(__clang__) #pragma clang diagnostic pop -#else -#include "CL/cl.hpp" +#elif defined(__GNUC__) +#pragma GCC diagnostic pop #endif #include