From 7f6aa13d0bb33fbbdfb7e1a3fa3077c595effc7c Mon Sep 17 00:00:00 2001 From: BlubbFish <git@blubbfish.net> Date: Thu, 9 Mar 2017 20:41:31 +0000 Subject: [PATCH] =?UTF-8?q?OpenCL=20hinzugef=C3=BCgt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- AES/AES.vcxproj | 71 ++ AES/AES.vcxproj.filters | 27 + AES/AES.vcxproj.user | 3 + AES/AESEncryptDecrypt.cpp | 1391 +++++++++++++++++++++++++++++++++++++ AES/AESEncryptDecrypt.hpp | 370 ++++++++++ OpenCL.sln | 20 + 6 files changed, 1882 insertions(+) create mode 100644 AES/AES.vcxproj create mode 100644 AES/AES.vcxproj.filters create mode 100644 AES/AES.vcxproj.user create mode 100644 AES/AESEncryptDecrypt.cpp create mode 100644 AES/AESEncryptDecrypt.hpp create mode 100644 OpenCL.sln diff --git a/AES/AES.vcxproj b/AES/AES.vcxproj new file mode 100644 index 0000000..30b121a --- /dev/null +++ b/AES/AES.vcxproj @@ -0,0 +1,71 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup Label="ProjectConfigurations"> + <ProjectConfiguration Include="Debug|Win32"> + <Configuration>Debug</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + <ProjectConfiguration Include="Release|Win32"> + <Configuration>Release</Configuration> + <Platform>Win32</Platform> + </ProjectConfiguration> + </ItemGroup> + <ItemGroup> + <ClCompile Include="AESEncryptDecrypt.cpp" /> + </ItemGroup> + <ItemGroup> + <ClInclude Include="AESEncryptDecrypt.hpp" /> + </ItemGroup> + <PropertyGroup Label="Globals"> + <ProjectGuid>{3FE30046-FDCA-46D3-82DC-2CDC29964EC0}</ProjectGuid> + <RootNamespace>AES</RootNamespace> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>true</UseDebugLibraries> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> + <ConfigurationType>Application</ConfigurationType> + <UseDebugLibraries>false</UseDebugLibraries> + <WholeProgramOptimization>true</WholeProgramOptimization> + <CharacterSet>MultiByte</CharacterSet> + </PropertyGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> + <ImportGroup Label="ExtensionSettings"> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> + </ImportGroup> + <PropertyGroup Label="UserMacros" /> + <PropertyGroup /> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <Optimization>Disabled</Optimization> + </ClCompile> + <Link> + <GenerateDebugInformation>true</GenerateDebugInformation> + </Link> + </ItemDefinitionGroup> + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> + <ClCompile> + <WarningLevel>Level3</WarningLevel> + <Optimization>MaxSpeed</Optimization> + <FunctionLevelLinking>true</FunctionLevelLinking> + <IntrinsicFunctions>true</IntrinsicFunctions> + </ClCompile> + <Link> + <GenerateDebugInformation>true</GenerateDebugInformation> + <EnableCOMDATFolding>true</EnableCOMDATFolding> + <OptimizeReferences>true</OptimizeReferences> + </Link> + </ItemDefinitionGroup> + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> + <ImportGroup Label="ExtensionTargets"> + </ImportGroup> +</Project> \ No newline at end of file diff --git a/AES/AES.vcxproj.filters b/AES/AES.vcxproj.filters new file mode 100644 index 0000000..aebc003 --- /dev/null +++ b/AES/AES.vcxproj.filters @@ -0,0 +1,27 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> + <ItemGroup> + <Filter Include="Source Files"> + <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> + <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions> + </Filter> + <Filter Include="Header Files"> + <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> + <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions> + </Filter> + <Filter Include="Resource Files"> + <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> + <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> + </Filter> + </ItemGroup> + <ItemGroup> + <ClCompile Include="AESEncryptDecrypt.cpp"> + <Filter>Source Files</Filter> + </ClCompile> + </ItemGroup> + <ItemGroup> + <ClInclude Include="AESEncryptDecrypt.hpp"> + <Filter>Header Files</Filter> + </ClInclude> + </ItemGroup> +</Project> \ No newline at end of file diff --git a/AES/AES.vcxproj.user b/AES/AES.vcxproj.user new file mode 100644 index 0000000..695b5c7 --- /dev/null +++ b/AES/AES.vcxproj.user @@ -0,0 +1,3 @@ +<?xml version="1.0" encoding="utf-8"?> +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> +</Project> \ No newline at end of file diff --git a/AES/AESEncryptDecrypt.cpp b/AES/AESEncryptDecrypt.cpp new file mode 100644 index 0000000..92aa890 --- /dev/null +++ b/AES/AESEncryptDecrypt.cpp @@ -0,0 +1,1391 @@ +/* ============================================================ + +Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use of this material is permitted under the following +conditions: + +Redistributions must retain the above copyright notice and all terms of this +license. + +In no event shall anyone redistributing or accessing or using this material +commence or participate in any arbitration or legal action relating to this +material against Advanced Micro Devices, Inc. or any copyright holders or +contributors. The foregoing shall survive any expiration or termination of +this license or any agreement or access or use related to this material. + +ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION +OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL. + +THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT +HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY +REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO +SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE +FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER +EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED +WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, +ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. +IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY +ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, +INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS +(US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS +THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND +ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, +OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE +FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE +CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR +DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR +CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE +THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL +SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR +ACCESS OR USE RELATED TO THIS MATERIAL. + +NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS +MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO +RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER +COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH +AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS +DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. +MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED, +EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, +INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, +COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. +MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY +LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL. + +NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is +provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to +computer software and technical data, respectively. Use, duplication, +distribution or disclosure by the U.S. Government and/or DOD agencies is +subject to the full extent of restrictions in all applicable regulations, +including those found at FAR52.227 and DFARS252.227 et seq. and any successor +regulations thereof. Use of this material by the U.S. Government and/or DOD +agencies is acknowledgment of the proprietary rights of any copyright holders +and contributors, including those of Advanced Micro Devices, Inc., as well as +the provisions of FAR52.227-14 through 23 regarding privately developed and/or +commercial computer software. + +This license forms the entire agreement regarding the subject matter hereof and +supersedes all proposals and prior discussions and writings between the parties +with respect thereto. This license does not affect any ownership, rights, title, +or interest in, or relating to, this material. No terms of this license can be +modified or waived, and no breach of this license can be excused, unless done +so in a writing signed by all affected parties. Each term of this license is +separately enforceable. If any term of this license is determined to be or +becomes unenforceable or illegal, such term shall be reformed to the minimum +extent necessary in order for this license to remain in effect in accordance +with its terms as modified by such reformation. This license shall be governed +by and construed in accordance with the laws of the State of Texas without +regard to rules on conflicts of law of any state or jurisdiction or the United +Nations Convention on the International Sale of Goods. All disputes arising out +of this license shall be subject to the jurisdiction of the federal and state +courts in Austin, Texas, and all defenses are hereby waived concerning personal +jurisdiction and venue of these courts. + +============================================================ */ + + +#include "AESEncryptDecrypt.hpp" + +using namespace AES; + +int AESEncryptDecrypt::setupAESEncryptDecrypt() +{ + cl_uint sizeBytes = width*height*sizeof(cl_uchar); + input = (cl_uchar*)malloc(sizeBytes); + if(input == NULL) + { + sampleCommon->error("Failed to allocate host memory. (input)"); + return SDK_FAILURE; + } + + /* initialize the input array, do NOTHING but assignment when decrypt*/ + if(!decrypt) + convertColorToGray(pixels, input); + else + convertGrayToGray(pixels, input); + + /* 1 Byte = 8 bits */ + keySize = keySizeBits/8; + + /* due to unknown represenation of cl_uchar */ + keySizeBits = keySize*sizeof(cl_uchar); + + key = (cl_uchar*)malloc(keySizeBits); + + /* random initialization of key */ + sampleCommon->fillRandom<cl_uchar>(key, keySize, 1, 0, 255, seed); + + /* expand the key */ + explandedKeySize = (rounds+1)*keySize; + expandedKey = (cl_uchar*)malloc(explandedKeySize*sizeof(cl_uchar)); + roundKey = (cl_uchar*)malloc(explandedKeySize*sizeof(cl_uchar)); + + keyExpansion(key, expandedKey, keySize, explandedKeySize); + for(cl_uint i=0; i< rounds+1; ++i) + { + createRoundKey(expandedKey + keySize*i, roundKey + keySize*i); + } + + output = (cl_uchar*)malloc(sizeBytes); + if(output == NULL) + { + sampleCommon->error("Failed to allocate host memory. (output)"); + return SDK_FAILURE; + } + + if(!quiet) + { + if(decrypt) + { + std::cout << "Decrypting Image ...." << std::endl; + } + else + { + std::cout << "Encrypting Image ...." << std::endl; + } + + std::cout << "Input Image : " << inFilename << std::endl; + std::cout << "Key : "; + for(cl_uint i=0; i < keySize; ++i) + { + std::cout << (cl_uint)key[i] << " "; + } + std::cout << std::endl; + } + + return SDK_SUCCESS; +} + +void +AESEncryptDecrypt::convertColorToGray(const uchar4 *pixels, cl_uchar *gray) +{ + for(cl_int i=0; i< height; ++i) + for(cl_int j=0; j<width; ++j) + { + cl_uint index = i*width + j; + // gray = (0.3*R + 0.59*G + 0.11*B) + gray[index] = cl_uchar (pixels[index].x * 0.3 + + pixels[index].y * 0.59 + + pixels[index].z * 0.11 ); + } +} + +void +AESEncryptDecrypt::convertGrayToGray(const uchar4 *pixels, cl_uchar *gray) +{ + for(cl_int i=0; i< height; ++i) + for(cl_int j=0; j<width; ++j) + { + cl_uint index = i*width + j; + gray[index] = pixels[index].x; + } +} + +void +AESEncryptDecrypt::convertGrayToPixels(const cl_uchar *gray, uchar4 *pixels) +{ + for(cl_int i=0; i< height; ++i) + for(cl_int j=0; j<width; ++j) + { + cl_uint index = i*width + j; + pixels[index].x = gray[index]; + pixels[index].y = gray[index]; + pixels[index].z = gray[index]; + } +} +int +AESEncryptDecrypt::setupCL(void) +{ + cl_int status = 0; + size_t deviceListSize; + + cl_device_type dType; + + if(deviceType.compare("cpu") == 0) + { + dType = CL_DEVICE_TYPE_CPU; + } + else //deviceType = "gpu" + { + dType = CL_DEVICE_TYPE_GPU; + } + + /* + * Have a look at the available platforms and pick either + * the AMD one if available or a reasonable default. + */ + + cl_uint numPlatforms; + cl_platform_id platform = NULL; + status = clGetPlatformIDs(0, NULL, &numPlatforms); + if(!sampleCommon->checkVal(status, + CL_SUCCESS, + "clGetPlatformIDs failed.")) + { + return SDK_FAILURE; + } + if (0 < numPlatforms) + { + cl_platform_id* platforms = new cl_platform_id[numPlatforms]; + status = clGetPlatformIDs(numPlatforms, platforms, NULL); + if(!sampleCommon->checkVal(status, + CL_SUCCESS, + "clGetPlatformIDs failed.")) + { + return SDK_FAILURE; + } + for (unsigned i = 0; i < numPlatforms; ++i) + { + char pbuf[100]; + status = clGetPlatformInfo(platforms[i], + CL_PLATFORM_VENDOR, + sizeof(pbuf), + pbuf, + NULL); + + if(!sampleCommon->checkVal(status, + CL_SUCCESS, + "clGetPlatformInfo failed.")) + { + return SDK_FAILURE; + } + + platform = platforms[i]; + if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) + { + break; + } + } + delete[] platforms; + } + + /* + * If we could find our platform, use it. Otherwise pass a NULL and get whatever the + * implementation thinks we should be using. + */ + + cl_context_properties cps[3] = + { + CL_CONTEXT_PLATFORM, + (cl_context_properties)platform, + 0 + }; + /* Use NULL for backward compatibility */ + cl_context_properties* cprops = (NULL == platform) ? NULL : cps; + + context = clCreateContextFromType( + cprops, + dType, + NULL, + NULL, + &status); + + if(!sampleCommon->checkVal(status, + CL_SUCCESS, + "clCreateContextFromType failed.")) + return SDK_FAILURE; + + /* First, get the size of device list data */ + status = clGetContextInfo( + context, + CL_CONTEXT_DEVICES, + 0, + NULL, + &deviceListSize); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clGetContextInfo failed.")) + return SDK_FAILURE; + + /* Now allocate memory for device list based on the size we got earlier */ + devices = (cl_device_id *)malloc(deviceListSize); + if(devices==NULL) { + sampleCommon->error("Failed to allocate memory (devices)."); + return SDK_FAILURE; + } + + /* Now, get the device list data */ + status = clGetContextInfo( + context, + CL_CONTEXT_DEVICES, + deviceListSize, + devices, + NULL); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clGetGetContextInfo failed.")) + return SDK_FAILURE; + + /* Get Device specific Information */ + status = clGetDeviceInfo( + devices[0], + CL_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(size_t), + (void *)&maxWorkGroupSize, + NULL); + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE failed.")) + return SDK_FAILURE; + + + status = clGetDeviceInfo( + devices[0], + CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, + sizeof(cl_uint), + (void *)&maxDimensions, + NULL); + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed.")) + return SDK_FAILURE; + + + maxWorkItemSizes = (size_t *)malloc(maxDimensions*sizeof(size_t)); + + status = clGetDeviceInfo( + devices[0], + CL_DEVICE_MAX_WORK_ITEM_SIZES, + sizeof(size_t)*maxDimensions, + (void *)maxWorkItemSizes, + NULL); + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed.")) + return SDK_FAILURE; + + + status = clGetDeviceInfo( + devices[0], + CL_DEVICE_LOCAL_MEM_SIZE, + sizeof(cl_ulong), + (void *)&totalLocalMemory, + NULL); + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clGetDeviceInfo CL_DEVICE_LOCAL_MEM_SIZES failed.")) + return SDK_FAILURE; + + + { + /* The block is to move the declaration of prop closer to its use */ + cl_command_queue_properties prop = 0; + if(timing) + prop |= CL_QUEUE_PROFILING_ENABLE; + + commandQueue = clCreateCommandQueue( + context, + devices[0], + prop, + &status); + if(!sampleCommon->checkVal( + status, + 0, + "clCreateCommandQueue failed.")) + return SDK_FAILURE; + } + inputBuffer = clCreateBuffer( + context, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + sizeof(cl_uchar ) * width * height, + input, + &status); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clCreateBuffer failed. (inputBuffer)")) + return SDK_FAILURE; + + outputBuffer = clCreateBuffer( + context, + CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, + sizeof(cl_uchar ) * width * height, + output, + &status); + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clCreateBuffer failed. (outputBuffer)")) + return SDK_FAILURE; + + rKeyBuffer = clCreateBuffer( + context, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + sizeof(cl_uchar ) * explandedKeySize, + roundKey, + &status); + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clCreateBuffer failed. (rKeyBuffer)")) + return SDK_FAILURE; + + cl_uchar * sBox; + sBox = (cl_uchar *)sbox; + sBoxBuffer = clCreateBuffer( + context, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + sizeof(cl_uchar ) * 256, + sBox, + &status); + + cl_uchar * rsBox; + rsBox = (cl_uchar *)rsbox; + rsBoxBuffer = clCreateBuffer( + context, + CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, + sizeof(cl_uchar ) * 256, + rsBox, + &status); + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clCreateBuffer failed. (sBoxBuffer)")) + return SDK_FAILURE; + + /* create a CL program using the kernel source */ + streamsdk::SDKFile kernelFile; + std::string kernelPath = sampleCommon->getPath(); + kernelPath.append("AESEncryptDecrypt_Kernels.cl"); + kernelFile.open(kernelPath.c_str()); + const char * source = kernelFile.source().c_str(); + size_t sourceSize[] = { strlen(source) }; + program = clCreateProgramWithSource( + context, + 1, + &source, + sourceSize, + &status); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clCreateProgramWithSource failed.")) + return SDK_FAILURE; + + /* create a cl program executable for all the devices specified */ + status = clBuildProgram(program, 1, devices, NULL, NULL, NULL); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clBuildProgram failed.")) + return SDK_FAILURE; + + /* get a kernel object handle for a kernel with the given name */ + if(decrypt) + { + kernel = clCreateKernel(program, "AESDecrypt", &status); + } + else + { + kernel = clCreateKernel(program, "AESEncrypt", &status); + } + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clCreateKernel failed.")) + return SDK_FAILURE; + + return SDK_SUCCESS; +} + + +int +AESEncryptDecrypt::runCLKernels(void) +{ + cl_int status; + cl_event events[2]; + + size_t globalThreads[2]= {width/4, height}; + size_t localThreads[2] = {1 , 4 }; + + status = clGetKernelWorkGroupInfo( + kernel, + devices[0], + CL_KERNEL_LOCAL_MEM_SIZE, + sizeof(cl_ulong), + &usedLocalMemory, + NULL); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clGetKernelWorkGroupInfo failed.(usedLocalMemory)")) + return SDK_FAILURE; + + availableLocalMemory = totalLocalMemory - usedLocalMemory; + + /* two local memories buffers of sizeof(cl_uchar)*keySize */ + neededLocalMemory = 2*sizeof(cl_uchar)*keySize; + + if(neededLocalMemory > availableLocalMemory) + { + std::cout << "Unsupported: Insufficient local memory on device." << std::endl; + return SDK_SUCCESS; + } + + if(localThreads[0] > maxWorkItemSizes[0] || + localThreads[1] > maxWorkItemSizes[1] || + localThreads[0]*localThreads[1] > maxWorkGroupSize) + { + std::cout << "Unsupported: Device does not support requested number of work items."<<std::endl; + return SDK_SUCCESS; + } + + /* Check group size against kernelWorkGroupSize */ + status = clGetKernelWorkGroupInfo(kernel, + devices[0], + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(size_t), + &kernelWorkGroupSize, + 0); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clGetKernelWorkGroupInfo failed.")) + { + return SDK_FAILURE; + } + + if((cl_uint)(localThreads[0]*localThreads[1]) > kernelWorkGroupSize ) + { + std::cout<<"Out of Resources!" << std::endl; + std::cout<<"Group Size specified : "<<localThreads[0]*localThreads[1]<<std::endl; + std::cout<<"Max Group Size supported on the kernel : " + <<kernelWorkGroupSize<<std::endl; + return SDK_FAILURE; + } + + /*** Set appropriate arguments to the kernel ***/ + status = clSetKernelArg( + kernel, + 0, + sizeof(cl_mem), + (void *)&outputBuffer); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clSetKernelArg failed. (outputBuffer)")) + return SDK_FAILURE; + + status = clSetKernelArg( + kernel, + 1, + sizeof(cl_mem), + (void *)&inputBuffer); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clSetKernelArg failed. (inputBuffer)")) + return SDK_FAILURE; + + status = clSetKernelArg( + kernel, + 2, + sizeof(cl_mem), + (void *)&rKeyBuffer); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clSetKernelArg failed. (rKeyBuffer)")) + return SDK_FAILURE; + + if(decrypt) + { + status = clSetKernelArg( + kernel, + 3, + sizeof(cl_mem), + (void *)&rsBoxBuffer); + } + else + { + status = clSetKernelArg( + kernel, + 3, + sizeof(cl_mem), + (void *)&sBoxBuffer); + } + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clSetKernelArg failed. (SBoxBuffer)")) + return SDK_FAILURE; + + status = clSetKernelArg( + kernel, + 4, + sizeof(cl_uchar)*keySize, + NULL); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clSetKernelArg failed. (block0)")) + return SDK_FAILURE; + + status = clSetKernelArg( + kernel, + 5, + sizeof(cl_uchar)*keySize, + NULL); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clSetKernelArg failed. (block1)")) + return SDK_FAILURE; + + status = clSetKernelArg( + kernel, + 6, + sizeof(cl_uint), + (void *)&width); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clSetKernelArg failed. (width)")) + return SDK_FAILURE; + + status = clSetKernelArg( + kernel, + 7, + sizeof(cl_uint), + (void *)&rounds); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clSetKernelArg failed. (rounds)")) + return SDK_FAILURE; + + + /* + * Enqueue a kernel run call. + */ + status = clEnqueueNDRangeKernel( + commandQueue, + kernel, + 2, + NULL, + globalThreads, + localThreads, + 0, + NULL, + &events[0]); + + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clEnqueueNDRangeKernel failed.")) + return SDK_FAILURE; + + + /* wait for the kernel call to finish execution */ + status = clWaitForEvents(1, &events[0]); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clWaitForEvents failed.")) + return SDK_FAILURE; + + /* Enqueue the results to application pointer*/ + status = clEnqueueReadBuffer( + commandQueue, + outputBuffer, + CL_TRUE, + 0, + width * height * sizeof(cl_uchar), + output, + 0, + NULL, + &events[1]); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clEnqueueReadBuffer failed.")) + return SDK_FAILURE; + + /* Wait for the read buffer to finish execution */ + status = clWaitForEvents(1, &events[1]); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clWaitForEvents failed.")) + return SDK_FAILURE; + + clReleaseEvent(events[0]); + clReleaseEvent(events[1]); + + return SDK_SUCCESS; +} + +cl_uchar +AESEncryptDecrypt::getRconValue(cl_uint num) +{ + return Rcon[num]; +} + +void +AESEncryptDecrypt::rotate(cl_uchar * word) +{ + cl_uchar c = word[0]; + for(cl_uint i=0; i<3; ++i) + { + word[i] = word[i+1]; + } + word[3] = c; +} + +void +AESEncryptDecrypt::core(cl_uchar * word, cl_uint iter) +{ + rotate(word); + + for(cl_uint i=0; i < 4; ++i) + { + word[i] = getSBoxValue(word[i]); + } + + word[0] = word[0]^getRconValue(iter); +} + +void +AESEncryptDecrypt::keyExpansion(cl_uchar * key, cl_uchar * expandedKey, + cl_uint keySize, cl_uint explandedKeySize) +{ + cl_uint currentSize = 0; + cl_uint rConIteration = 1; + cl_uchar temp[4] = {0}; + + for(cl_uint i=0; i < keySize; ++i) + { + expandedKey[i] = key[i]; + } + + currentSize += keySize; + + while(currentSize < explandedKeySize) + { + for(cl_uint i=0; i < 4; ++i) + { + temp[i] = expandedKey[(currentSize - 4) + i]; + } + + if(currentSize%keySize == 0) + { + core(temp, rConIteration++); + } + + //XXX: add extra SBOX here if the keySize is 32 Bytes + + for(cl_uint i=0; i < 4; ++i) + { + expandedKey[currentSize] = expandedKey[currentSize - keySize]^temp[i]; + currentSize++; + } + } +} + +cl_uchar +AESEncryptDecrypt::getSBoxValue(cl_uint num) +{ + return sbox[num]; +} + +cl_uchar +AESEncryptDecrypt::getSBoxInvert(cl_uint num) +{ + return rsbox[num]; +} + +cl_uchar +AESEncryptDecrypt::galoisMultiplication(cl_uchar a, cl_uchar b) +{ + cl_uchar p = 0; + for(cl_uint i=0; i < 8; ++i) + { + if((b&1) == 1) + { + p^=a; + } + cl_uchar hiBitSet = (a & 0x80); + a <<= 1; + if(hiBitSet == 0x80) + { + a ^= 0x1b; + } + b >>= 1; + } + return p; +} + +void +AESEncryptDecrypt::mixColumn(cl_uchar *column) +{ + cl_uchar cpy[4]; + for(cl_uint i=0; i < 4; ++i) + { + cpy[i] = column[i]; + } + column[0] = galoisMultiplication(cpy[0], 2)^ + galoisMultiplication(cpy[3], 1)^ + galoisMultiplication(cpy[2], 1)^ + galoisMultiplication(cpy[1], 3); + + column[1] = galoisMultiplication(cpy[1], 2)^ + galoisMultiplication(cpy[0], 1)^ + galoisMultiplication(cpy[3], 1)^ + galoisMultiplication(cpy[2], 3); + + column[2] = galoisMultiplication(cpy[2], 2)^ + galoisMultiplication(cpy[1], 1)^ + galoisMultiplication(cpy[0], 1)^ + galoisMultiplication(cpy[3], 3); + + column[3] = galoisMultiplication(cpy[3], 2)^ + galoisMultiplication(cpy[2], 1)^ + galoisMultiplication(cpy[1], 1)^ + galoisMultiplication(cpy[0], 3); +} + +void +AESEncryptDecrypt::mixColumnInv(cl_uchar *column) +{ + cl_uchar cpy[4]; + for(cl_uint i=0; i < 4; ++i) + { + cpy[i] = column[i]; + } + column[0] = galoisMultiplication(cpy[0], 14 )^ + galoisMultiplication(cpy[3], 9 )^ + galoisMultiplication(cpy[2], 13)^ + galoisMultiplication(cpy[1], 11); + + column[1] = galoisMultiplication(cpy[1], 14 )^ + galoisMultiplication(cpy[0], 9 )^ + galoisMultiplication(cpy[3], 13)^ + galoisMultiplication(cpy[2], 11); + + column[2] = galoisMultiplication(cpy[2], 14 )^ + galoisMultiplication(cpy[1], 9 )^ + galoisMultiplication(cpy[0], 13)^ + galoisMultiplication(cpy[3], 11); + + column[3] = galoisMultiplication(cpy[3], 14 )^ + galoisMultiplication(cpy[2], 9 )^ + galoisMultiplication(cpy[1], 13)^ + galoisMultiplication(cpy[0], 11); +} + +void +AESEncryptDecrypt::mixColumns(cl_uchar * state, cl_bool inverse) +{ + cl_uchar column[4]; + for(cl_uint i=0; i < 4; ++i) + { + for(cl_uint j=0; j < 4; ++j) + { + column[j] = state[j*4 + i]; + } + + if(inverse) + { + mixColumnInv(column); + } + else + { + mixColumn(column); + } + + for(cl_uint j=0; j < 4; ++j) + { + state[j*4 + i] = column[j]; + } + } +} + +void +AESEncryptDecrypt::subBytes(cl_uchar * state, cl_bool inverse) +{ + for(cl_uint i=0; i < keySize; ++i) + { + state[i] = inverse ? getSBoxInvert(state[i]): getSBoxValue(state[i]); + } +} + +void +AESEncryptDecrypt::shiftRow(cl_uchar *state, cl_uchar nbr) +{ + for(cl_uint i=0; i < nbr; ++i) + { + cl_uchar tmp = state[0]; + for(cl_uint j = 0; j < 3; ++j) + { + state[j] = state[j+1]; + } + state[3] = tmp; + } +} + +void +AESEncryptDecrypt::shiftRowInv(cl_uchar *state, cl_uchar nbr) +{ + for(cl_uint i=0; i < nbr; ++i) + { + cl_uchar tmp = state[3]; + for(cl_uint j = 3; j > 0; --j) + { + state[j] = state[j-1]; + } + state[0] = tmp; + } +} +void +AESEncryptDecrypt::shiftRows(cl_uchar * state, cl_bool inverse) +{ + for(cl_uint i=0; i < 4; ++i) + { + if(inverse) + shiftRowInv(state + i*4, i); + else + shiftRow(state + i*4, i); + } +} + +void +AESEncryptDecrypt::addRoundKey(cl_uchar * state, cl_uchar * rKey) +{ + for(cl_uint i=0; i < keySize; ++i) + { + state[i] = state[i] ^ rKey[i]; + } +} + +void +AESEncryptDecrypt::createRoundKey(cl_uchar * eKey, cl_uchar * rKey) +{ + for(cl_uint i=0; i < 4; ++i) + for(cl_uint j=0; j < 4; ++j) + { + rKey[i+ j*4] = eKey[i*4 + j]; + } +} + +void +AESEncryptDecrypt::aesRound(cl_uchar * state, cl_uchar * rKey) +{ + subBytes(state, decrypt); + shiftRows(state, decrypt); + mixColumns(state, decrypt); + addRoundKey(state, rKey); +} + +void +AESEncryptDecrypt::aesMain(cl_uchar * state, cl_uchar * rKey, cl_uint rounds) +{ + addRoundKey(state, rKey); + for(cl_uint i=1; i < rounds; ++i) + { + aesRound(state, rKey + keySize*i); + } + subBytes(state, decrypt); + shiftRows(state, decrypt); + addRoundKey(state, rKey + keySize*rounds); +} + +void +AESEncryptDecrypt::aesRoundInv(cl_uchar * state, cl_uchar * rKey) +{ + shiftRows(state, decrypt); + subBytes(state, decrypt); + addRoundKey(state, rKey); + mixColumns(state, decrypt); +} + +void +AESEncryptDecrypt::aesMainInv(cl_uchar * state, cl_uchar * rKey, cl_uint rounds) +{ + addRoundKey(state, rKey + keySize*rounds); + for(cl_uint i=rounds-1; i > 0; --i) + { + aesRoundInv(state, rKey + keySize*i); + } + shiftRows(state, decrypt); + subBytes(state, decrypt); + addRoundKey(state, rKey); +} + +/** + * + * + */ +void +AESEncryptDecrypt::AESEncryptDecryptCPUReference(cl_uchar * output , + cl_uchar * input , + cl_uchar * rKey , + cl_uint explandedKeySize, + cl_uint width , + cl_uint height , + cl_bool inverse ) +{ + cl_uchar block[16]; + + for(cl_uint blocky = 0; blocky < height/4; ++blocky) + for(cl_uint blockx= 0; blockx < width/4; ++blockx) + { + for(cl_uint i=0; i < 4; ++i) + { + for(cl_uint j=0; j < 4; ++j) + { + cl_uint index = (((blocky * width/4) + blockx) * keySize )+ (i*4 + j); + block[i*4 + j] = input[index]; + } + } + + if(inverse) + aesMainInv(block, rKey, rounds); + else + aesMain(block, rKey, rounds); + + for(cl_uint i=0; i <4 ; ++i) + { + for(cl_uint j=0; j <4; ++j) + { + cl_uint index = (((blocky * width/4) + blockx) * keySize )+ (i*4 + j); + output[index] = block[i*4 + j]; + } + } + } +} + + +int +AESEncryptDecrypt::initialize() +{ + // Call base class Initialize to get default configuration + if(!this->SDKSample::initialize()) + return SDK_FAILURE; + + streamsdk::Option* ifilename_opt = new streamsdk::Option; + if(!ifilename_opt) + { + sampleCommon->error("Memory allocation error.\n"); + return SDK_FAILURE; + } + ifilename_opt->_sVersion = "j"; + ifilename_opt->_lVersion = "input"; + ifilename_opt->_description = "Image as Input"; + ifilename_opt->_type = streamsdk::CA_ARG_STRING; + ifilename_opt->_value = &inFilename; + sampleArgs->AddOption(ifilename_opt); + + delete ifilename_opt; + + //////////////// + streamsdk::Option* ofilename_opt = new streamsdk::Option; + if(!ofilename_opt) + { + sampleCommon->error("Memory allocation error.\n"); + return SDK_FAILURE; + } + ofilename_opt->_sVersion = "o"; + ofilename_opt->_lVersion = "output"; + ofilename_opt->_description = "Image as Ouput"; + ofilename_opt->_type = streamsdk::CA_ARG_STRING; + ofilename_opt->_value = &outFilename; + sampleArgs->AddOption(ofilename_opt); + + delete ofilename_opt; + + //////////////// + streamsdk::Option* decrypt_opt = new streamsdk::Option; + if(!decrypt_opt) + { + sampleCommon->error("Memory allocation error.\n"); + return SDK_FAILURE; + } + decrypt_opt->_sVersion = "d"; + decrypt_opt->_lVersion = "decrypt"; + decrypt_opt->_description = "Decrypt the Input Image"; + decrypt_opt->_type = streamsdk::CA_NO_ARGUMENT; + decrypt_opt->_value = &decrypt; + sampleArgs->AddOption(decrypt_opt); + + delete decrypt_opt; + + streamsdk::Option* num_iterations = new streamsdk::Option; + if(!num_iterations) + { + sampleCommon->error("Memory allocation error.\n"); + return SDK_FAILURE; + } + + num_iterations->_sVersion = "x"; + num_iterations->_lVersion = "iterations"; + num_iterations->_description = "Number of iterations for kernel execution"; + num_iterations->_type = streamsdk::CA_ARG_INT; + num_iterations->_value = &iterations; + + sampleArgs->AddOption(num_iterations); + + delete num_iterations; + + return SDK_SUCCESS; +} + +int +AESEncryptDecrypt::setup() +{ + + std::string filePath = sampleCommon->getPath() + inFilename; + image.load(filePath.c_str()); + + width = image.getWidth(); + height = image.getHeight(); + + /* check condition for the bitmap to be initialized */ + if(width<0 || height <0) + return SDK_FAILURE; + + pixels = image.getPixels(); + + if(setupAESEncryptDecrypt()!=SDK_SUCCESS) + return SDK_FAILURE; + + int timer = sampleCommon->createTimer(); + sampleCommon->resetTimer(timer); + sampleCommon->startTimer(timer); + + if(setupCL()!=SDK_SUCCESS) + return SDK_FAILURE; + + sampleCommon->stopTimer(timer); + + setupTime = (double)(sampleCommon->readTimer(timer)); + + return SDK_SUCCESS; +} + + +int +AESEncryptDecrypt::run() +{ + int timer = sampleCommon->createTimer(); + sampleCommon->resetTimer(timer); + sampleCommon->startTimer(timer); + + std::cout << "Executing kernel for " << iterations << + " iterations" << std::endl; + std::cout << "-------------------------------------------" << std::endl; + + for(int i = 0; i < iterations; i++) + { + /* Arguments are set and execution call is enqueued on command buffer */ + if(runCLKernels()!=SDK_SUCCESS) + return SDK_FAILURE; + } + + sampleCommon->stopTimer(timer); + totalKernelTime = (double)(sampleCommon->readTimer(timer)) / iterations; + + + //XXX: Write output to an output Image + + convertGrayToPixels(output, pixels); + image.write(outFilename.c_str()); + + if(!quiet) { + std::cout << "Output Filename : " << outFilename << std::endl; + } + + + return SDK_SUCCESS; +} + +int +AESEncryptDecrypt::verifyResults() +{ + if(verify) + { + verificationOutput = (cl_uchar *) malloc(width*height*sizeof(cl_uchar)); + if(verificationOutput==NULL) { + sampleCommon->error("Failed to allocate host memory. (verificationOutput)"); + return SDK_FAILURE; + } + + /* + * reference implementation + */ + int refTimer = sampleCommon->createTimer(); + sampleCommon->resetTimer(refTimer); + sampleCommon->startTimer(refTimer); + AESEncryptDecryptCPUReference(verificationOutput, input, roundKey, explandedKeySize, + width, height, decrypt); + sampleCommon->stopTimer(refTimer); + referenceKernelTime = sampleCommon->readTimer(refTimer); + + /* compare the results and see if they match */ + if(memcmp(output, verificationOutput, height*width*sizeof(cl_uchar)) == 0) + { + std::cout<<"Passed!\n"; + return SDK_SUCCESS; + } + else + { + std::cout<<"Failed\n"; + return SDK_FAILURE; + } + } + + return SDK_SUCCESS; +} + +void AESEncryptDecrypt::printStats() +{ + std::string strArray[4] = {"Width", "Height", "Time(sec)", "KernelTime(sec)"}; + std::string stats[4]; + + totalTime = setupTime + totalKernelTime; + + stats[0] = sampleCommon->toString(width , std::dec); + stats[1] = sampleCommon->toString(height , std::dec); + stats[2] = sampleCommon->toString(totalTime, std::dec); + stats[3] = sampleCommon->toString(totalKernelTime, std::dec); + + this->SDKSample::printStats(strArray, stats, 4); +} + +int AESEncryptDecrypt::cleanup() +{ + /* Releases OpenCL resources (Context, Memory etc.) */ + cl_int status; + + status = clReleaseKernel(kernel); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseKernel failed.")) + return SDK_FAILURE; + + status = clReleaseProgram(program); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseProgram failed.")) + return SDK_FAILURE; + + status = clReleaseMemObject(inputBuffer); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseMemObject failed.")) + return SDK_FAILURE; + + status = clReleaseMemObject(outputBuffer); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseMemObject failed.")) + return SDK_FAILURE; + + status = clReleaseMemObject(rKeyBuffer); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseMemObject failed.")) + return SDK_FAILURE; + + status = clReleaseMemObject(sBoxBuffer); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseMemObject failed.")) + return SDK_FAILURE; + + status = clReleaseMemObject(rsBoxBuffer); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseMemObject failed.")) + return SDK_FAILURE; + + status = clReleaseCommandQueue(commandQueue); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseCommandQueue failed.")) + return SDK_FAILURE; + + status = clReleaseContext(context); + if(!sampleCommon->checkVal( + status, + CL_SUCCESS, + "clReleaseContext failed.")) + return SDK_FAILURE; + + /* release program resources (input memory etc.) */ + if(input) + free(input); + + if(key) + free(key); + + if(expandedKey) + free(expandedKey); + + if(roundKey) + free(roundKey); + + if(output) + free(output); + + if(verificationOutput) + free(verificationOutput); + + if(devices) + free(devices); + + if(maxWorkItemSizes) + free(maxWorkItemSizes); + + return SDK_SUCCESS; +} + +int +main(int argc, char * argv[]) +{ + AESEncryptDecrypt clAESEncryptDecrypt("OpenCL AES Encrypt Decrypt"); + + if(clAESEncryptDecrypt.initialize()!=SDK_SUCCESS) + return SDK_FAILURE; + if(!clAESEncryptDecrypt.parseCommandLine(argc, argv)) + return SDK_FAILURE; + if(clAESEncryptDecrypt.setup()!=SDK_SUCCESS) + return SDK_FAILURE; + if(clAESEncryptDecrypt.run()!=SDK_SUCCESS) + return SDK_FAILURE; + if(clAESEncryptDecrypt.verifyResults()!=SDK_SUCCESS) + return SDK_FAILURE; + if(clAESEncryptDecrypt.cleanup()!=SDK_SUCCESS) + return SDK_FAILURE; + clAESEncryptDecrypt.printStats(); + + return SDK_SUCCESS; +} + diff --git a/AES/AESEncryptDecrypt.hpp b/AES/AESEncryptDecrypt.hpp new file mode 100644 index 0000000..3092f10 --- /dev/null +++ b/AES/AESEncryptDecrypt.hpp @@ -0,0 +1,370 @@ +/* ============================================================ + +Copyright (c) 2009 Advanced Micro Devices, Inc. All rights reserved. + +Redistribution and use of this material is permitted under the following +conditions: + +Redistributions must retain the above copyright notice and all terms of this +license. + +In no event shall anyone redistributing or accessing or using this material +commence or participate in any arbitration or legal action relating to this +material against Advanced Micro Devices, Inc. or any copyright holders or +contributors. The foregoing shall survive any expiration or termination of +this license or any agreement or access or use related to this material. + +ANY BREACH OF ANY TERM OF THIS LICENSE SHALL RESULT IN THE IMMEDIATE REVOCATION +OF ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE THIS MATERIAL. + +THIS MATERIAL IS PROVIDED BY ADVANCED MICRO DEVICES, INC. AND ANY COPYRIGHT +HOLDERS AND CONTRIBUTORS "AS IS" IN ITS CURRENT CONDITION AND WITHOUT ANY +REPRESENTATIONS, GUARANTEE, OR WARRANTY OF ANY KIND OR IN ANY WAY RELATED TO +SUPPORT, INDEMNITY, ERROR FREE OR UNINTERRUPTED OPERA TION, OR THAT IT IS FREE +FROM DEFECTS OR VIRUSES. ALL OBLIGATIONS ARE HEREBY DISCLAIMED - WHETHER +EXPRESS, IMPLIED, OR STATUTORY - INCLUDING, BUT NOT LIMITED TO, ANY IMPLIED +WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, +ACCURACY, COMPLETENESS, OPERABILITY, QUALITY OF SERVICE, OR NON-INFRINGEMENT. +IN NO EVENT SHALL ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, PUNITIVE, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, REVENUE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED OR BASED ON ANY THEORY OF LIABILITY +ARISING IN ANY WAY RELATED TO THIS MATERIAL, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. THE ENTIRE AND AGGREGATE LIABILITY OF ADVANCED MICRO DEVICES, +INC. AND ANY COPYRIGHT HOLDERS AND CONTRIBUTORS SHALL NOT EXCEED TEN DOLLARS +(US $10.00). ANYONE REDISTRIBUTING OR ACCESSING OR USING THIS MATERIAL ACCEPTS +THIS ALLOCATION OF RISK AND AGREES TO RELEASE ADVANCED MICRO DEVICES, INC. AND +ANY COPYRIGHT HOLDERS AND CONTRIBUTORS FROM ANY AND ALL LIABILITIES, +OBLIGATIONS, CLAIMS, OR DEMANDS IN EXCESS OF TEN DOLLARS (US $10.00). THE +FOREGOING ARE ESSENTIAL TERMS OF THIS LICENSE AND, IF ANY OF THESE TERMS ARE +CONSTRUED AS UNENFORCEABLE, FAIL IN ESSENTIAL PURPOSE, OR BECOME VOID OR +DETRIMENTAL TO ADVANCED MICRO DEVICES, INC. OR ANY COPYRIGHT HOLDERS OR +CONTRIBUTORS FOR ANY REASON, THEN ALL RIGHTS TO REDISTRIBUTE, ACCESS OR USE +THIS MATERIAL SHALL TERMINATE IMMEDIATELY. MOREOVER, THE FOREGOING SHALL +SURVIVE ANY EXPIRATION OR TERMINATION OF THIS LICENSE OR ANY AGREEMENT OR +ACCESS OR USE RELATED TO THIS MATERIAL. + +NOTICE IS HEREBY PROVIDED, AND BY REDISTRIBUTING OR ACCESSING OR USING THIS +MATERIAL SUCH NOTICE IS ACKNOWLEDGED, THAT THIS MATERIAL MAY BE SUBJECT TO +RESTRICTIONS UNDER THE LAWS AND REGULATIONS OF THE UNITED STATES OR OTHER +COUNTRIES, WHICH INCLUDE BUT ARE NOT LIMITED TO, U.S. EXPORT CONTROL LAWS SUCH +AS THE EXPORT ADMINISTRATION REGULATIONS AND NATIONAL SECURITY CONTROLS AS +DEFINED THEREUNDER, AS WELL AS STATE DEPARTMENT CONTROLS UNDER THE U.S. +MUNITIONS LIST. THIS MATERIAL MAY NOT BE USED, RELEASED, TRANSFERRED, IMPORTED, +EXPORTED AND/OR RE-EXPORTED IN ANY MANNER PROHIBITED UNDER ANY APPLICABLE LAWS, +INCLUDING U.S. EXPORT CONTROL LAWS REGARDING SPECIFICALLY DESIGNATED PERSONS, +COUNTRIES AND NATIONALS OF COUNTRIES SUBJECT TO NATIONAL SECURITY CONTROLS. +MOREOVER, THE FOREGOING SHALL SURVIVE ANY EXPIRATION OR TERMINATION OF ANY +LICENSE OR AGREEMENT OR ACCESS OR USE RELATED TO THIS MATERIAL. + +NOTICE REGARDING THE U.S. GOVERNMENT AND DOD AGENCIES: This material is +provided with "RESTRICTED RIGHTS" and/or "LIMITED RIGHTS" as applicable to +computer software and technical data, respectively. Use, duplication, +distribution or disclosure by the U.S. Government and/or DOD agencies is +subject to the full extent of restrictions in all applicable regulations, +including those found at FAR52.227 and DFARS252.227 et seq. and any successor +regulations thereof. Use of this material by the U.S. Government and/or DOD +agencies is acknowledgment of the proprietary rights of any copyright holders +and contributors, including those of Advanced Micro Devices, Inc., as well as +the provisions of FAR52.227-14 through 23 regarding privately developed and/or +commercial computer software. + +This license forms the entire agreement regarding the subject matter hereof and +supersedes all proposals and prior discussions and writings between the parties +with respect thereto. This license does not affect any ownership, rights, title, +or interest in, or relating to, this material. No terms of this license can be +modified or waived, and no breach of this license can be excused, unless done +so in a writing signed by all affected parties. Each term of this license is +separately enforceable. If any term of this license is determined to be or +becomes unenforceable or illegal, such term shall be reformed to the minimum +extent necessary in order for this license to remain in effect in accordance +with its terms as modified by such reformation. This license shall be governed +by and construed in accordance with the laws of the State of Texas without +regard to rules on conflicts of law of any state or jurisdiction or the United +Nations Convention on the International Sale of Goods. All disputes arising out +of this license shall be subject to the jurisdiction of the federal and state +courts in Austin, Texas, and all defenses are hereby waived concerning personal +jurisdiction and venue of these courts. + +============================================================ */ + + +#ifndef AESENCRYPTDECRYPT_H_ +#define AESENCRYPTDECRYPT_H_ + +#include <CL/cl.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <string.h> +#include <SDKUtil/SDKCommon.hpp> +#include <SDKUtil/SDKApplication.hpp> +#include <SDKUtil/SDKCommandArgs.hpp> +#include <SDKUtil/SDKFile.hpp> +#include <SDKUtil/SDKBitMap.hpp> + +using namespace streamsdk; +/** + * AESEncryptDecrypt + * Class implements OpenCL AESEncryptDecrypt sample + * Derived from SDKSample base class + */ + +namespace AES +{ + cl_uchar sbox[256] = + { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76 //0 + , 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0 //1 + , 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15 //2 + , 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75 //3 + , 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84 //4 + , 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf //5 + , 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8 //6 + , 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2 //7 + , 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73 //8 + , 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb //9 + , 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79 //A + , 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08 //B + , 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a //C + , 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e //D + , 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf //E + , 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16};//F + //0 1 2 3 4 5 6 7 8 9 A B C D E F + + + cl_uchar rsbox[256] = + { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb + , 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb + , 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e + , 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25 + , 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92 + , 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84 + , 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06 + , 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b + , 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73 + , 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e + , 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b + , 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4 + , 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f + , 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef + , 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61 + , 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d}; + + + cl_uchar Rcon[255] = + { 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a + , 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39 + , 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a + , 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8 + , 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef + , 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc + , 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b + , 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3 + , 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94 + , 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20 + , 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35 + , 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd, 0x61, 0xc2, 0x9f + , 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb, 0x8d, 0x01, 0x02, 0x04 + , 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, 0x2f, 0x5e, 0xbc, 0x63 + , 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91, 0x39, 0x72, 0xe4, 0xd3, 0xbd + , 0x61, 0xc2, 0x9f, 0x25, 0x4a, 0x94, 0x33, 0x66, 0xcc, 0x83, 0x1d, 0x3a, 0x74, 0xe8, 0xcb }; + + + class AESEncryptDecrypt : public SDKSample + { + cl_uint seed;/**< Seed value for random number generation */ + cl_double setupTime;/**< Time for opencl setup */ + cl_double totalKernelTime;/**< Time for kernel execution */ + cl_double totalProgramTime;/**< Time for program execution */ + cl_double referenceKernelTime;/**< Time for reference implementation */ + cl_uchar *input; /**< Input array */ + cl_uchar *key; /**< Encryption Key */ + cl_uchar *expandedKey; /**< Encryption Key after expanding*/ + cl_uchar *roundKey; /**< Encryption Key after expanding rounded*/ + cl_uchar *output; /**< Output array */ + cl_uchar *verificationOutput;/**< Output array for reference implementation */ + cl_context context; /**< CL context */ + cl_device_id *devices; /**< CL device list */ + cl_mem inputBuffer; /**< CL memory input buffer */ + cl_mem outputBuffer; /**< CL memory output buffer */ + cl_mem rKeyBuffer; + cl_mem sBoxBuffer; + cl_mem rsBoxBuffer; + cl_command_queue commandQueue; /**< CL command queue */ + cl_program program; /**< CL program */ + cl_kernel kernel; /**< CL kernel */ + cl_bool decrypt; + + std::string inFilename; /**< filename of the input image */ + std::string outFilename; /**< filename of the input image */ + uchar4 *pixels; + cl_uint keySizeBits; + cl_uint keySize; + cl_uint explandedKeySize; + cl_uint rounds; + cl_int width; + cl_int height; + SDKBitMap image; + + size_t maxWorkGroupSize; /**< Device Specific Information */ + cl_uint maxDimensions; + size_t * maxWorkItemSizes; + size_t kernelWorkGroupSize; /**< Group Size returned by kernel */ + cl_ulong totalLocalMemory; + cl_ulong usedLocalMemory; + cl_ulong availableLocalMemory; + cl_ulong neededLocalMemory; + int iterations; /**< Number of iterations for kernel execution */ + + + private: + void convertColorToGray(const uchar4 *pixels, cl_uchar *gray); + void convertGrayToGray(const uchar4 *pixels, cl_uchar *gray); + void convertGrayToPixels(const cl_uchar *gray, uchar4 *pixels); + + public: + /** + * Constructor + * Initialize member variables + * @param name name of sample (string) + */ + AESEncryptDecrypt(std::string name) + : SDKSample(name) { + seed = 123; + input = NULL; + output = NULL; + key = NULL; + verificationOutput = NULL; + inFilename = "input512.bmp"; + outFilename = "output.bmp"; + decrypt = false; + keySizeBits = 128; + rounds = 10; + setupTime = 0; + totalKernelTime = 0; + iterations = 1; + } + + /** + * Constructor + * Initialize member variables + * @param name name of sample (const char*) + */ + AESEncryptDecrypt(const char* name) + : SDKSample(name) { + seed = 123; + input = NULL; + output = NULL; + key = NULL; + verificationOutput = NULL; + inFilename = "input512.bmp"; + outFilename = "output.bmp"; + decrypt = false; + keySizeBits = 128; + rounds = 10; + setupTime = 0; + totalKernelTime = 0; + iterations = 1; + } + + /** + * Allocate and initialize host memory array with random values + * @return 1 on success and 0 on failure + */ + int setupAESEncryptDecrypt(); + + /** + * OpenCL related initialisations. + * Set up Context, Device list, Command Queue, Memory buffers + * Build CL kernel program executable + * @return 1 on success and 0 on failure + */ + int setupCL(); + + /** + * Set values for kernels' arguments, enqueue calls to the kernels + * on to the command queue, wait till end of kernel execution. + * Get kernel start and end time if timing is enabled + * @return 1 on success and 0 on failure + */ + int runCLKernels(); + + /** + * TODO: + */ + /* common functions for both encryption and decryption */ + void mixColumns(cl_uchar * state, cl_bool inverse); + void subBytes(cl_uchar * state, cl_bool inverse); + void addRoundKey(cl_uchar * state, cl_uchar * roundKey); + void shiftRows(cl_uchar * state, cl_bool inverse); + cl_uchar galoisMultiplication(cl_uchar a, cl_uchar b); + + /* encryption specific*/ + void aesMain(cl_uchar * state, cl_uchar * expandedKey, cl_uint rounds); + void aesRound(cl_uchar * state, cl_uchar * roundKey); + void mixColumn(cl_uchar *column); + void shiftRow(cl_uchar * state, cl_uchar nbr); + cl_uchar getSBoxValue(cl_uint num); + + /* decryption specific */ + void aesMainInv(cl_uchar * state, cl_uchar * expandedKey, cl_uint rounds); + void aesRoundInv(cl_uchar * state, cl_uchar * roundKey); + void mixColumnInv(cl_uchar *column); + void shiftRowInv(cl_uchar * state, cl_uchar nbr); + cl_uchar getSBoxInvert(cl_uint num); + + /* key generation specific functions */ + void createRoundKey(cl_uchar * expandedKey, cl_uchar * roundKey); + cl_uchar getRconValue(cl_uint num); + void rotate(cl_uchar * word); + void core(cl_uchar * word, cl_uint iter); + void keyExpansion(cl_uchar * key, cl_uchar * expandedKey, + cl_uint keySize, cl_uint explandedKeySize); + + void AESEncryptDecryptCPUReference(cl_uchar * output, + cl_uchar * input , + cl_uchar * key , + cl_uint keySize , + cl_uint width , + cl_uint height , + cl_bool inverse ); + /** + * Override from SDKSample. Print sample stats. + */ + void printStats(); + + /** + * Override from SDKSample. Initialize + * command line parser, add custom options + */ + int initialize(); + + /** + * Override from SDKSample, adjust width and height + * of execution domain, perform all sample setup + */ + int setup(); + + /** + * Override from SDKSample + * Run OpenCL Bitonic Sort + */ + int run(); + + /** + * Override from SDKSample + * Cleanup memory allocations + */ + int cleanup(); + + /** + * Override from SDKSample + * Verify against reference implementation + */ + int verifyResults(); + }; +}//namespace AES + +#endif + diff --git a/OpenCL.sln b/OpenCL.sln new file mode 100644 index 0000000..c6da8ff --- /dev/null +++ b/OpenCL.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AES", "AES\AES.vcxproj", "{3FE30046-FDCA-46D3-82DC-2CDC29964EC0}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {3FE30046-FDCA-46D3-82DC-2CDC29964EC0}.Debug|Win32.ActiveCfg = Debug|Win32 + {3FE30046-FDCA-46D3-82DC-2CDC29964EC0}.Debug|Win32.Build.0 = Debug|Win32 + {3FE30046-FDCA-46D3-82DC-2CDC29964EC0}.Release|Win32.ActiveCfg = Release|Win32 + {3FE30046-FDCA-46D3-82DC-2CDC29964EC0}.Release|Win32.Build.0 = Release|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal