diff --git a/CHANGES b/CHANGES index 8062d9fafa07996910504c720889a1a751b8c757..95e6786fd06192998ca19eb3ca8c819d467df8de 100644 --- a/CHANGES +++ b/CHANGES @@ -1,10 +1,12 @@ + +Change logs have been moved to doc/notes*.rst + 4.1 Unreleased ================= Notable User Facing Changes --------------------------- - Added support for Coarse-Grained buffer SVM on CUDA devices. - Notable Fixes ------------- diff --git a/CMakeLists.txt b/CMakeLists.txt index ad0b95b24ee25cfcaf73d8b1d7fc214500e146e7..b407f19efef4da0015a3b1425df8378cb23a2106 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1424,8 +1424,8 @@ if(ENABLE_ALMAIF_DEVICE) set(XRT $ENV{XILINX_XRT}) if(NOT XRT_INCLUDEDIR) - if(EXISTS "${XRT}/include/xrt") - set(XRT_INCLUDEDIR "${XRT}/include/xrt" CACHE PATH "XRT include dir") + if(EXISTS "${XRT}/include") + set(XRT_INCLUDEDIR "${XRT}/include" CACHE PATH "XRT include dir") else() message(FATAL_ERROR "please provide -DXRT_INCLUDEDIR=... to CMake") endif() diff --git a/doc/notes_5_0.rst b/doc/notes_5_0.rst index 7a5e2d5171ec5962cf2be2f230f845fc24de1f7d..7606f17fc6fe8c84e859e6446756d5dc7394b1f2 100644 --- a/doc/notes_5_0.rst +++ b/doc/notes_5_0.rst @@ -37,6 +37,20 @@ CUDA driver has gained some new features: never been tested properly and will be removed in the next release. SPIR-V remains the supported option. + +* AlmaIF: Add DBDevice backend, which can be used to transparently + reconfigure FPGAs from different vendors using a database of bitstreams. + The database with the bitstreams is generated by AFOCL project + (github.com/cpc/AFOCL). See a following publication for more info: + Topi Leppänen, Joonas Multanen, Leevi Leppänen, Pekka Jääskeläinen: + "AFOCL: Portable OpenCL Programming of FPGAs via Automated + Built-in Kernel Management", + 2023 IEEE Nordic Circuits and Systems Conference (NorCAS), + Aalborg, Denmark, 2023, pp. 
1-7, + doi: 10.1109/NorCAS58970.2023.10305457 + + + ================ Acknowledgements ================ diff --git a/doc/sphinx/source/almaif.rst b/doc/sphinx/source/almaif.rst index 933dd3f16f58422387374488020aa8b9eee0a767..ea54fb35adb476561c3aff744dd12e2d6b455fda 100644 --- a/doc/sphinx/source/almaif.rst +++ b/doc/sphinx/source/almaif.rst @@ -179,7 +179,7 @@ First, set CMAKE variable VIVADO_PATH to point to the directory with the you can set ENABLE_TCE to 1 to enable RTL and firmware generation of various OpenASIP TTA cores with different memory configurations. Then, you can simulate them with ttasim instruction set simulator by running -``LLVM=1 ../tools/scripts/run_almaif_tests`` from the build directory. +``../tools/scripts/run_almaif_tests`` from the build directory. 2. If you have Vitis HLS installed, set VITIS_HLS_PATH to point to the directory with the vitis_hls executable. @@ -249,8 +249,40 @@ Example usage of the mode can be found in examples/accel/CMakelists.txt, which generates standalone tests using both ttasim and RTL simulator (ghdl) to run the example0 kernel on various TTA configurations. -Wrapping new hardware component -------------------------------- + +Using a bitstream database +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can use the AlmaIF-driver with the cross-vendor bitstream databases generated +with the `AFOCL-project <http://github.com/cpc/AFOCL>`_. +That project generates a directory-based database with JSON-based metadata. +The database contains the bitstreams and firmware-files necessary to implement +the set of built-in kernels defined in the json-file. + +The bitstream database device will report all the built-in kernel implementations it can +find in the database in clGetDeviceInfo's CL_DEVICE_BUILT_IN_KERNELS-query. +The bitstream database device ("0xF") will automatically fetch the bitstream from the database +and reconfigure the FPGA when the user enqueues a built-in kernel for execution.
+Therefore, the user does not need to handle the bitstream binaries themselves, +since the OpenCL implementation reconfigures the FPGA behind the scenes. + +To use AFOCL-databases in PoCL, it is enough to point the AlmaIF-driver to the database +with the environment variables:: + + POCL_DEVICES=almaif POCL_ALMAIF0_PARAMETERS=0xF,<path/to/afocl-db> ./accel_example + +At the moment, the public AlmaIF-driver and AFOCL include support only for +the Xilinx Alveo U280 device, but adding support for other Alveo devices should be easy. +In the AFOCL publication the methodology was also demonstrated with Intel Arria 10, +but the code for that is not yet upstreamed. The driver is built to hide the +vendor-specific details from the end user, with different AlmaIFDevice backends +taking care of vendor-specific details. +For more information about the bitstream database, +see our :ref:`AFOCL-publication (2023) <publications>`. + + +Wrapping a new hardware component +--------------------------------- This section will walk through the addition of new implementation for an existing built-in kernel. @@ -310,8 +342,19 @@ in your academic work, please cite the following publication:: AUTHOR = {Topi Leppänen and Atro Lotvonen and Panagiotis Mousouliotis and Joonas Multanen and Georgios Keramidas and Pekka Jääskeläinen}, } +..
_publications: + The other relevant publications:: + @ARTICLE{afocl2023, + AUTHOR={Leppänen, Topi and Multanen, Joonas and Leppänen, Leevi and Jääskeläinen, Pekka}, + TITLE={{AFOCL}: Portable {OpenCL} Programming of {FPGAs} via Automated Built-in Kernel Management}, + BOOKTITLE={2023 IEEE Nordic Circuits and Systems Conference ({NorCAS})}, + YEAR={2023}, + PAGES={1-7}, + DOI={10.1109/NorCAS58970.2023.10305457} + } + @ARTICLE{leppanen2022, AUTHOR={Leppänen, Topi and Lotvonen, Atro and Jääskeläinen, Pekka}, TITLE={Cross-vendor programming abstraction for diverse heterogeneous platforms}, diff --git a/examples/accel/CMakeLists.txt b/examples/accel/CMakeLists.txt index e138a0b0ff047807a1fd8dc7887b110e702a94a4..e6c7299f617e80b1ccb7afc1578614306319afd1 100644 --- a/examples/accel/CMakeLists.txt +++ b/examples/accel/CMakeLists.txt @@ -138,7 +138,7 @@ endif () bitstreams/${core_name}.bit COMMAND cp vivado_${core_name}_1/vivado_${core_name}_1.gen/sources_1/bd/toplevel/hw_handoff/toplevel.hwh bitstreams/${core_name}.hwh - DEPENDS ${ADF} ${CMAKE_CURRENT_SOURCE_DIR}/generate_project.tcl) + DEPENDS ${core_name}_rtl ${CMAKE_CURRENT_SOURCE_DIR}/generate_project.tcl) add_custom_target(${core_name}_bs DEPENDS bitstreams/${core_name}.bit) add_dependencies(bitstreams ${core_name}_bs) diff --git a/examples/accel/firmware.c b/examples/accel/firmware.c index 5b6246829c65535ab8c4e009d1ef9f4cb4ec1fef..69c5577d84f9651fae02dfb5ba952f8930a85a0f 100644 --- a/examples/accel/firmware.c +++ b/examples/accel/firmware.c @@ -54,6 +54,8 @@ #define __cq__ __attribute__ ((address_space (5))) #define __buffer__ __attribute__ ((address_space (1))) +// NOTE: This enum contains highly experimental built-in kernel IDs, that are +// subject to change in future PoCL releases without any deprecation period. 
enum BuiltinKernelId : uint16_t { // CD = custom device, BI = built-in @@ -77,7 +79,14 @@ enum BuiltinKernelId : uint16_t POCL_CDBI_MUL_I16 = 15, POCL_CDBI_STREAMOUT_I32 = 16, POCL_CDBI_STREAMIN_I32 = 17, - POCL_CDBI_LAST = 18, + POCL_CDBI_VOTE_U32 = 18, + POCL_CDBI_VOTE_U8 = 19, + POCL_CDBI_DNN_CONV2D_NCHW_F32 = 20, + POCL_CDBI_OPENVX_SCALEIMAGE_NN_U8 = 21, + POCL_CDBI_OPENVX_SCALEIMAGE_BL_U8 = 22, + POCL_CDBI_OPENVX_TENSORCONVERTDEPTH_WRAP_U8_F32 = 23, + POCL_CDBI_OPENVX_MINMAXLOC_R1_U8 = 24, + POCL_CDBI_LAST = 25, POCL_CDBI_JIT_COMPILER = 0xFFFF }; @@ -224,10 +233,6 @@ main () #endif uint32_t kernel_id = packet->kernel_object_low; - if (kernel_id > POCL_CDBI_MUL_I32) - { - continue; - } __buffer__ uint32_t *kernarg_ptr = (__buffer__ uint32_t *)(packet->kernarg_address_low); @@ -235,25 +240,70 @@ main () __buffer__ uint32_t *arg0 = (__buffer__ uint32_t *)kernarg_ptr[0]; __buffer__ uint32_t *arg1 = (__buffer__ uint32_t *)kernarg_ptr[1]; __buffer__ uint32_t *arg2 = (__buffer__ uint32_t *)kernarg_ptr[2]; - - uint32_t dim_x = packet->grid_size_x; - - for (int idx = 0; idx < dim_x; idx++) + __buffer__ uint32_t *arg3 = (__buffer__ uint32_t *)kernarg_ptr[3]; + __buffer__ uint32_t *arg4 = (__buffer__ uint32_t *)kernarg_ptr[4]; + + // Check how many dimensions are in use, and set the unused ones + // to 1. + int dim_x = packet->grid_size_x; + int dim_y = (packet->dimensions >= 2) ? (packet->grid_size_y) : 1; + int dim_z = (packet->dimensions == 3) ? 
(packet->grid_size_z) : 1; + + uint8_t min = 255; + uint8_t max = 0; + uint32_t minlocx = 0, minlocy = 0, maxlocx = 0, maxlocy = 0; /* (0,0) stays valid if no pixel beats the initial min/max */ + for (int z = 0; z < dim_z; z++) { - // Do the operation based on the kernel_object (integer id) - switch (kernel_id) + for (int y = 0; y < dim_y; y++) { - case (POCL_CDBI_COPY_I8): - arg1[idx] = arg0[idx]; - break; - case (POCL_CDBI_ADD_I32): - arg2[idx] = arg0[idx] + arg1[idx]; - break; - case (POCL_CDBI_MUL_I32): - arg2[idx] = arg0[idx] * arg1[idx]; - break; + for (int x = 0; x < dim_x; x++) + { + // Linearize grid + int idx = z * dim_y * dim_x + dim_x * y + x; + // Do the operation based on the kernel_object (integer + // id) + switch (kernel_id) + { + case (POCL_CDBI_COPY_I8): + arg1[idx] = arg0[idx]; + break; + case (POCL_CDBI_ADD_I32): + arg2[idx] = arg0[idx] + arg1[idx]; + break; + case (POCL_CDBI_MUL_I32): + arg2[idx] = arg0[idx] * arg1[idx]; + break; + case (POCL_CDBI_OPENVX_MINMAXLOC_R1_U8): + { + uint8_t pixel = ((__buffer__ uint8_t *)arg0)[idx]; + if (pixel < min) + { + min = pixel; + minlocx = x; + minlocy = y; + } + if (pixel > max) + { + max = pixel; + maxlocx = x; + maxlocy = y; + } + } + break; + } + } } } + + if (kernel_id == POCL_CDBI_OPENVX_MINMAXLOC_R1_U8) + { + arg1[0] = min; + arg2[0] = max; + arg3[0] = minlocx; + arg3[1] = minlocy; + arg4[0] = maxlocx; + arg4[1] = maxlocy; + } #ifdef BASE_ADDRESS cc_l = control_region[ALMAIF_STATUS_REG_CC_LOW / 4]; // cc_h = control_region[ALMAIF_STATUS_REG_CC_HIGH/4]; diff --git a/lib/CL/devices/almaif/AlmaIFDevice.cc b/lib/CL/devices/almaif/AlmaIFDevice.cc index 3d0f7e268638fb389acb3c99ab49d99735ab4f5e..183bf1520e5dfb9dc8a30a0f713716402f9cac45 100644 --- a/lib/CL/devices/almaif/AlmaIFDevice.cc +++ b/lib/CL/devices/almaif/AlmaIFDevice.cc @@ -42,6 +42,7 @@ AlmaIFDevice::~AlmaIFDevice() { delete InstructionMemory; delete CQMemory; delete DataMemory; + delete ExternalMemory; memory_region_t *el, *tmp; LL_FOREACH_SAFE(AllocRegions, el, tmp) { free(el); } } @@ -67,25 +68,25 @@ void
AlmaIFDevice::discoverDeviceParameters() { BaseAddress + 3*segment_size + Dmem_size - PRIVATE_MEM_SIZE --> Local scratchpad memory for stack etc Where segment_size = 0x10000 (size of imem) */ - imem_size = ControlMemory->Read32(ALMAIF_INFO_IMEM_SIZE_LEGACY); - // cq_size = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY); - cq_size = 4 * 64; - // dmem_size = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY); + ImemSize = ControlMemory->Read32(ALMAIF_INFO_IMEM_SIZE_LEGACY); + // CQSize = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY); + CQSize = 4 * 64; + // DmemSize = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY); int private_mem_size = pocl_get_int_option("POCL_ALMAIF_PRIVATE_MEM_SIZE", ALMAIF_DEFAULT_PRIVATE_MEM_SIZE); - dmem_size = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY) - - private_mem_size - cq_size; + DmemSize = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY) - + private_mem_size - CQSize; PointerSize = 4; RelativeAddressing = false; - uint32_t segment_size = imem_size; - imem_start = segment_size; - dmem_start = 3 * segment_size; - cq_start = dmem_start + dmem_size; - cq_start += ControlMemory->PhysAddress; - imem_start += ControlMemory->PhysAddress; - dmem_start += ControlMemory->PhysAddress; + uint32_t segment_size = ImemSize; + ImemStart = segment_size; + DmemStart = 3 * segment_size; + CQStart = DmemStart + DmemSize; + CQStart += ControlMemory->PhysAddress(); + ImemStart += ControlMemory->PhysAddress(); + DmemStart += ControlMemory->PhysAddress(); } else if (interface_version == ALMAIF_VERSION_3) { uint64_t feature_flags = ControlMemory->Read64(ALMAIF_INFO_FEATURE_FLAGS_LOW); @@ -95,123 +96,123 @@ void AlmaIFDevice::discoverDeviceParameters() { RelativeAddressing = (feature_flags & ALMAIF_FF_BIT_AXI_MASTER) ? 
(false) : (true); - imem_size = ControlMemory->Read32(ALMAIF_INFO_IMEM_SIZE); - cq_size = ControlMemory->Read32(ALMAIF_INFO_CQMEM_SIZE_LOW); - dmem_size = ControlMemory->Read32(ALMAIF_INFO_DMEM_SIZE_LOW); + ImemSize = ControlMemory->Read32(ALMAIF_INFO_IMEM_SIZE); + CQSize = ControlMemory->Read32(ALMAIF_INFO_CQMEM_SIZE_LOW); + DmemSize = ControlMemory->Read32(ALMAIF_INFO_DMEM_SIZE_LOW); - imem_start = ControlMemory->Read64(ALMAIF_INFO_IMEM_START_LOW); - cq_start = ControlMemory->Read64(ALMAIF_INFO_CQMEM_START_LOW); - dmem_start = ControlMemory->Read64(ALMAIF_INFO_DMEM_START_LOW); + ImemStart = ControlMemory->Read64(ALMAIF_INFO_IMEM_START_LOW); + CQStart = ControlMemory->Read64(ALMAIF_INFO_CQMEM_START_LOW); + DmemStart = ControlMemory->Read64(ALMAIF_INFO_DMEM_START_LOW); if (RelativeAddressing) { POCL_MSG_PRINT_ALMAIF("Almaif: Enabled relative addressing\n"); - cq_start += ControlMemory->PhysAddress; - imem_start += ControlMemory->PhysAddress; - dmem_start += ControlMemory->PhysAddress; + CQStart += ControlMemory->PhysAddress(); + ImemStart += ControlMemory->PhysAddress(); + DmemStart += ControlMemory->PhysAddress(); } } else { POCL_ABORT_UNIMPLEMENTED("Unsupported AlmaIF version\n"); } - POCL_MSG_PRINT_ALMAIF("cq_start=%p imem_start=%p dmem_start=%p\n", - (void *)cq_start, (void *)imem_start, - (void *)dmem_start); - POCL_MSG_PRINT_ALMAIF("cq_size=%u imem_size=%u dmem_size=%u\n", cq_size, - imem_size, dmem_size); + POCL_MSG_PRINT_ALMAIF("CQStart=%p ImemStart=%p DmemStart=%p\n", + (void *)CQStart, (void *)ImemStart, (void *)DmemStart); + POCL_MSG_PRINT_ALMAIF("CQSize=%u ImemSize=%u DmemSize=%u\n", CQSize, ImemSize, + DmemSize); POCL_MSG_PRINT_ALMAIF("ControlMemory->PhysAddress=%zu\n", - ControlMemory->PhysAddress); + ControlMemory->PhysAddress()); AllocRegions = (memory_region_t *)calloc(1, sizeof(memory_region_t)); pocl_init_mem_region(AllocRegions, - dmem_start + ALMAIF_DEFAULT_CONSTANT_MEM_SIZE, - dmem_size - ALMAIF_DEFAULT_CONSTANT_MEM_SIZE); + DmemStart + 
ALMAIF_DEFAULT_CONSTANT_MEM_SIZE, + DmemSize - ALMAIF_DEFAULT_CONSTANT_MEM_SIZE); POCL_MSG_PRINT_ALMAIF( "Reserved %d bytes at the start of global memory for constant data\n", ALMAIF_DEFAULT_CONSTANT_MEM_SIZE); } -void AlmaIFDevice::loadProgramToDevice(almaif_kernel_data_s *kd, - cl_kernel kernel, - _cl_command_node *cmd) { - assert(kd); +void AlmaIFDevice::loadProgramToDevice(almaif_kernel_data_s *KernelData, + cl_kernel Kernel, + _cl_command_node *Command) { + assert(KernelData); - if (kd->imem_img_size == 0) { + if (KernelData->imem_img_size == 0) { char img_file[POCL_MAX_PATHNAME_LENGTH]; char cachedir[POCL_MAX_PATHNAME_LENGTH]; // first try specialized - pocl_cache_kernel_cachedir_path(img_file, kernel->program, - cmd->program_device_i, kernel, - "/parallel.img", cmd, 1); + pocl_cache_kernel_cachedir_path(img_file, Kernel->program, + Command->program_device_i, Kernel, + "/parallel.img", Command, 1); if (pocl_exists(img_file)) { - pocl_cache_kernel_cachedir_path( - cachedir, kernel->program, cmd->program_device_i, kernel, "", cmd, 1); - preread_images(cachedir, kd); + pocl_cache_kernel_cachedir_path(cachedir, Kernel->program, + Command->program_device_i, Kernel, "", + Command, 1); + prereadImages(cachedir, KernelData); } else { // if it doesn't exist, try specialized with local sizes 0-0-0 // should pick either 0-0-0 or 0-0-0-goffs0 _cl_command_node cmd_copy; - memcpy(&cmd_copy, cmd, sizeof(_cl_command_node)); + memcpy(&cmd_copy, Command, sizeof(_cl_command_node)); cmd_copy.command.run.pc.local_size[0] = 0; cmd_copy.command.run.pc.local_size[1] = 0; cmd_copy.command.run.pc.local_size[2] = 0; - pocl_cache_kernel_cachedir_path(img_file, kernel->program, - cmd->program_device_i, kernel, + pocl_cache_kernel_cachedir_path(img_file, Kernel->program, + Command->program_device_i, Kernel, "/parallel.img", &cmd_copy, 1); if (pocl_exists(img_file)) { - pocl_cache_kernel_cachedir_path(cachedir, kernel->program, - cmd->program_device_i, kernel, "", + 
pocl_cache_kernel_cachedir_path(cachedir, Kernel->program, + Command->program_device_i, Kernel, "", &cmd_copy, 1); } else { - pocl_cache_kernel_cachedir_path(cachedir, kernel->program, - cmd->program_device_i, kernel, "", + pocl_cache_kernel_cachedir_path(cachedir, Kernel->program, + Command->program_device_i, Kernel, "", &cmd_copy, 0); } POCL_MSG_PRINT_ALMAIF("Specialized kernel not found, using %s\n", cachedir); - preread_images(cachedir, kd); + prereadImages(cachedir, KernelData); } } - assert(kd->imem_img_size > 0); + assert(KernelData->imem_img_size > 0); ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_RESET_CMD); - InstructionMemory->CopyToMMAP(InstructionMemory->PhysAddress, kd->imem_img, - kd->imem_img_size); + InstructionMemory->CopyToMMAP(InstructionMemory->PhysAddress(), + KernelData->imem_img, + KernelData->imem_img_size); POCL_MSG_PRINT_ALMAIF("IMEM image written: %zu / %zu B\n", - InstructionMemory->PhysAddress, kd->imem_img_size); + InstructionMemory->PhysAddress(), + KernelData->imem_img_size); ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_CONTINUE_CMD); HwClockStart = pocl_gettimemono_ns(); } -void AlmaIFDevice::preread_images(const char *kernel_cachedir, - almaif_kernel_data_s *kd) { +void AlmaIFDevice::prereadImages(const std::string &KernelCacheDir, + almaif_kernel_data_s *KernelData) { POCL_MSG_PRINT_ALMAIF("Reading image files\n"); uint64_t temp = 0; size_t size = 0; char *content = NULL; - char module_fn[POCL_MAX_PATHNAME_LENGTH]; - snprintf(module_fn, POCL_MAX_PATHNAME_LENGTH, "%s/parallel.img", - kernel_cachedir); + std::string module_fn = KernelCacheDir + "/parallel.img"; - if (pocl_exists(module_fn)) { - int res = pocl_read_file(module_fn, &content, &temp); + if (pocl_exists(module_fn.c_str())) { + int res = pocl_read_file(module_fn.c_str(), &content, &temp); size = (size_t)temp; assert(res == 0); assert(size > 0); - assert(size < InstructionMemory->Size); - kd->imem_img = content; - kd->imem_img_size = size; + 
assert(size < InstructionMemory->Size()); + KernelData->imem_img = content; + KernelData->imem_img_size = size; content = NULL; } else - POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n", module_fn); + POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n", + module_fn.c_str()); - snprintf(module_fn, POCL_MAX_PATHNAME_LENGTH, "%s/kernel_address.txt", - kernel_cachedir); - if (pocl_exists(module_fn)) { - int res = pocl_read_file(module_fn, &content, &temp); + module_fn = KernelCacheDir + "/kernel_address.txt"; + if (pocl_exists(module_fn.c_str())) { + int res = pocl_read_file(module_fn.c_str(), &content, &temp); assert(res == 0); size = (size_t)temp; assert(size > 0); @@ -219,21 +220,22 @@ void AlmaIFDevice::preread_images(const char *kernel_cachedir, uint32_t kernel_address = 0; sscanf(content, "kernel address = %u", &kernel_address); assert(kernel_address != 0); - kd->kernel_address = kernel_address; + KernelData->kernel_address = kernel_address; content = NULL; } else - POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n", module_fn); + POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n", + module_fn.c_str()); /* snprintf(module_fn, POCL_MAX_PATHNAME_LENGTH, "%s/parallel_local.img", - kernel_cachedir); + KernelCacheDir); if (pocl_exists(module_fn)) { int res = pocl_read_file(module_fn, &content, &temp); assert(res == 0); size = (size_t)temp; if (size == 0) POCL_MEM_FREE(content); - kd->dmem_img = content; - kd->dmem_img_size = size; + KernelData->dmem_img = content; + KernelData->dmem_img_size = size; uint32_t kernel_addr = 0; if (size) { @@ -242,7 +244,7 @@ void AlmaIFDevice::preread_images(const char *kernel_cachedir, kernel_addr = *up; } POCL_MSG_PRINT_ALMAIF("Kernel address (%0x) found\n", kernel_addr); - kd->kernel_address = kernel_addr; + KernelData->kernel_address = kernel_addr; content = NULL; } else POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n", module_fn); @@ -250,43 +252,95 @@ void AlmaIFDevice::preread_images(const 
char *kernel_cachedir, } void AlmaIFDevice::printMemoryDump() { - for (unsigned k = 0; k < CQMemory->Size; k += 4) { + for (unsigned k = 0; k < InstructionMemory->Size(); k += 4) { + uint32_t value = InstructionMemory->Read32(k); + std::cerr << "IMEM at " << k << "=" << value << "\n"; + } + for (unsigned k = 0; k < CQMemory->Size(); k += 4) { uint32_t value = CQMemory->Read32(k); std::cerr << "CQ at " << k << "=" << value << "\n"; } - for (unsigned k = 0; k < DataMemory->Size; k += 4) { + for (unsigned k = 0; k < DataMemory->Size(); k += 4) { uint32_t value = DataMemory->Read32(k); std::cerr << "Data at " << k << "=" << value << "\n"; } std::cerr << std::endl; } -void AlmaIFDevice::writeDataToDevice(size_t dst, - const char *__restrict__ const src, - size_t size) { - if (DataMemory->isInRange(dst)) { - POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to 0x%zx\n", size, dst); - DataMemory->CopyToMMAP(dst, src, size); - } else if (ExternalMemory && ExternalMemory->isInRange(dst)) { - POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to external 0x%zx\n", size, - dst); - ExternalMemory->CopyToMMAP(dst, src, size); +void AlmaIFDevice::writeDataToDevice(pocl_mem_identifier *DstMemId, + const char *__restrict__ const Src, + size_t Size, size_t Offset) { + chunk_info_t *chunk = (chunk_info_t *)DstMemId->mem_ptr; + size_t Dst = chunk->start_address + Offset; + + if (DataMemory->isInRange(Dst)) { + POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to 0x%zx\n", Size, Dst); + DataMemory->CopyToMMAP(Dst, Src, Size); + } else if (ExternalMemory && ExternalMemory->isInRange(Dst)) { + POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to external 0x%zx\n", Size, + Dst); + ExternalMemory->CopyToMMAP(Dst, Src, Size); } else { - POCL_ABORT("Attempt to write data to outside the device memories\n"); + POCL_ABORT( + "Attempt to write data to outside the device memories. 
Address=%zu\n", + Dst); } } -void AlmaIFDevice::readDataFromDevice(char *__restrict__ const dst, size_t src, - size_t size) { - if (DataMemory->isInRange(src)) { - POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes from 0x%zx\n", size, src); - DataMemory->CopyFromMMAP(dst, src, size); - } else if (ExternalMemory && ExternalMemory->isInRange(src)) { - POCL_MSG_PRINT_ALMAIF("almaif: Copying 0x%zu bytes from external 0x%zx\n", - size, src); - ExternalMemory->CopyFromMMAP(dst, src, size); +void AlmaIFDevice::readDataFromDevice(char *__restrict__ const Dst, + pocl_mem_identifier *SrcMemId, + size_t Size, size_t Offset) { + + chunk_info_t *chunk = (chunk_info_t *)SrcMemId->mem_ptr; + POCL_MSG_PRINT_ALMAIF("Reading data with chunk start %zu, and offset %zu\n", + chunk->start_address, Offset); + size_t Src = chunk->start_address + Offset; + if (DataMemory->isInRange(Src)) { + POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes from 0x%zx\n", Size, Src); + DataMemory->CopyFromMMAP(Dst, Src, Size); + } else if (ExternalMemory && ExternalMemory->isInRange(Src)) { + POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes from external 0x%zx\n", + Size, Src); + ExternalMemory->CopyFromMMAP(Dst, Src, Size); } else { - POCL_ABORT("Attempt to write data to outside the device memories\n"); + POCL_ABORT( + "Attempt to read data from outside the device memories. 
Address=%zu\n", + Src); + } } + +size_t AlmaIFDevice::pointerDeviceOffset(pocl_mem_identifier *P) { + assert(P->extra == 0); + chunk_info_t *chunk = (chunk_info_t *)P->mem_ptr; + assert(chunk != NULL); + return chunk->start_address; +} + +void AlmaIFDevice::freeBuffer(pocl_mem_identifier *P) { + chunk_info_t *chunk = (chunk_info_t *)P->mem_ptr; + // Validate the chunk before the debug print below dereferences it. + assert(chunk != NULL); + + POCL_MSG_PRINT_MEMORY("almaif: freed buffer from 0x%zx\n", + chunk->start_address); + pocl_free_chunk(chunk); +} + +cl_int AlmaIFDevice::allocateBuffer(pocl_mem_identifier *P, size_t Size) { + + assert(P->mem_ptr == NULL); + chunk_info_t *chunk = NULL; + + chunk = pocl_alloc_buffer(AllocRegions, Size); + if (chunk == NULL) + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + + POCL_MSG_PRINT_MEMORY("almaif: allocated %zu bytes from 0x%zx\n", Size, + chunk->start_address); + + P->mem_ptr = chunk; + P->version = 0; + P->extra = 0; + return CL_SUCCESS; +} diff --git a/lib/CL/devices/almaif/AlmaIFDevice.hh b/lib/CL/devices/almaif/AlmaIFDevice.hh index ccbfc161d4b99b39cad50f09c432281615ce393f..af17a8188fa6ca290fb268009fdfce30595ee4b3 100644 --- a/lib/CL/devices/almaif/AlmaIFDevice.hh +++ b/lib/CL/devices/almaif/AlmaIFDevice.hh @@ -32,6 +32,7 @@ #include "pocl_types.h" #include <stdlib.h> +#include <string> struct almaif_kernel_data_s; @@ -40,8 +41,8 @@ public: AlmaIFDevice(); virtual ~AlmaIFDevice(); - virtual void loadProgramToDevice(almaif_kernel_data_s *kd, cl_kernel kernel, - _cl_command_node *cmd); + virtual void loadProgramToDevice(almaif_kernel_data_s *KernelData, + cl_kernel Kernel, _cl_command_node *Command); AlmaIFRegion *ControlMemory; AlmaIFRegion *InstructionMemory; @@ -63,22 +64,35 @@ public: void printMemoryDump(); - virtual void writeDataToDevice(size_t dst, const char *__restrict__ const src, - size_t size); - virtual void readDataFromDevice(char *__restrict__ const dst, size_t src, - size_t size); + virtual void writeDataToDevice(pocl_mem_identifier *DstMemId, + const char *__restrict__
const Src, + size_t Size, size_t Offset); + virtual void readDataFromDevice(char *__restrict__ const Dst, + pocl_mem_identifier *SrcMemId, size_t Size, + size_t Offset); -protected: virtual void discoverDeviceParameters(); - uintptr_t imem_start; - uint32_t imem_size; - uintptr_t cq_start; - uint32_t cq_size; - uintptr_t dmem_start; - uint32_t dmem_size; + + virtual bool isDBDevice() { return false; } + + // Allocate buffer from AlmaIFDevice's DataMemory or ExternalMemory + virtual cl_int allocateBuffer(pocl_mem_identifier *P, size_t Size); + // Free buffer from AlmaIFDevice's DataMemory or ExternalMemory + virtual void freeBuffer(pocl_mem_identifier *P); + // Returns the offset of the allocated buffer, to be used as a kernel argument + virtual size_t pointerDeviceOffset(pocl_mem_identifier *P); + +protected: + uintptr_t ImemStart; + uint32_t ImemSize; + uintptr_t CQStart; + uint32_t CQSize; + uintptr_t DmemStart; + uint32_t DmemSize; private: - void preread_images(const char *kernel_cachedir, almaif_kernel_data_s *kd); + void prereadImages(const std::string &KernelCacheDir, + almaif_kernel_data_s *KernelData); }; #endif diff --git a/lib/CL/devices/almaif/AlmaIFRegion.cc b/lib/CL/devices/almaif/AlmaIFRegion.cc index 79832bda49bcf7a925a5b7d9c896f3fb5b445489..6e197f6c3b86d4967a669e6af522fbff63311c71 100644 --- a/lib/CL/devices/almaif/AlmaIFRegion.cc +++ b/lib/CL/devices/almaif/AlmaIFRegion.cc @@ -27,5 +27,9 @@ AlmaIFRegion::~AlmaIFRegion() {} bool AlmaIFRegion::isInRange(size_t dst) { - return ((dst >= PhysAddress) && (dst < (PhysAddress + Size))); + return ((dst >= PhysAddress_) && (dst < (PhysAddress_ + Size_))); } + +size_t AlmaIFRegion::PhysAddress() { return PhysAddress_; } + +size_t AlmaIFRegion::Size() { return Size_; } diff --git a/lib/CL/devices/almaif/AlmaIFRegion.hh b/lib/CL/devices/almaif/AlmaIFRegion.hh index 7c167709e77e8cc3f44abffe489c4245ce84d7ef..cc0cea825baab97adf8761852c80555b0ea8dbc1 100644 --- a/lib/CL/devices/almaif/AlmaIFRegion.hh +++
b/lib/CL/devices/almaif/AlmaIFRegion.hh @@ -44,9 +44,12 @@ public: virtual void CopyInMem(size_t source, size_t destination, size_t bytes) = 0; virtual bool isInRange(size_t dst); + virtual size_t PhysAddress(); + virtual size_t Size(); - size_t PhysAddress; - size_t Size; +protected: + size_t PhysAddress_; + size_t Size_; }; #endif diff --git a/lib/CL/devices/almaif/AlmaifCompile.cc b/lib/CL/devices/almaif/AlmaifCompile.cc index 9066fc7d9f550ab91782e31771bf6f3ace7dc8f7..a8181b93d21773a59dd691c08c77041e1885fa71 100644 --- a/lib/CL/devices/almaif/AlmaifCompile.cc +++ b/lib/CL/devices/almaif/AlmaifCompile.cc @@ -37,12 +37,13 @@ #endif #ifdef ENABLE_COMPILER -#include "openasip/AlmaifCompileTCE.hh" +#include "openasip/AlmaifCompileOpenasip.hh" #endif extern int pocl_offline_compile; -int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameters) { +int pocl_almaif_compile_init(unsigned j, cl_device_id dev, + const std::string &parameters) { AlmaifData *d = (AlmaifData *)dev->data; d->compilationData = (compilation_data_t *)pocl_aligned_malloc( @@ -86,8 +87,7 @@ int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameter d->compilationData->current_kernel = NULL; SETUP_DEVICE_CL_VERSION(1, 2); - // dev->available = CL_TRUE; - dev->available = pocl_offline_compile ? CL_FALSE : CL_TRUE; + d->Available = pocl_offline_compile ?
CL_FALSE : CL_TRUE; dev->compiler_available = true; dev->linker_available = true; @@ -97,12 +97,12 @@ int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameter #ifdef ENABLE_COMPILER // TODO tce specific - adi->initialize_device = pocl_almaif_tce_initialize; - adi->cleanup_device = pocl_almaif_tce_cleanup; - adi->compile_kernel = pocl_almaif_tce_compile; + adi->initialize_device = pocl_almaif_openasip_initialize; + adi->cleanup_device = pocl_almaif_openasip_cleanup; + adi->compile_kernel = pocl_almaif_openasip_compile; if (pocl_get_bool_option("POCL_ALMAIF_STANDALONE", 0)) { adi->produce_standalone_program = - pocl_almaif_tce_produce_standalone_program; + pocl_almaif_openasip_produce_standalone_program; } // backend specific init POCL_MSG_PRINT_ALMAIF("Starting device specific initializion\n"); @@ -111,8 +111,8 @@ int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameter POCL_MSG_PRINT_ALMAIF("Device specific initializion done\n"); SHA1_digest_t digest; - pocl_almaif_tce_device_hash(parameters, dev->llvm_target_triplet, - (char *)digest); + pocl_almaif_openasip_device_hash(parameters.c_str(), dev->llvm_target_triplet, + (char *)digest); POCL_MSG_PRINT_ALMAIF("ALMAIF TCE DEVICE HASH=%s", (char *)digest); adi->build_hash = strdup((char *)digest); @@ -134,8 +134,8 @@ int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameter dev->ops->build_poclbinary = pocl_driver_build_poclbinary; dev->ops->build_binary = pocl_almaif_build_binary; #ifdef ENABLE_COMPILER - dev->ops->compile_kernel = pocl_almaif_tce_compile; - dev->ops->init_build = pocl_tce_init_build; + dev->ops->compile_kernel = pocl_almaif_openasip_compile; + dev->ops->init_build = pocl_almaif_openasip_init_build; #endif return CL_SUCCESS; } diff --git a/lib/CL/devices/almaif/AlmaifCompile.hh b/lib/CL/devices/almaif/AlmaifCompile.hh index bf8c1b85b162fd0f7dadac2ffc0b2003b81a121e..213d2820bb291143fa8004eae6c8bb1b6b6b3354 100644 --- 
a/lib/CL/devices/almaif/AlmaifCompile.hh +++ b/lib/CL/devices/almaif/AlmaifCompile.hh @@ -47,7 +47,7 @@ typedef struct compilation_data_s { /* device-specific callbacks */ void (*compile_kernel)(_cl_command_node *cmd, cl_kernel kernel, cl_device_id device, int specialize); - int (*initialize_device)(cl_device_id device, const char *parameters); + int (*initialize_device)(cl_device_id device, const std::string &parameters); int (*cleanup_device)(cl_device_id device); void (*produce_standalone_program)(AlmaifData *D, _cl_command_node *cmd, @@ -70,7 +70,8 @@ typedef struct almaif_kernel_data_s { uint32_t kernel_md_offset; } almaif_kernel_data_t; -int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameters); +int pocl_almaif_compile_init(unsigned j, cl_device_id dev, + const std::string &parameters); cl_int pocl_almaif_compile_uninit(unsigned j, cl_device_id dev); extern "C" { @@ -85,8 +86,6 @@ int pocl_almaif_free_kernel(cl_device_id device, cl_program program, int pocl_almaif_build_binary(cl_program program, cl_uint device_i, int link_program, int spir_build); -void preread_images(const char *kernel_cachedir, void *d_void, - almaif_kernel_data_t *kd); char *pocl_almaif_compile_build_hash(cl_device_id device); #endif diff --git a/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.cc b/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.cc new file mode 100644 index 0000000000000000000000000000000000000000..42c3bec9a0e50c179f93e8462efa624bdc907a58 --- /dev/null +++ b/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.cc @@ -0,0 +1,298 @@ +/* AlmaIFBitstreamDatabaseManager.cc - Parses and responds to queries about + AlmaifDB + + Copyright (c) 2023 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +*/ + +#include "AlmaIFBitstreamDatabaseManager.hh" + +#include "../AlmaifShared.hh" + +#include "pocl_file_util.h" + +#include <dirent.h> +#include <iostream> +#include <set> + +#include "tiny-json.h" + +void AlmaIFBitstreamDatabaseManager::parseOverlay(json_t const *Overlay, + const std::string &DBPath) { + + json_t const *OverlayName = json_getProperty(Overlay, "name"); + if (!OverlayName || JSON_TEXT != json_getType(OverlayName)) { + POCL_ABORT("Error, the overlay name property is not found."); + } + std::string OverlayNameStr = json_getValue(OverlayName); + POCL_MSG_PRINT_ALMAIF("Overlay Name: %s.\n", OverlayNameStr.c_str()); + + std::string OverlayPath = DBPath + "/" + OverlayNameStr; + + json_t const *PrDevice = json_getProperty(Overlay, "device"); + if (!PrDevice || JSON_TEXT != json_getType(PrDevice)) { + POCL_ABORT("Overlay doesn't have associated device\n"); + } + std::string PrDeviceName = json_getValue(PrDevice); + POCL_MSG_PRINT_ALMAIF("PR device name: %s\n", PrDeviceName.c_str()); + DEVICE_TYPE PrDeviceEnum = string2DeviceTypeEnum(PrDeviceName); + POCL_MSG_PRINT_ALMAIF("PR device enum: %d\n", PrDeviceEnum); + + struct ProgrammingFiles ProgFilesInfo = {0, 
PrDeviceEnum, "", "", ""}; + + json_t const *OverlayDefaultFilename = json_getProperty(Overlay, "filename"); + if (!OverlayDefaultFilename || + JSON_TEXT != json_getType(OverlayDefaultFilename)) { + POCL_ABORT("Error, the overlay default filename property is not found."); + } + std::string OverlayDefaultFilenameStr = json_getValue(OverlayDefaultFilename); + + std::string OverlayDefaultFilenamePath = + OverlayPath + "/" + OverlayDefaultFilenameStr; + + POCL_MSG_PRINT_ALMAIF("Overlay default filename path: %s.\n", + OverlayDefaultFilenamePath.c_str()); + DefaultFilenamePath_ = OverlayDefaultFilenamePath; + + json_t const *OverlayDefaultKernelName = + json_getProperty(Overlay, "default-kernel"); + if (!OverlayDefaultKernelName || + JSON_TEXT != json_getType(OverlayDefaultKernelName)) { + POCL_ABORT("Error, the overlay default kernel name property is not found."); + } + std::string OverlayDefaultKernelNameStr = + json_getValue(OverlayDefaultKernelName); + DefaultKernelName_ = OverlayDefaultKernelNameStr; + + json_t const *OverlayExternalMemory = + json_getProperty(Overlay, "external-memory"); + if (!OverlayExternalMemory || + JSON_TEXT != json_getType(OverlayExternalMemory)) { + POCL_ABORT("Error, the overlay external-memory property is not found."); + } + DeviceExternalMemParameters_[PrDeviceEnum] = + json_getValue(OverlayExternalMemory); + + json_t const *Accelerators = json_getProperty(Overlay, "accelerators"); + if (!Accelerators || JSON_ARRAY != json_getType(Accelerators)) { + POCL_ABORT("Error, accelerators list not parsed\n"); + } + + json_t const *Accel; + for (Accel = json_getChild(Accelerators); Accel != 0; + Accel = json_getSibling(Accel)) { + parseAccelerator(Accel, ProgFilesInfo, OverlayPath); + } +} + +void AlmaIFBitstreamDatabaseManager::parseAccelerator( + json_t const *Accel, struct ProgrammingFiles &ProgFilesInfo, + const std::string &OverlayPath) { + + json_t const *AccelNameJs = json_getProperty(Accel, "name"); + if (!AccelNameJs || JSON_TEXT != 
json_getType(AccelNameJs)) { + POCL_ABORT("Partial bitstream doesn't have a name\n"); + } + ProgFilesInfo.KernelName = json_getValue(AccelNameJs); + POCL_MSG_PRINT_ALMAIF("PR device name: %s\n", + ProgFilesInfo.KernelName.c_str()); + + std::string AcceleratorPath = + OverlayPath + "/accelerators/" + ProgFilesInfo.KernelName; + + json_t const *PrBitstream = json_getProperty(Accel, "filename"); + if (!PrBitstream || JSON_TEXT != json_getType(PrBitstream)) { + POCL_ABORT("Partial bitstream filename parsing failed\n"); + } + + std::string PrBitstreamPath = + AcceleratorPath + "/" + json_getValue(PrBitstream); + POCL_MSG_PRINT_ALMAIF("Arria device pr file %s\n", PrBitstreamPath.c_str()); + ProgFilesInfo.BitstreamPath = PrBitstreamPath; + + json_t const *Firmwares = json_getProperty(Accel, "firmwares"); + if (!Firmwares || JSON_ARRAY != json_getType(Firmwares)) { + POCL_ABORT("Error, firmwares not found\n"); + } + for (json_t const *Firmware = json_getChild(Firmwares); Firmware != 0; + Firmware = json_getSibling(Firmware)) { + parseFirmware(Firmware, ProgFilesInfo, AcceleratorPath); + } +} + +void AlmaIFBitstreamDatabaseManager::parseFirmware( + json_t const *Firmware, struct ProgrammingFiles &ProgFilesInfo, + const std::string &AcceleratorPath) { + json_t const *FirmwarePath = json_getProperty(Firmware, "filename"); + if (!FirmwarePath || JSON_TEXT != json_getType(FirmwarePath)) { + POCL_ABORT("Error, firmware filepath not found from json\n"); + } + std::string FirmwarePathStr = + AcceleratorPath + "/firmwares/" + json_getValue(FirmwarePath); + ProgFilesInfo.FirmwarePath = FirmwarePathStr; + + json_t const *BiKernels = json_getProperty(Firmware, "builtin-kernels"); + if (!BiKernels || JSON_ARRAY != json_getType(BiKernels)) { + POCL_ABORT("Error, builtin kernels not found\n"); + } + json_t const *Bik; + for (Bik = json_getChild(BiKernels); Bik != 0; Bik = json_getSibling(Bik)) { + parseBIKernels(Bik, ProgFilesInfo); + } +} + +void 
AlmaIFBitstreamDatabaseManager::parseBIKernels( + json_t const *Bik, struct ProgrammingFiles &ProgFilesInfo) { + if (JSON_INTEGER != json_getType(Bik)) { + POCL_ABORT("Error, Builtin kernel id is wrong type\n"); + } + int64_t BikIDLong = json_getInteger(Bik); + assert(BikIDLong < 0xFFFF); + + BuiltinKernelId BikID = (BuiltinKernelId)BikIDLong; + SupportedBIKernels_[ProgFilesInfo.FpgaType].push_back( + {BikID, ProgFilesInfo.FpgaType, ProgFilesInfo.BitstreamPath, + ProgFilesInfo.FirmwarePath, ProgFilesInfo.KernelName}); + POCL_MSG_PRINT_ALMAIF( + "Found support for builtin kernel %d with fw path: %s\n", BikID, + ProgFilesInfo.FirmwarePath.c_str()); +} + +AlmaIFBitstreamDatabaseManager::AlmaIFBitstreamDatabaseManager( + const std::string &DBPath) { + + std::string DBFile = DBPath; + + DIR *dp; + struct dirent *dirp; + if ((dp = opendir(DBPath.c_str())) == NULL) { + POCL_ABORT("Failed opening the Almaif db directory\n"); + } + while ((dirp = readdir(dp)) != NULL) { + std::string OverlayFolderName = dirp->d_name; + if (OverlayFolderName.find("overlay") != std::string::npos) { + POCL_MSG_PRINT_ALMAIF("Found overlay dir %s\n", + OverlayFolderName.c_str()); + std::string BitstreamDatabaseIndexPath = + DBFile + "/" + OverlayFolderName + "/db.json"; + + uint64_t Size = 0; + char *BitstreamDatabaseIndex = NULL; + pocl_read_file(BitstreamDatabaseIndexPath.c_str(), + &BitstreamDatabaseIndex, &Size); + POCL_MSG_PRINT_ALMAIF("Read file size=%lld\n", Size); + + POCL_MSG_PRINT_ALMAIF("DATABASE FILE %s:\n", + BitstreamDatabaseIndexPath.c_str()); + POCL_MSG_PRINT_ALMAIF("%s\n", BitstreamDatabaseIndex); + POCL_MSG_PRINT_ALMAIF("DATABASE FILE END\n"); + + json_t Mem[256]; + json_t const *t = + json_create(BitstreamDatabaseIndex, Mem, sizeof(Mem) / sizeof(*Mem)); + if (!t) { + POCL_ABORT("Failed opening AlmaifDB as json object\n"); + } + parseOverlay(t, DBPath); + } + } +} + +AlmaIFBitstreamDatabaseManager::~AlmaIFBitstreamDatabaseManager() {} + 
+AlmaIFBitstreamDatabaseManager::DEVICE_TYPE +AlmaIFBitstreamDatabaseManager::string2DeviceTypeEnum(const std::string &Str) { + unsigned int Len = Str.length(); + std::string StringToConvert = Str; + for (int i = 0; i < Len; i++) { + StringToConvert[i] = tolower(Str[i]); + } + + for (int j = 0; j < sizeof(Conversion) / sizeof(Conversion[0]); ++j) + if (StringToConvert == Conversion[j].Str) { + return Conversion[j].Val; + } else { + POCL_MSG_PRINT_ALMAIF( + "String-to-enum. String:%s, comparing:%s, lengths:%d,%d\n", + Conversion[j].Str.c_str(), StringToConvert.c_str(), + Conversion[j].Str.length(), StringToConvert.length()); + } + POCL_ABORT("Almaif DB device string to enum conversion failed. String %s\n", + Str.c_str()); +} + +std::string +AlmaIFBitstreamDatabaseManager::deviceTypeEnum2String(DEVICE_TYPE DeviceType) { + for (int j = 0; j < sizeof(Conversion) / sizeof(Conversion[0]); ++j) { + if (DeviceType == Conversion[j].Val) { + return Conversion[j].Str; + } + } + POCL_ABORT("Almaif DB device enum to string conversion failed"); +} + +const AlmaIFBitstreamDatabaseManager::ProgrammingFiles & +AlmaIFBitstreamDatabaseManager::getBitstreamFile(BuiltinKernelId BikID, + DEVICE_TYPE UsedDeviceType) { + + for (const ProgrammingFiles &Iter : SupportedBIKernels_[UsedDeviceType]) { + if (Iter.BikID == BikID) { + return Iter; + } + } + POCL_ABORT("Built in kernel %d bitstream not found\n", BikID); +} + +const AlmaIFBitstreamDatabaseManager::ProgrammingFiles & +AlmaIFBitstreamDatabaseManager::getFirmwareFile(BuiltinKernelId BikID, + DEVICE_TYPE UsedDeviceType) { + + for (const ProgrammingFiles &Iter : SupportedBIKernels_[UsedDeviceType]) { + if (Iter.BikID == BikID) { + return Iter; + } + } + POCL_ABORT("Built in kernel %d firmware not found\n", BikID); +} + +std::vector<BuiltinKernelId> +AlmaIFBitstreamDatabaseManager::supportedBuiltinKernels( + DEVICE_TYPE UsedDeviceType) { + + std::vector<BuiltinKernelId> Output; + for (const ProgrammingFiles &Iter : 
SupportedBIKernels_[UsedDeviceType]) { + Output.push_back((BuiltinKernelId)Iter.BikID); + } + return Output; +} + +std::string AlmaIFBitstreamDatabaseManager::externalMemoryParameters( + DEVICE_TYPE UsedDeviceType) { + + return DeviceExternalMemParameters_[UsedDeviceType]; +} + +std::string AlmaIFBitstreamDatabaseManager::defaultBitstream() { + return DefaultFilenamePath_; +} + +std::string AlmaIFBitstreamDatabaseManager::defaultKernelName() { + return DefaultKernelName_; +} diff --git a/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.hh b/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.hh new file mode 100644 index 0000000000000000000000000000000000000000..324821b9cdd6734ef7caff38fa4334bc37d4abcb --- /dev/null +++ b/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.hh @@ -0,0 +1,101 @@ +/* AlmaIFBitstreamDatabaseManager.hh - Parses and responds to queries about + AlmaifDB + + Copyright (c) 2023 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +*/ + +#ifndef POCL_ALMAIFBITSTREAMDATABASEMANAGER_H +#define POCL_ALMAIFBITSTREAMDATABASEMANAGER_H + +#include "builtin_kernels.hh" + +#include <map> +#include <string> + +typedef struct json_s json_t; + +// A helper class used by DBDevice to parse the bitstream database. +// This class can be thought to be the interface from C++ to the JSON-based +// database. After parsing, the DBDevice will query this class +// for information about the bitstream database. +// +// Since the AFOCL bitstream database format is still very experimental, there +// is no fixed specification for it. Therefore, this class defines the format +// since it is responsible for parsing the database. +// While having a clear, versioned format would obviously be the best solution, +// maintaining a separate specification at this point would risk it +// going out-of-date. You can see the separate AFOCL-project for examples of +// the current database format. 
+class AlmaIFBitstreamDatabaseManager { +public: + AlmaIFBitstreamDatabaseManager(const std::string &DBPath); + virtual ~AlmaIFBitstreamDatabaseManager(); + + enum DEVICE_TYPE { ARRIA10, ALVEOU280 }; + + DEVICE_TYPE string2DeviceTypeEnum(const std::string &Str); + std::string deviceTypeEnum2String(DEVICE_TYPE DeviceType); + + struct ProgrammingFiles { + int BikID; + DEVICE_TYPE FpgaType; + std::string BitstreamPath; + std::string FirmwarePath; + std::string KernelName; + }; + const ProgrammingFiles &getBitstreamFile(BuiltinKernelId BikID, + DEVICE_TYPE UsedDeviceType); + const ProgrammingFiles &getFirmwareFile(BuiltinKernelId BikID, + DEVICE_TYPE UsedDeviceType); + + std::vector<BuiltinKernelId> + supportedBuiltinKernels(DEVICE_TYPE UsedDeviceType); + std::string externalMemoryParameters(DEVICE_TYPE UsedDeviceType); + std::string defaultBitstream(); + std::string defaultKernelName(); + +private: + void parseDB(json_t const *DB, const std::string &DBPath); + void parseOverlay(json_t const *Overlay, const std::string &DBPath); + void parseAccelerator(json_t const *Accel, + struct ProgrammingFiles &ProgFilesInfo, + const std::string &OverlayPath); + void parseFirmware(json_t const *Firmware, + struct ProgrammingFiles &ProgFilesInfo, + const std::string &AcceleratorPath); + void parseBIKernels(json_t const *Bik, + struct ProgrammingFiles &ProgFilesInfo); + + const struct { + DEVICE_TYPE Val; + const std::string Str; + } Conversion[2] = { + {ARRIA10, "arria10"}, + {ALVEOU280, "alveou280"}, + }; + + std::map<DEVICE_TYPE, std::vector<ProgrammingFiles>> SupportedBIKernels_; + std::map<DEVICE_TYPE, std::string> DeviceExternalMemParameters_; + std::string DefaultFilenamePath_; + std::string DefaultKernelName_; +}; + +#endif diff --git a/lib/CL/devices/almaif/AlmaifDB/DBDevice.cc b/lib/CL/devices/almaif/AlmaifDB/DBDevice.cc new file mode 100644 index 0000000000000000000000000000000000000000..5097d33eec18b6614aeb0f1ea4f533de5a1a3f05 --- /dev/null +++ 
b/lib/CL/devices/almaif/AlmaifDB/DBDevice.cc @@ -0,0 +1,167 @@ +/* DBDevice.cc - Device based on parsing Almaif database and instantiating + * other device types based on what it finds from there + + Copyright (c) 2023 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. 
+*/ + +#include "DBDevice.hh" + +#include "../AlmaifShared.hh" + +#ifdef HAVE_XRT +#include "../XilinxXrtDevice.hh" +#include "../XilinxXrtRegion.hh" +#endif + +DBDevice::DBDevice(const std::string &DBPath) : DB_(DBPath) { + + POCL_MSG_PRINT_INFO("Starting bitstream database device initialization"); + + bool UseAlveoDevice = false; + bool UseIntelDevice = false; + if (pocl_is_option_set("XILINX_XRT")) { + UseAlveoDevice = true; + UsedDeviceType_ = AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ALVEOU280; + } + if (pocl_is_option_set("INTEL_ACL")) { + UseIntelDevice = true; + UsedDeviceType_ = AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ARRIA10; + } + if (UseAlveoDevice && UseIntelDevice) { + POCL_ABORT("AlmaIF: DBDevice only supports one vendor FPGA at the time\n"); + } + + std::string ExternalMemParams = DB_.externalMemoryParameters(UsedDeviceType_); + + if (UseAlveoDevice) { + Dev_ = new XilinxXrtDevice(DB_.defaultKernelName(), DB_.defaultBitstream(), + ExternalMemParams, 0); + } else if (UseIntelDevice) { + POCL_ABORT_UNIMPLEMENTED("AlmaIF intel device not implemented\n"); + } else { + POCL_ABORT("AlmaIF: DBDevice didn't find any vendor FPGAs\n"); + } + + ControlMemory = Dev_->ControlMemory; + InstructionMemory = Dev_->InstructionMemory; + CQMemory = Dev_->CQMemory; + DataMemory = Dev_->DataMemory; + RelativeAddressing = Dev_->RelativeAddressing; + HasHardwareClock = Dev_->HasHardwareClock; + HwClockFrequency = Dev_->HwClockFrequency; + PointerSize = Dev_->PointerSize; + ExternalMemory = Dev_->ExternalMemory; + AllocRegions = Dev_->AllocRegions; +} + +DBDevice::~DBDevice() { delete Dev_; } + +void DBDevice::programBIKernelBitstream(BuiltinKernelId BikID) { + + const AlmaIFBitstreamDatabaseManager::ProgrammingFiles &BitstreamToProgram = + DB_.getBitstreamFile(BikID, UsedDeviceType_); + std::string BitstreamPath = BitstreamToProgram.BitstreamPath; + std::string KernelName = BitstreamToProgram.KernelName; + + if (BitstreamPath == LoadedBitstreamPath_) { + return; + 
} + + POCL_MSG_PRINT_ALMAIF("Programming built-in kernel %s bitstream from: %s\n", + KernelName.c_str(), BitstreamPath.c_str()); + if (UsedDeviceType_ == + AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ALVEOU280) { + ((XilinxXrtDevice *)Dev_) + ->programBitstream(KernelName.c_str(), BitstreamPath.c_str(), 0); + } else if (UsedDeviceType_ == + AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ARRIA10) { + POCL_ABORT_UNIMPLEMENTED("AlmaIF intel device not implemented\n"); + } else { + POCL_ABORT("Almaif neither device activated\n"); + } + + LoadedBitstreamPath_ = BitstreamPath; +} + +void DBDevice::programBIKernelFirmware(BuiltinKernelId BikID) { + + const AlmaIFBitstreamDatabaseManager::ProgrammingFiles &BitstreamToProgram = + DB_.getFirmwareFile(BikID, UsedDeviceType_); + std::string FirmwarePath = BitstreamToProgram.FirmwarePath; + + if (FirmwarePath == LoadedFirmwarePath_) { + return; + } + POCL_MSG_PRINT_ALMAIF("Programming built-in kernel firmware from: %s\n", + FirmwarePath.c_str()); + + ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_RESET_CMD); + + if (UsedDeviceType_ == + AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ALVEOU280) { + ((XilinxXrtRegion *)InstructionMemory)->initRegion(FirmwarePath.c_str()); + } else if (UsedDeviceType_ == + AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ARRIA10) { + POCL_ABORT_UNIMPLEMENTED("AlmaIF intel device not implemented\n"); + } else { + POCL_ABORT("Neither device activated\n"); + } + ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_CONTINUE_CMD); + + POCL_MSG_PRINT_ALMAIF("Programming done"); + LoadedFirmwarePath_ = FirmwarePath; +} + +void DBDevice::loadProgramToDevice(almaif_kernel_data_s *KernelData, + cl_kernel Kernel, + _cl_command_node *Command) { + Dev_->loadProgramToDevice(KernelData, Kernel, Command); +} + +void DBDevice::printMemoryDump() { Dev_->printMemoryDump(); } + +void DBDevice::writeDataToDevice(pocl_mem_identifier *DstMemId, + const char *__restrict__ const Src, + size_t Size, size_t Offset) 
{ + Dev_->writeDataToDevice(DstMemId, Src, Size, Offset); +} + +void DBDevice::readDataFromDevice(char *__restrict__ const Dst, + pocl_mem_identifier *SrcMemId, size_t Size, + size_t Offset) { + Dev_->readDataFromDevice(Dst, SrcMemId, Size, Offset); +} + +cl_int DBDevice::allocateBuffer(pocl_mem_identifier *P, size_t Size) { + Dev_->allocateBuffer(P, Size); +} + +void DBDevice::freeBuffer(pocl_mem_identifier *P) { Dev_->freeBuffer(P); } + +size_t DBDevice::pointerDeviceOffset(pocl_mem_identifier *P) { + Dev_->pointerDeviceOffset(P); +} + +void DBDevice::discoverDeviceParameters() { Dev_->discoverDeviceParameters(); } + +std::vector<BuiltinKernelId> DBDevice::supportedBuiltinKernels() { + return DB_.supportedBuiltinKernels(UsedDeviceType_); +} diff --git a/lib/CL/devices/almaif/AlmaifDB/DBDevice.hh b/lib/CL/devices/almaif/AlmaifDB/DBDevice.hh new file mode 100644 index 0000000000000000000000000000000000000000..0004fb63632f3ba53228b4750bda8789c5364d3e --- /dev/null +++ b/lib/CL/devices/almaif/AlmaifDB/DBDevice.hh @@ -0,0 +1,80 @@ +/* DBDevice.hh - Device based on parsing Almaif database and instantiating + * other device types based on what it finds from there + + Copyright (c) 2023 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +*/ + +#ifndef POCL_DBDEVICE_H +#define POCL_DBDEVICE_H + +#include "../AlmaIFDevice.hh" +#include "AlmaIFBitstreamDatabaseManager.hh" + +// A class that acts as an interface between the Almaif-driver +// and the underlying FPGA device. +// This class is an FPGA vendor-agnostic AlmaIFDevice. +// It instantiates a separate vendor-specific AlmaIFDevice-class. +// Many of the class methods are simply forwarded as is to the +// underlying vendor-specific AlmaIFDevice stored in the private +// Dev_-variable. +// +// This class uses AlmaIFBitstreamDatabaseManager-class to parse the +// bitstream database, and to fetch the bitstream and firmware +// filepaths from there. 
+class DBDevice : public AlmaIFDevice { + +public: + DBDevice(const std::string &DBPath); + ~DBDevice(); + + virtual void loadProgramToDevice(almaif_kernel_data_s *KernelData, + cl_kernel Kernel, _cl_command_node *Command); + void printMemoryDump(); + void writeDataToDevice(pocl_mem_identifier *DstMemId, + const char *__restrict__ const Src, size_t Size, + size_t Offset) override; + void readDataFromDevice(char *__restrict__ const Dst, + pocl_mem_identifier *SrcMemId, size_t Size, + size_t Offset) override; + cl_int allocateBuffer(pocl_mem_identifier *P, size_t Size) override; + void freeBuffer(pocl_mem_identifier *P) override; + size_t pointerDeviceOffset(pocl_mem_identifier *P) override; + + virtual void programBIKernelFirmware(BuiltinKernelId BikID); + virtual void programBIKernelBitstream(BuiltinKernelId BikID); + + virtual std::vector<BuiltinKernelId> supportedBuiltinKernels(); + virtual void discoverDeviceParameters(); + + bool isDBDevice() override { return true; } + +protected: +private: + AlmaIFBitstreamDatabaseManager DB_; + AlmaIFDevice *Dev_; + + AlmaIFBitstreamDatabaseManager::DEVICE_TYPE UsedDeviceType_; + + std::string LoadedBitstreamPath_ = ""; + std::string LoadedFirmwarePath_ = ""; +}; + +#endif diff --git a/lib/CL/devices/almaif/AlmaifDB/tiny-json.c b/lib/CL/devices/almaif/AlmaifDB/tiny-json.c new file mode 100644 index 0000000000000000000000000000000000000000..795715c5e8cd8934418f45653c86dc2e3452492e --- /dev/null +++ b/lib/CL/devices/almaif/AlmaifDB/tiny-json.c @@ -0,0 +1,461 @@ + +/* + +<https://github.com/rafagafe/tiny-json> + + Licensed under the MIT License <http://opensource.org/licenses/MIT>. + SPDX-License-Identifier: MIT + Copyright (c) 2016-2018 Rafa Garcia <rafagarcia77@gmail.com>. 
+ + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +*/ + +#include <string.h> +#include <ctype.h> +#include "tiny-json.h" + +/** Structure to handle a heap of JSON properties. */ +typedef struct jsonStaticPool_s { + json_t* mem; /**< Pointer to array of json properties. */ + unsigned int qty; /**< Length of the array of json properties. */ + unsigned int nextFree; /**< The index of the next free json property. */ + jsonPool_t pool; +} jsonStaticPool_t; + +/* Search a property by its name in a JSON object. */ +json_t const* json_getProperty( json_t const* obj, char const* property ) { + json_t const* sibling; + for( sibling = obj->u.c.child; sibling; sibling = sibling->sibling ) + if ( sibling->name && !strcmp( sibling->name, property ) ) + return sibling; + return 0; +} + +/* Search a property by its name in a JSON object and return its value. 
*/ +char const* json_getPropertyValue( json_t const* obj, char const* property ) { + json_t const* field = json_getProperty( obj, property ); + if ( !field ) return 0; + jsonType_t type = json_getType( field ); + if ( JSON_ARRAY >= type ) return 0; + return json_getValue( field ); +} + +/* Internal prototypes: */ +static char* goBlank( char* str ); +static char* goNum( char* str ); +static json_t* poolInit( jsonPool_t* pool ); +static json_t* poolAlloc( jsonPool_t* pool ); +static char* objValue( char* ptr, json_t* obj, jsonPool_t* pool ); +static char* setToNull( char* ch ); +static bool isEndOfPrimitive( char ch ); + +/* Parse a string to get a json. */ +json_t const* json_createWithPool( char *str, jsonPool_t *pool ) { + char* ptr = goBlank( str ); + if ( !ptr || (*ptr != '{' && *ptr != '[') ) return 0; + json_t* obj = pool->init( pool ); + obj->name = 0; + obj->sibling = 0; + obj->u.c.child = 0; + ptr = objValue( ptr, obj, pool ); + if ( !ptr ) return 0; + return obj; +} + +/* Parse a string to get a json. */ +json_t const* json_create( char* str, json_t mem[], unsigned int qty ) { + jsonStaticPool_t spool; + spool.mem = mem; + spool.qty = qty; + spool.pool.init = poolInit; + spool.pool.alloc = poolAlloc; + return json_createWithPool( str, &spool.pool ); +} + +/** Get a special character with its escape character. Examples: + * 'b' -> '\\b', 'n' -> '\\n', 't' -> '\\t' + * @param ch The escape character. + * @retval The character code. */ +static char getEscape( char ch ) { + static struct { char ch; char code; } const pair[] = { + { '\"', '\"' }, { '\\', '\\' }, + { '/', '/' }, { 'b', '\b' }, + { 'f', '\f' }, { 'n', '\n' }, + { 'r', '\r' }, { 't', '\t' }, + }; + unsigned int i; + for( i = 0; i < sizeof pair / sizeof *pair; ++i ) + if ( pair[i].ch == ch ) + return pair[i].code; + return '\0'; +} + +/** Parse 4 characters. + * @param str Pointer to first digit. + * @retval '?' If the four characters are hexadecimal digits. + * @retval '\0' In other cases. 
*/ +static unsigned char getCharFromUnicode( unsigned char const* str ) { + unsigned int i; + for( i = 0; i < 4; ++i ) + if ( !isxdigit( str[i] ) ) + return '\0'; + return '?'; +} + +/** Parse a string and replace the scape characters by their meaning characters. + * This parser stops when finds the character '\"'. Then replaces '\"' by '\0'. + * @param str Pointer to first character. + * @retval Pointer to first non white space after the string. If success. + * @retval Null pointer if any error occur. */ +static char* parseString( char* str ) { + unsigned char* head = (unsigned char*)str; + unsigned char* tail = (unsigned char*)str; + for( ; *head; ++head, ++tail ) { + if ( *head == '\"' ) { + *tail = '\0'; + return (char*)++head; + } + if ( *head == '\\' ) { + if ( *++head == 'u' ) { + char const ch = getCharFromUnicode( ++head ); + if ( ch == '\0' ) return 0; + *tail = ch; + head += 3; + } + else { + char const esc = getEscape( *head ); + if ( esc == '\0' ) return 0; + *tail = esc; + } + } + else *tail = *head; + } + return 0; +} + +/** Parse a string to get the name of a property. + * @param ptr Pointer to first character. + * @param property The property to assign the name. + * @retval Pointer to first of property value. If success. + * @retval Null pointer if any error occur. */ +static char* propertyName( char* ptr, json_t* property ) { + property->name = ++ptr; + ptr = parseString( ptr ); + if ( !ptr ) return 0; + ptr = goBlank( ptr ); + if ( !ptr ) return 0; + if ( *ptr++ != ':' ) return 0; + return goBlank( ptr ); +} + +/** Parse a string to get the value of a property when its type is JSON_TEXT. + * @param ptr Pointer to first character ('\"'). + * @param property The property to assign the name. + * @retval Pointer to first non white space after the string. If success. + * @retval Null pointer if any error occur. 
*/ +static char* textValue( char* ptr, json_t* property ) { + ++property->u.value; + ptr = parseString( ++ptr ); + if ( !ptr ) return 0; + property->type = JSON_TEXT; + return ptr; +} + +/** Compare two strings until get the null character in the second one. + * @param ptr sub string + * @param str main string + * @retval Pointer to next character. + * @retval Null pointer if any error occur. */ +static char* checkStr( char* ptr, char const* str ) { + while( *str ) + if ( *ptr++ != *str++ ) + return 0; + return ptr; +} + +/** Parser a string to get a primitive value. + * If the first character after the value is different of '}' or ']' is set to '\0'. + * @param ptr Pointer to first character. + * @param property Property handler to set the value and the type, (true, false or null). + * @param value String with the primitive literal. + * @param type The code of the type. ( JSON_BOOLEAN or JSON_NULL ) + * @retval Pointer to first non white space after the string. If success. + * @retval Null pointer if any error occur. */ +static char* primitiveValue( char* ptr, json_t* property, char const* value, jsonType_t type ) { + ptr = checkStr( ptr, value ); + if ( !ptr || !isEndOfPrimitive( *ptr ) ) return 0; + ptr = setToNull( ptr ); + property->type = type; + return ptr; +} + +/** Parser a string to get a true value. + * If the first character after the value is different of '}' or ']' is set to '\0'. + * @param ptr Pointer to first character. + * @param property Property handler to set the value and the type, (true, false or null). + * @retval Pointer to first non white space after the string. If success. + * @retval Null pointer if any error occur. */ +static char* trueValue( char* ptr, json_t* property ) { + return primitiveValue( ptr, property, "true", JSON_BOOLEAN ); +} + +/** Parser a string to get a false value. + * If the first character after the value is different of '}' or ']' is set to '\0'. + * @param ptr Pointer to first character. 
+ * @param property Property handler to set the value and the type, (true, false or null). + * @retval Pointer to first non white space after the string. If success. + * @retval Null pointer if any error occur. */ +static char* falseValue( char* ptr, json_t* property ) { + return primitiveValue( ptr, property, "false", JSON_BOOLEAN ); +} + +/** Parser a string to get a null value. + * If the first character after the value is different of '}' or ']' is set to '\0'. + * @param ptr Pointer to first character. + * @param property Property handler to set the value and the type, (true, false or null). + * @retval Pointer to first non white space after the string. If success. + * @retval Null pointer if any error occur. */ +static char* nullValue( char* ptr, json_t* property ) { + return primitiveValue( ptr, property, "null", JSON_NULL ); +} + +/** Analyze the exponential part of a real number. + * @param ptr Pointer to first character. + * @retval Pointer to first non numerical after the string. If success. + * @retval Null pointer if any error occur. */ +static char* expValue( char* ptr ) { + if ( *ptr == '-' || *ptr == '+' ) ++ptr; + if ( !isdigit( (int)(*ptr) ) ) return 0; + ptr = goNum( ++ptr ); + return ptr; +} + +/** Analyze the decimal part of a real number. + * @param ptr Pointer to first character. + * @retval Pointer to first non numerical after the string. If success. + * @retval Null pointer if any error occur. */ +static char* fraqValue( char* ptr ) { + if ( !isdigit( (int)(*ptr) ) ) return 0; + ptr = goNum( ++ptr ); + if ( !ptr ) return 0; + return ptr; +} + +/** Parser a string to get a numerical value. + * If the first character after the value is different of '}' or ']' is set to '\0'. + * @param ptr Pointer to first character. + * @param property Property handler to set the value and the type: JSON_REAL or JSON_INTEGER. + * @retval Pointer to first non white space after the string. If success. + * @retval Null pointer if any error occur. 
*/ +static char* numValue( char* ptr, json_t* property ) { + if ( *ptr == '-' ) ++ptr; + if ( !isdigit( (int)(*ptr) ) ) return 0; + if ( *ptr != '0' ) { + ptr = goNum( ptr ); + if ( !ptr ) return 0; + } + else if ( isdigit( (int)(*++ptr) ) ) return 0; + property->type = JSON_INTEGER; + if ( *ptr == '.' ) { + ptr = fraqValue( ++ptr ); + if ( !ptr ) return 0; + property->type = JSON_REAL; + } + if ( *ptr == 'e' || *ptr == 'E' ) { + ptr = expValue( ++ptr ); + if ( !ptr ) return 0; + property->type = JSON_REAL; + } + if ( !isEndOfPrimitive( *ptr ) ) return 0; + if ( JSON_INTEGER == property->type ) { + char const* value = property->u.value; + bool const negative = *value == '-'; + static char const min[] = "-9223372036854775808"; + static char const max[] = "9223372036854775807"; + unsigned int const maxdigits = ( negative? sizeof min: sizeof max ) - 1; + unsigned int const len = ( unsigned int const ) ( ptr - value ); + if ( len > maxdigits ) return 0; + if ( len == maxdigits ) { + char const tmp = *ptr; + *ptr = '\0'; + char const* const threshold = negative ? min: max; + if ( 0 > strcmp( threshold, value ) ) return 0; + *ptr = tmp; + } + } + ptr = setToNull( ptr ); + return ptr; +} + +/** Add a property to a JSON object or array. + * @param obj The handler of the JSON object or array. + * @param property The handler of the property to be added. */ +static void add( json_t* obj, json_t* property ) { + property->sibling = 0; + if ( !obj->u.c.child ){ + obj->u.c.child = property; + obj->u.c.last_child = property; + } else { + obj->u.c.last_child->sibling = property; + obj->u.c.last_child = property; + } +} + +/** Parser a string to get a json object value. + * @param ptr Pointer to first character. + * @param obj The handler of the JSON root object or array. + * @param pool The handler of a json pool for creating json instances. + * @retval Pointer to first character after the value. If success. + * @retval Null pointer if any error occur. 
*/ +static char* objValue( char* ptr, json_t* obj, jsonPool_t* pool ) { + obj->type = *ptr == '{' ? JSON_OBJ : JSON_ARRAY; + obj->u.c.child = 0; + obj->sibling = 0; + ptr++; + for(;;) { + ptr = goBlank( ptr ); + if ( !ptr ) return 0; + if ( *ptr == ',' ) { + ++ptr; + continue; + } + char const endchar = ( obj->type == JSON_OBJ )? '}': ']'; + if ( *ptr == endchar ) { + *ptr = '\0'; + json_t* parentObj = obj->sibling; + if ( !parentObj ) return ++ptr; + obj->sibling = 0; + obj = parentObj; + ++ptr; + continue; + } + json_t* property = pool->alloc( pool ); + if ( !property ) return 0; + if( obj->type != JSON_ARRAY ) { + if ( *ptr != '\"' ) return 0; + ptr = propertyName( ptr, property ); + if ( !ptr ) return 0; + } + else property->name = 0; + add( obj, property ); + property->u.value = ptr; + switch( *ptr ) { + case '{': + property->type = JSON_OBJ; + property->u.c.child = 0; + property->sibling = obj; + obj = property; + ++ptr; + break; + case '[': + property->type = JSON_ARRAY; + property->u.c.child = 0; + property->sibling = obj; + obj = property; + ++ptr; + break; + case '\"': ptr = textValue( ptr, property ); break; + case 't': ptr = trueValue( ptr, property ); break; + case 'f': ptr = falseValue( ptr, property ); break; + case 'n': ptr = nullValue( ptr, property ); break; + default: ptr = numValue( ptr, property ); break; + } + if ( !ptr ) return 0; + } +} + +/** Initialize a json pool. + * @param pool The handler of the pool. + * @return a instance of a json. */ +static json_t* poolInit( jsonPool_t* pool ) { + jsonStaticPool_t *spool = json_containerOf( pool, jsonStaticPool_t, pool ); + spool->nextFree = 1; + return spool->mem; +} + +/** Create an instance of a json from a pool. + * @param pool The handler of the pool. + * @retval The handler of the new instance if success. + * @retval Null pointer if the pool was empty. 
*/ +static json_t* poolAlloc( jsonPool_t* pool ) { + jsonStaticPool_t *spool = json_containerOf( pool, jsonStaticPool_t, pool ); + if ( spool->nextFree >= spool->qty ) return 0; + return spool->mem + spool->nextFree++; +} + +/** Checks whether an character belongs to set. + * @param ch Character value to be checked. + * @param set Set of characters. It is just a null-terminated string. + * @return true or false there is membership or not. */ +static bool isOneOfThem( char ch, char const* set ) { + while( *set != '\0' ) + if ( ch == *set++ ) + return true; + return false; +} + +/** Increases a pointer while it points to a character that belongs to a set. + * @param str The initial pointer value. + * @param set Set of characters. It is just a null-terminated string. + * @return The final pointer value or null pointer if the null character was found. */ +static char* goWhile( char* str, char const* set ) { + for(; *str != '\0'; ++str ) { + if ( !isOneOfThem( *str, set ) ) + return str; + } + return 0; +} + +/** Set of characters that defines a blank. */ +static char const* const blank = " \n\r\t\f"; + +/** Increases a pointer while it points to a white space character. + * @param str The initial pointer value. + * @return The final pointer value or null pointer if the null character was found. */ +static char* goBlank( char* str ) { + return goWhile( str, blank ); +} + +/** Increases a pointer while it points to a decimal digit character. + * @param str The initial pointer value. + * @return The final pointer value or null pointer if the null character was found. */ +static char* goNum( char* str ) { + for( ; *str != '\0'; ++str ) { + if ( !isdigit( (int)(*str) ) ) + return str; + } + return 0; +} + +/** Set of characters that defines the end of an array or a JSON object. */ +static char const* const endofblock = "}]"; + +/** Set a char to '\0' and increase its pointer if the char is different to '}' or ']'. + * @param ch Pointer to character. 
+ * @return Final value pointer. */ +static char* setToNull( char* ch ) { + if ( !isOneOfThem( *ch, endofblock ) ) *ch++ = '\0'; + return ch; +} + +/** Indicate if a character is the end of a primitive value. */ +static bool isEndOfPrimitive( char ch ) { + return ch == ',' || isOneOfThem( ch, blank ) || isOneOfThem( ch, endofblock ); +} diff --git a/lib/CL/devices/almaif/AlmaifDB/tiny-json.h b/lib/CL/devices/almaif/AlmaifDB/tiny-json.h new file mode 100644 index 0000000000000000000000000000000000000000..2b527e7af55bd414e6e9c9e5157924fa0f41cd56 --- /dev/null +++ b/lib/CL/devices/almaif/AlmaifDB/tiny-json.h @@ -0,0 +1,176 @@ + +/* + +<https://github.com/rafagafe/tiny-json> + + Licensed under the MIT License <http://opensource.org/licenses/MIT>. + SPDX-License-Identifier: MIT + Copyright (c) 2016-2018 Rafa Garcia <rafagarcia77@gmail.com>. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+ +*/ + +#ifndef _TINY_JSON_H_ +#define _TINY_JSON_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdint.h> + +#define json_containerOf( ptr, type, member ) \ + ((type*)( (char*)ptr - offsetof( type, member ) )) + +/** @defgroup tinyJson Tiny JSON parser. + * @{ */ + +/** Enumeration of codes of supported JSON properties types. */ +typedef enum { + JSON_OBJ, JSON_ARRAY, JSON_TEXT, JSON_BOOLEAN, + JSON_INTEGER, JSON_REAL, JSON_NULL +} jsonType_t; + +/** Structure to handle JSON properties. */ +typedef struct json_s { + struct json_s* sibling; + char const* name; + union { + char const* value; + struct { + struct json_s* child; + struct json_s* last_child; + } c; + } u; + jsonType_t type; +} json_t; + +/** Parse a string to get a json. + * @param str String pointer with a JSON object. It will be modified. + * @param mem Array of json properties to allocate. + * @param qty Number of elements of mem. + * @retval Null pointer if any was wrong in the parse process. + * @retval If the parser process was successfully a valid handler of a json. + * This property is always unnamed and its type is JSON_OBJ. */ +json_t const* json_create( char* str, json_t mem[], unsigned int qty ); + +/** Get the name of a json property. + * @param json A valid handler of a json property. + * @retval Pointer to null-terminated if property has name. + * @retval Null pointer if the property is unnamed. */ +static inline char const* json_getName( json_t const* json ) { + return json->name; +} + +/** Get the value of a json property. + * The type of property cannot be JSON_OBJ or JSON_ARRAY. + * @param property A valid handler of a json property. + * @return Pointer to null-terminated string with the value. */ +static inline char const* json_getValue( json_t const* property ) { + return property->u.value; +} + +/** Get the type of a json property. + * @param json A valid handler of a json property. 
+ * @return The code of type.*/ +static inline jsonType_t json_getType( json_t const* json ) { + return json->type; +} + +/** Get the next sibling of a JSON property that is within a JSON object or array. + * @param json A valid handler of a json property. + * @retval The handler of the next sibling if found. + * @retval Null pointer if the json property is the last one. */ +static inline json_t const* json_getSibling( json_t const* json ) { + return json->sibling; +} + +/** Search a property by its name in a JSON object. + * @param obj A valid handler of a json object. Its type must be JSON_OBJ. + * @param property The name of property to get. + * @retval The handler of the json property if found. + * @retval Null pointer if not found. */ +json_t const* json_getProperty( json_t const* obj, char const* property ); + + +/** Search a property by its name in a JSON object and return its value. + * @param obj A valid handler of a json object. Its type must be JSON_OBJ. + * @param property The name of property to get. + * @retval If found a pointer to null-terminated string with the value. + * @retval Null pointer if not found or it is an array or an object. */ +char const* json_getPropertyValue( json_t const* obj, char const* property ); + +/** Get the first property of a JSON object or array. + * @param json A valid handler of a json property. + * Its type must be JSON_OBJ or JSON_ARRAY. + * @retval The handler of the first property if there is. + * @retval Null pointer if the json object has not properties. */ +static inline json_t const* json_getChild( json_t const* json ) { + return json->u.c.child; +} + +/** Get the value of a json boolean property. + * @param property A valid handler of a json object. Its type must be JSON_BOOLEAN. + * @return The value stdbool. */ +static inline bool json_getBoolean( json_t const* property ) { + return *property->u.value == 't'; +} + +/** Get the value of a json integer property. 
+ * @param property A valid handler of a json object. Its type must be JSON_INTEGER. + * @return The value stdint. */ +static inline int64_t json_getInteger( json_t const* property ) { + return strtoll( property->u.value,(char**)NULL, 10); +} + +/** Get the value of a json real property. + * @param property A valid handler of a json object. Its type must be JSON_REAL. + * @return The value. */ +static inline double json_getReal( json_t const* property ) { + return strtod( property->u.value,(char**)NULL ); +} + + + +/** Structure to handle a heap of JSON properties. */ +typedef struct jsonPool_s jsonPool_t; +struct jsonPool_s { + json_t* (*init)( jsonPool_t* pool ); + json_t* (*alloc)( jsonPool_t* pool ); +}; + +/** Parse a string to get a json. + * @param str String pointer with a JSON object. It will be modified. + * @param pool Custom json pool pointer. + * @retval Null pointer if any was wrong in the parse process. + * @retval If the parser process was successfully a valid handler of a json. + * This property is always unnamed and its type is JSON_OBJ. */ +json_t const* json_createWithPool( char* str, jsonPool_t* pool ); + +/** @ } */ + +#ifdef __cplusplus +} +#endif + +#endif /* _TINY_JSON_H_ */ diff --git a/lib/CL/devices/almaif/AlmaifShared.hh b/lib/CL/devices/almaif/AlmaifShared.hh index 735691fd5a7b7c540200d97a1f918b961d82930c..cc2ce13ddd4ea68961e0af0785bbf06b860aa41f 100644 --- a/lib/CL/devices/almaif/AlmaifShared.hh +++ b/lib/CL/devices/almaif/AlmaifShared.hh @@ -127,6 +127,13 @@ struct AQLQueueInfo { #define ALMAIF_DRIVER_SLEEP 200 +enum ALMAIF_DEVICE_TYPE : size_t { + POCL_ALMAIFDEVICE_XRT = 0xA, + POCL_ALMAIFDEVICE_TTASIM = 0xB, + POCL_ALMAIFDEVICE_EMULATION = 0xE, + POCL_ALMAIFDEVICE_BITSTREAMDATABASE = 0xF, +}; + struct CommandMetadata { uint32_t completion_signal; uint32_t reserved0; @@ -179,6 +186,8 @@ struct AlmaifData { AlmaIFDevice *Dev; + cl_bool Available; + std::set<BIKD *> SupportedKernels; // List of commands ready to be executed. 
_cl_command_node *ReadyList; @@ -190,8 +199,8 @@ struct AlmaifData { // Lock for device-side command queue manipulation pocl_lock_t AQLQueueLock; - void *printf_buffer; - void *printf_position; + void *PrintfBuffer; + void *PrintfPosition; emulation_data_t EmulationData; diff --git a/lib/CL/devices/almaif/CMakeLists.txt b/lib/CL/devices/almaif/CMakeLists.txt index 2f6d5063bf406375f3ca2c9336658db7b30ee239..4d506c3574d67f335aa8dff1f0c50b5036bc32b8 100644 --- a/lib/CL/devices/almaif/CMakeLists.txt +++ b/lib/CL/devices/almaif/CMakeLists.txt @@ -40,18 +40,26 @@ set(ALMAIF_SOURCES "AlmaifShared.hh" "EmulationRegion.hh" "EmulationDevice.cc" "EmulationDevice.hh" - "XrtDevice.hh" + "XilinxXrtDevice.hh" "AlmaifCompile.cc" "AlmaifCompile.hh" - "openasip/AlmaifCompileTCE.hh" + "openasip/AlmaifCompileOpenasip.hh" ) if(HAVE_XRT) add_compile_options(-I${XRT_INCLUDEDIR}) set(ALMAIF_SOURCES ${ALMAIF_SOURCES} - "XrtDevice.cc" - "XrtRegion.cc" - "XrtRegion.hh" + "XilinxXrtDevice.cc" + "XilinxXrtRegion.cc" + "XilinxXrtRegion.hh" + "XilinxXrtExternalRegion.cc" + "XilinxXrtExternalRegion.hh" + "AlmaifDB/DBDevice.hh" + "AlmaifDB/DBDevice.cc" + "AlmaifDB/AlmaIFBitstreamDatabaseManager.cc" + "AlmaifDB/AlmaIFBitstreamDatabaseManager.hh" + "AlmaifDB/tiny-json.c" + "AlmaifDB/tiny-json.h" ) endif() @@ -63,7 +71,7 @@ if(ENABLE_TCE) "openasip/TTASimRegion.hh" "openasip/TTASimControlRegion.cc" "openasip/TTASimControlRegion.hh" - "openasip/AlmaifCompileTCE.cc" + "openasip/AlmaifCompileOpenasip.cc" ) endif() @@ -74,7 +82,7 @@ endif(MSVC) add_pocl_device_library(pocl-devices-almaif ${ALMAIF_SOURCES}) -if(HAVE_XRT}) +if(HAVE_XRT) target_link_libraries(pocl-devices-almaif PRIVATE "${XRT_LIBDIR}/libxrt_coreutil.so") endif() diff --git a/lib/CL/devices/almaif/EmulationDevice.cc b/lib/CL/devices/almaif/EmulationDevice.cc index c6b50834bb3a279a84e4ad7889a54d8ee452c83b..5c0565da57a458d7a083b9fb51b932ca78b5730e 100644 --- a/lib/CL/devices/almaif/EmulationDevice.cc +++ 
b/lib/CL/devices/almaif/EmulationDevice.cc @@ -54,9 +54,9 @@ EmulationDevice::EmulationDevice() { discoverDeviceParameters(); - InstructionMemory = new EmulationRegion(imem_start, imem_size); - CQMemory = new EmulationRegion(cq_start, cq_size); - DataMemory = new EmulationRegion(dmem_start, dmem_size); + InstructionMemory = new EmulationRegion(ImemStart, ImemSize); + CQMemory = new EmulationRegion(CQStart, CQSize); + DataMemory = new EmulationRegion(DmemStart, DmemSize); } EmulationDevice::~EmulationDevice() { @@ -79,8 +79,8 @@ void *emulate_almaif(void *E_void) { void *base_address = E->emulating_address; uint32_t ctrl_size = 1024; - uint32_t imem_size = 0; - uint32_t dmem_size = EMULATING_MAX_SIZE * 3 / 4; + uint32_t ImemSize = 0; + uint32_t DmemSize = EMULATING_MAX_SIZE * 3 / 4; // The accelerator can choose the size of the queue (must be a power-of-two) // Can be even 1, to make the packet handling easiest with static offsets uint32_t queue_length = 3; @@ -89,14 +89,14 @@ void *emulate_almaif(void *E_void) { // The accelerator can set the starting addresses // Even the order can be changed if the accelerator wants to // Here packing the memory regions tighly as an example. 
- uintptr_t imem_start = (uintptr_t)base_address + ctrl_size; - uintptr_t cqmem_start = imem_start + imem_size; - uintptr_t dmem_start = cqmem_start + cqmem_size; + uintptr_t ImemStart = (uintptr_t)base_address + ctrl_size; + uintptr_t cqmem_start = ImemStart + ImemSize; + uintptr_t DmemStart = cqmem_start + cqmem_size; volatile uint32_t *Control = (uint32_t *)base_address; - //volatile uint8_t *Instruction = (uint8_t *)imem_start; + // volatile uint8_t *Instruction = (uint8_t *)ImemStart; volatile uint32_t *CQ = (uint32_t *)cqmem_start; - //volatile uint8_t *Data = (uint8_t *)dmem_start; + // volatile uint8_t *Data = (uint8_t *)DmemStart; // Set initial values for info registers: Control[ALMAIF_INFO_DEV_CLASS / 4] = 0xE; // Unused @@ -113,16 +113,16 @@ void *emulate_almaif(void *E_void) { // that are written BEFORE hw reset is deasserted. // E.g. program binaries of a processor-based accelerator Control[ALMAIF_INFO_IMEM_SIZE / 4] = 0; - Control[ALMAIF_INFO_IMEM_START_LOW / 4] = (uint32_t)imem_start; - Control[ALMAIF_INFO_IMEM_START_HIGH / 4] = (uint32_t)(imem_start >> 32); + Control[ALMAIF_INFO_IMEM_START_LOW / 4] = (uint32_t)ImemStart; + Control[ALMAIF_INFO_IMEM_START_HIGH / 4] = (uint32_t)(ImemStart >> 32); Control[ALMAIF_INFO_CQMEM_SIZE_LOW / 4] = cqmem_size; Control[ALMAIF_INFO_CQMEM_START_LOW / 4] = (uint32_t)cqmem_start; Control[ALMAIF_INFO_CQMEM_START_HIGH / 4] = (uint32_t)(cqmem_start >> 32); - Control[ALMAIF_INFO_DMEM_SIZE_LOW / 4] = dmem_size; - Control[ALMAIF_INFO_DMEM_START_LOW / 4] = (uint32_t)dmem_start; - Control[ALMAIF_INFO_DMEM_START_HIGH / 4] = (uint32_t)(dmem_start >> 32); + Control[ALMAIF_INFO_DMEM_SIZE_LOW / 4] = DmemSize; + Control[ALMAIF_INFO_DMEM_START_LOW / 4] = (uint32_t)DmemStart; + Control[ALMAIF_INFO_DMEM_START_HIGH / 4] = (uint32_t)(DmemStart >> 32); uint32_t feature_flags_low = ALMAIF_FF_BIT_AXI_MASTER; Control[ALMAIF_INFO_FEATURE_FLAGS_LOW / 4] = feature_flags_low; diff --git a/lib/CL/devices/almaif/EmulationDevice.hh 
b/lib/CL/devices/almaif/EmulationDevice.hh index fb1e1f2fa9ed09d9499f9d49f5e9278f4d850855..4c30e4512b60577f9b28c8bfa1bf8b719eac2849 100644 --- a/lib/CL/devices/almaif/EmulationDevice.hh +++ b/lib/CL/devices/almaif/EmulationDevice.hh @@ -29,7 +29,6 @@ #include "AlmaIFDevice.hh" -#define EMULATING_ADDRESS 0xE #define EMULATING_MAX_SIZE (256 * 1024 * 1024) //#define EMULATING_MAX_SIZE 4 * 4096 diff --git a/lib/CL/devices/almaif/EmulationRegion.cc b/lib/CL/devices/almaif/EmulationRegion.cc index 793c2934ce7d7b942dfe92f5c49ca08740ed1e65..70ce690a59f6b4a5bf0fd2117edb01421e3fc865 100644 --- a/lib/CL/devices/almaif/EmulationRegion.cc +++ b/lib/CL/devices/almaif/EmulationRegion.cc @@ -26,7 +26,7 @@ // Used in emulator to hack the MMAP to work with just virtually contiguous // memory EmulationRegion::EmulationRegion(size_t Address, size_t RegionSize) { - PhysAddress = Address; + PhysAddress_ = Address; Data = (void *)Address; - Size = RegionSize; + Size_ = RegionSize; } diff --git a/lib/CL/devices/almaif/MMAPDevice.cc b/lib/CL/devices/almaif/MMAPDevice.cc index 8b9b40416c8decebbe9c85b044020a9fc7402ad0..7f00d61ac8a6626a60d6cd3c4fa6ae09be0df66a 100644 --- a/lib/CL/devices/almaif/MMAPDevice.cc +++ b/lib/CL/devices/almaif/MMAPDevice.cc @@ -32,7 +32,7 @@ //#include <sys/stat.h> #include <fcntl.h> -MMAPDevice::MMAPDevice(size_t base_address, char *kernel_name) { +MMAPDevice::MMAPDevice(size_t base_address, const std::string &kernel_name) { int mem_fd = -1; mem_fd = open("/dev/mem", O_RDWR | O_SYNC); if (mem_fd == -1) { @@ -42,23 +42,19 @@ MMAPDevice::MMAPDevice(size_t base_address, char *kernel_name) { discoverDeviceParameters(); - InstructionMemory = new MMAPRegion(imem_start, imem_size, mem_fd); - CQMemory = new MMAPRegion(cq_start, cq_size, mem_fd); - DataMemory = new MMAPRegion(dmem_start, dmem_size, mem_fd); + InstructionMemory = new MMAPRegion(ImemStart, ImemSize, mem_fd); + CQMemory = new MMAPRegion(CQStart, CQSize, mem_fd); + DataMemory = new MMAPRegion(DmemStart, 
DmemSize, mem_fd); - unsigned img_file_name_length = strlen(kernel_name) + 5; - char *file_name = (char *)malloc(img_file_name_length); - assert(file_name); - snprintf(file_name, img_file_name_length, "%s.img", kernel_name); + std::string file_name = kernel_name + ".img"; - if (pocl_exists(file_name)) { + if (pocl_exists(file_name.c_str())) { POCL_MSG_PRINT_ALMAIF( "Almaif: Found built-in kernel firmaware. Loading it in\n"); ((MMAPRegion *)InstructionMemory)->initRegion(file_name); } else { POCL_MSG_PRINT_ALMAIF("Almaif: No default firmware found. Skipping\n"); } - free(file_name); if (pocl_is_option_set("POCL_ALMAIF_EXTERNALREGION")) { char *region_params = diff --git a/lib/CL/devices/almaif/MMAPDevice.hh b/lib/CL/devices/almaif/MMAPDevice.hh index 1e6936e8fa88e699805bbf68e39c667a6d584d58..d569611b2043e8cbca29c706b47395dbe94fa997 100644 --- a/lib/CL/devices/almaif/MMAPDevice.hh +++ b/lib/CL/devices/almaif/MMAPDevice.hh @@ -32,7 +32,7 @@ private: ~MMAPDevice(); public: - MMAPDevice(size_t base_address, char *kernel_name); + MMAPDevice(size_t base_address, const std::string &kernel_name); }; #endif diff --git a/lib/CL/devices/almaif/MMAPRegion.cc b/lib/CL/devices/almaif/MMAPRegion.cc index 677d08e2adeb994793ab21d911682c6f2259c9e6..44f7942704e9bd7cdfe22311d7f50e7e54dd21ab 100644 --- a/lib/CL/devices/almaif/MMAPRegion.cc +++ b/lib/CL/devices/almaif/MMAPRegion.cc @@ -24,6 +24,7 @@ #include <assert.h> #include <fstream> #include <stdlib.h> +#include <string> #include <sys/mman.h> #include <unistd.h> @@ -34,9 +35,9 @@ MMAPRegion::MMAPRegion() {} MMAPRegion::MMAPRegion(size_t Address, size_t RegionSize, int mem_fd) { - PhysAddress = Address; - Size = RegionSize; - if (Size == 0) { + PhysAddress_ = Address; + Size_ = RegionSize; + if (Size_ == 0) { return; } POCL_MSG_PRINT_ALMAIF_MMAP( @@ -46,7 +47,7 @@ MMAPRegion::MMAPRegion(size_t Address, size_t RegionSize, int mem_fd) { long page_size = sysconf(_SC_PAGESIZE); size_t roundDownAddress = (Address / page_size) * page_size; 
size_t difference = Address - roundDownAddress; - Data = mmap(0, Size + difference, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, + Data = mmap(0, Size_ + difference, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd, roundDownAddress); assert(Data != MAP_FAILED && "MMAPRegion mapping failed"); // Increment back to the unaligned address user asked for @@ -54,9 +55,9 @@ MMAPRegion::MMAPRegion(size_t Address, size_t RegionSize, int mem_fd) { POCL_MSG_PRINT_ALMAIF_MMAP("almaif: got address %p\n", Data); } -void MMAPRegion::initRegion(char *init_file) { +void MMAPRegion::initRegion(const std::string &init_file) { std::ifstream inFile; - inFile.open(init_file, std::ios::binary); + inFile.open(init_file.c_str(), std::ios::binary); unsigned int current; int i = 0; while (inFile.good()) { @@ -70,24 +71,24 @@ void MMAPRegion::initRegion(char *init_file) { MMAPRegion::~MMAPRegion() { POCL_MSG_PRINT_ALMAIF_MMAP("almaif: munmap'ing from address 0x%zx\n", - PhysAddress); + PhysAddress_); if (Data) { // Align unmap to page_size long page_size = sysconf(_SC_PAGESIZE); size_t roundDownAddress = ((size_t)Data / page_size) * page_size; size_t difference = (size_t)Data - roundDownAddress; - munmap((void *)roundDownAddress, Size + difference); + munmap((void *)roundDownAddress, Size_ + difference); Data = NULL; } } uint32_t MMAPRegion::Read32(size_t offset) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading from physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(Data && "No pointer to MMAP'd region; read before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); auto value = static_cast<volatile uint32_t *>(Data)[offset / sizeof(uint32_t)]; return value; @@ -95,28 +96,28 @@ uint32_t MMAPRegion::Read32(size_t offset) { void MMAPRegion::Write32(size_t offset, uint32_t value) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing 
to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(Data && "No pointer to MMAP'd region; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); static_cast<volatile uint32_t *>(Data)[offset / sizeof(uint32_t)] = value; } void MMAPRegion::Write16(size_t offset, uint16_t value) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(Data && "No pointer to MMAP'd region; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); static_cast<volatile uint16_t *>(Data)[offset / sizeof(uint16_t)] = value; } uint64_t MMAPRegion::Read64(size_t offset) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading from physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(Data && "No pointer to MMAP'd region; read before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); auto value = static_cast<volatile uint64_t *>(Data)[offset / sizeof(uint64_t)]; return value; @@ -124,32 +125,33 @@ uint64_t MMAPRegion::Read64(size_t offset) { void MMAPRegion::Write64(size_t offset, uint64_t value) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(Data && "No pointer to MMAP'd region; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); static_cast<volatile 
uint64_t *>(Data)[offset / sizeof(uint64_t)] = value; } void MMAPRegion::CopyToMMAP(size_t destination, const void *source, size_t bytes) { - POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing 0x%zx bytes to buffer at 0x%zx with " - "address 0x%zx\n", - bytes, PhysAddress, destination); + POCL_MSG_PRINT_ALMAIF_MMAP( + "MMAP: Writing 0x%zx bytes to buffer at 0x%zx with " + "address 0x%zx\n", + bytes, PhysAddress_, destination); auto src = (char *)source; - size_t offset = destination - PhysAddress; - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + size_t offset = destination - PhysAddress_; + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); auto dst = offset + static_cast<volatile char *>(Data); memcpy((void *)dst, src, bytes); } void MMAPRegion::CopyFromMMAP(void *destination, size_t source, size_t bytes) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading 0x%zx bytes from buffer at 0x%zx " - "with address 0x%zx\n", - bytes, PhysAddress, source); + "with address 0x%zx\n", + bytes, PhysAddress_, source); auto dst = (char *)destination; - size_t offset = source - PhysAddress; - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + size_t offset = source - PhysAddress_; + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); auto src = offset + static_cast<volatile char *>(Data); memcpy(dst, (void *)src, bytes); } @@ -158,11 +160,11 @@ void MMAPRegion::CopyInMem(size_t source, size_t destination, size_t bytes) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Copying 0x%zx bytes from 0x%zx " "to 0x%zx\n", bytes, source, destination); - size_t src_offset = source - PhysAddress; - size_t dst_offset = destination - PhysAddress; - assert(src_offset < Size && (src_offset + bytes) <= Size && + size_t src_offset = source - PhysAddress_; + size_t dst_offset = destination - PhysAddress_; + assert(src_offset < Size_ && (src_offset + bytes) <= Size_ && "Attempt to access data outside MMAP'd buffer"); - 
assert(dst_offset < Size && (dst_offset + bytes) <= Size && + assert(dst_offset < Size_ && (dst_offset + bytes) <= Size_ && "Attempt to access data outside MMAP'd buffer"); volatile char *src = src_offset + static_cast<volatile char *>(Data); volatile char *dst = dst_offset + static_cast<volatile char *>(Data); diff --git a/lib/CL/devices/almaif/MMAPRegion.hh b/lib/CL/devices/almaif/MMAPRegion.hh index aa6f30115342635e392ec33df653628e62f1ecf6..85b5dc42c6fb801803bea9c1bab06626bc80efb6 100644 --- a/lib/CL/devices/almaif/MMAPRegion.hh +++ b/lib/CL/devices/almaif/MMAPRegion.hh @@ -26,6 +26,7 @@ #define MMAPREGION_H #include <stdlib.h> +#include <string> #include "pocl_types.h" @@ -49,7 +50,7 @@ public: virtual void CopyInMem(size_t source, size_t destination, size_t bytes) override; - virtual void initRegion(char *init_file); + virtual void initRegion(const std::string &init_file); protected: MMAPRegion(); diff --git a/lib/CL/devices/almaif/XilinxXrtDevice.cc b/lib/CL/devices/almaif/XilinxXrtDevice.cc new file mode 100644 index 0000000000000000000000000000000000000000..0a63488bea85960826e5e0a72f84874e98196edc --- /dev/null +++ b/lib/CL/devices/almaif/XilinxXrtDevice.cc @@ -0,0 +1,270 @@ +/* XilinxXrtDevice.cc - Access AlmaIF device in Xilinx PCIe FPGA. + + Copyright (c) 2022 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +*/ + +#include "XilinxXrtDevice.hh" + +#include "AlmaifShared.hh" +#include "XilinxXrtExternalRegion.hh" +#include "XilinxXrtRegion.hh" + +#include "experimental/xrt_ip.h" + +#include "pocl_file_util.h" +#include "pocl_timing.h" + +#include <libgen.h> + +void *DeviceHandle; + +XilinxXrtDevice::XilinxXrtDevice(const std::string &XrtKernelNamePrefix, + unsigned j) { + + char *TmpKernelName = strdup(XrtKernelNamePrefix.c_str()); + char *KernelName = basename(TmpKernelName); + + std::string xclbin_char = XrtKernelNamePrefix + ".xclbin"; + + std::string ExternalMemoryParameters = + pocl_get_string_option("POCL_ALMAIF_EXTERNALREGION", ""); + + init_xrtdevice(KernelName, xclbin_char, ExternalMemoryParameters, j); + + free(TmpKernelName); +} + +XilinxXrtDevice::XilinxXrtDevice(const std::string &XrtKernelNamePrefix, + const std::string &XclbinFile, unsigned j) { + std::string ExternalMemoryParameters = + pocl_get_string_option("POCL_ALMAIF_EXTERNALREGION", ""); + init_xrtdevice(XrtKernelNamePrefix, XclbinFile, ExternalMemoryParameters, j); +} + +XilinxXrtDevice::XilinxXrtDevice(const std::string &XrtKernelNamePrefix, + const std::string &XclbinFile, + const std::string &ExternalMemoryParameters, + unsigned j) { + init_xrtdevice(XrtKernelNamePrefix, XclbinFile, ExternalMemoryParameters, j); +} + +void XilinxXrtDevice::init_xrtdevice( + const std::string &XrtKernelNamePrefix, const std::string &XclbinFile, + const std::string &ExternalMemoryParameters, unsigned j) { + if (j == 0) { + auto devicehandle = 
new xrt::device(0); + assert(devicehandle != NULL && "devicehandle null\n"); + DeviceHandle = (void *)devicehandle; + } + programBitstream(XrtKernelNamePrefix, XclbinFile, j); + // TODO Remove magic + size_t DeviceOffset = 0x40000000 + j * 0x10000; + // size_t DeviceOffset = 0x00000000; + ControlMemory = new XilinxXrtRegion(DeviceOffset, ALMAIF_DEFAULT_CTRL_SIZE, + Kernel, DeviceOffset); + + discoverDeviceParameters(); + + char TmpXclbinFile[POCL_MAX_PATHNAME_LENGTH]; + strncpy(TmpXclbinFile, XclbinFile.c_str(), POCL_MAX_PATHNAME_LENGTH); + char *DirectoryName = dirname(TmpXclbinFile); + std::string ImgFileName = DirectoryName; + ImgFileName += "/" + XrtKernelNamePrefix + ".img"; + if (pocl_exists(ImgFileName.c_str())) { + POCL_MSG_PRINT_ALMAIF( + "Almaif: Found built-in kernel firmware. Loading it in\n"); + InstructionMemory = new XilinxXrtRegion(ImemStart, ImemSize, Kernel, + ImgFileName, DeviceOffset); + } else { + POCL_MSG_PRINT_ALMAIF("Almaif: No default firmware found. Skipping\n"); + InstructionMemory = + new XilinxXrtRegion(ImemStart, ImemSize, Kernel, DeviceOffset); + } + + CQMemory = new XilinxXrtRegion(CQStart, CQSize, Kernel, DeviceOffset); + DataMemory = new XilinxXrtRegion(DmemStart, DmemSize, Kernel, DeviceOffset); + + if (ExternalMemoryParameters != "") { + char *tmp_params = strdup(ExternalMemoryParameters.c_str()); + char *save_ptr; + char *param_token = strtok_r(tmp_params, ",", &save_ptr); + size_t region_address = strtoul(param_token, NULL, 0); + param_token = strtok_r(NULL, ",", &save_ptr); + size_t region_size = strtoul(param_token, NULL, 0); + if (region_size > 0) { + ExternalXRTMemory = new XilinxXrtExternalRegion( + region_address, region_size, DeviceHandle); + POCL_MSG_PRINT_ALMAIF("Almaif: initialized external XRT alloc region at " + "%zx with size %zx\n", + region_address, region_size); + } + free(tmp_params); + } + XilinxXrtDeviceInitDone_ = 1; +} + +XilinxXrtDevice::~XilinxXrtDevice() { + delete ((xrt::ip *)Kernel); + delete 
((xrt::device *)DeviceHandle); + /* if (ExternalXRTMemory) { + LL_DELETE(AllocRegions, AllocRegions->next); + }*/ +} + +void XilinxXrtDevice::programBitstream(const std::string &XrtKernelNamePrefix, + const std::string &XclbinFile, + unsigned j) { + + xrt::device *devicehandle = (xrt::device *)DeviceHandle; + + // TODO: Fix the case when the kernel name contains a path + // Needs to tokenize the last part of the path and use that + // as the kernel name + std::string XrtKernelName = + XrtKernelNamePrefix + ":{" + XrtKernelNamePrefix + "_1}"; + + if (XilinxXrtDeviceInitDone_) { + delete (xrt::ip *)Kernel; + } + + if (j == 0) { + uint64_t start_time = pocl_gettimemono_ns(); + auto uuid = devicehandle->load_xclbin(XclbinFile); + uint64_t end_time = pocl_gettimemono_ns(); + printf("Reprogramming done. Time: %d ms\n", + (end_time - start_time) / 1000000); + + std::string MemInfo = devicehandle->get_info<xrt::info::device::memory>(); + POCL_MSG_PRINT_ALMAIF_MMAP("XRT device's memory info:%s\n", + MemInfo.c_str()); + } + auto uuid = devicehandle->get_xclbin_uuid(); + + auto kernel = new xrt::ip(*devicehandle, uuid, XrtKernelName.c_str()); + + assert(kernel != XRT_NULL_HANDLE && + "xrtKernelHandle NULL, is the kernel opened properly?"); + + Kernel = (void *)kernel; + + POCL_MSG_PRINT_ALMAIF("TEST\n"); + if (XilinxXrtDeviceInitDone_) { + ((XilinxXrtRegion *)ControlMemory)->setKernelPtr(Kernel); + ((XilinxXrtRegion *)InstructionMemory)->setKernelPtr(Kernel); + ((XilinxXrtRegion *)CQMemory)->setKernelPtr(Kernel); + ((XilinxXrtRegion *)DataMemory)->setKernelPtr(Kernel); + } + POCL_MSG_PRINT_ALMAIF("BITSTREAM PROGRAMMING DONE\n"); +} + +void XilinxXrtDevice::freeBuffer(pocl_mem_identifier *P) { + if (P->extra == 1) { + POCL_MSG_PRINT_MEMORY("almaif: freed buffer from 0x%zx\n", + ExternalXRTMemory->pointerDeviceOffset(P)); + ExternalXRTMemory->freeBuffer(P); + } else { + chunk_info_t *chunk = (chunk_info_t *)P->mem_ptr; + + POCL_MSG_PRINT_MEMORY("almaif: freed buffer from 
0x%zx\n", + chunk->start_address); + + assert(chunk != NULL); + pocl_free_chunk(chunk); + } +} + +size_t XilinxXrtDevice::pointerDeviceOffset(pocl_mem_identifier *P) { + if (P->extra == 1) { + return ExternalXRTMemory->pointerDeviceOffset(P); + } else { + chunk_info_t *chunk = (chunk_info_t *)P->mem_ptr; + assert(chunk != NULL); + return chunk->start_address; + } +} + +cl_int XilinxXrtDevice::allocateBuffer(pocl_mem_identifier *P, size_t Size) { + + assert(P->mem_ptr == NULL); + chunk_info_t *chunk = NULL; + + // TODO: add bufalloc-based on-chip memory allocation here. The current + // version always allocates from external memory, since the current + // kernels do not know how to access the on-chip memory. + if (chunk == NULL) { + if (ExternalXRTMemory) { + // XilinxXrtExternalRegion has its own allocation requirements + // (doesn't use bufalloc) + cl_int alloc_status = ExternalXRTMemory->allocateBuffer(P, Size); + P->version = 0; + P->extra = 1; + return alloc_status; + } else { + return CL_MEM_OBJECT_ALLOCATION_FAILURE; + } + } else { + POCL_MSG_PRINT_MEMORY("almaif: allocated %zu bytes from 0x%zx\n", Size, + chunk->start_address); + + P->mem_ptr = chunk; + P->extra = 0; + } + P->version = 0; + return CL_SUCCESS; +} + +void XilinxXrtDevice::writeDataToDevice(pocl_mem_identifier *DstMemId, + const char *__restrict__ const Src, + size_t Size, size_t Offset) { + + if (DstMemId->extra == 0) { + chunk_info_t *chunk = (chunk_info_t *)DstMemId->mem_ptr; + size_t Dst = chunk->start_address + Offset; + POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to 0x%zx\n", Size, Dst); + DataMemory->CopyToMMAP(Dst, Src, Size); + } else if (DstMemId->extra == 1) { + POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to external Xrt buffer\n", + Size); + ExternalXRTMemory->CopyToMMAP(DstMemId, Src, Size, Offset); + } else { + POCL_ABORT("Attempt to write data to outside the device memories.\n"); + } +} + +void XilinxXrtDevice::readDataFromDevice(char *__restrict__ const Dst, + 
pocl_mem_identifier *SrcMemId, + size_t Size, size_t Offset) { + + chunk_info_t *chunk = (chunk_info_t *)SrcMemId->mem_ptr; + POCL_MSG_PRINT_ALMAIF("Reading data with chunk start %zu, and offset %zu\n", + chunk->start_address, Offset); + size_t Src = chunk->start_address + Offset; + if (SrcMemId->extra == 0) { + POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes from 0x%zx\n", Size, Src); + DataMemory->CopyFromMMAP(Dst, Src, Size); + } else if (SrcMemId->extra == 1) { + POCL_MSG_PRINT_ALMAIF( + "almaif: Copying %zu bytes from external XRT buffer\n", Size); + ExternalXRTMemory->CopyFromMMAP(Dst, SrcMemId, Size, Offset); + } else { + POCL_ABORT("Attempt to read data from outside the device memories.\n"); + } +} diff --git a/lib/CL/devices/almaif/XilinxXrtDevice.hh b/lib/CL/devices/almaif/XilinxXrtDevice.hh new file mode 100644 index 0000000000000000000000000000000000000000..805de1ad8754cbc8e6a6abe86138e2c45aa0fbae --- /dev/null +++ b/lib/CL/devices/almaif/XilinxXrtDevice.hh @@ -0,0 +1,71 @@ +/* XilinxXrtDevice.hh - Access AlmaIF device in Xilinx PCIe FPGA. + + Copyright (c) 2022 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +*/ + +#ifndef XILINXXRTDEVICE_H +#define XILINXXRTDEVICE_H + +#include "AlmaIFDevice.hh" + +class XilinxXrtExternalRegion; + +// This class abstracts the Almaif device instantiated on a Xilinx (PCIe) FPGA. +// The FPGA is reconfigured and Almaif device's memory map accessed with +// the Xilinx Runtime (XRT) API. +class XilinxXrtDevice : public AlmaIFDevice { +public: + XilinxXrtDevice(const std::string &XrtKernelNamePrefix, unsigned j); + XilinxXrtDevice(const std::string &XrtKernelNamePrefix, + const std::string &XclbinFile, unsigned j); + XilinxXrtDevice(const std::string &XrtKernelNamePrefix, + const std::string &XclbinFile, + const std::string &ExternalMemoryParameters, unsigned j); + void init_xrtdevice(const std::string &XrtKernelNamePrefix, + const std::string &XclbinFile, + const std::string &ExternalMemoryParameters, unsigned j); + ~XilinxXrtDevice() override; + // Reconfigures the FPGA + void programBitstream(const std::string &XrtKernelNamePrefix, + const std::string &XclbinFile, unsigned j); + + // Allocate buffers from either on-chip or external memory regions + // (Directs to either XilinxXrtRegion or XilinxXrtExternalRegion) + cl_int allocateBuffer(pocl_mem_identifier *P, size_t Size) override; + void freeBuffer(pocl_mem_identifier *P) override; + // Retuns the offset of the allocated buffer, in order to be passed + // as a kernel argument. This is relevant for XilinxXrtDevice specifically, + // since the allocations in XilinxXrtExternalRegion are managed by XRT API. 
+ size_t pointerDeviceOffset(pocl_mem_identifier *P) override; + void writeDataToDevice(pocl_mem_identifier *DstMemId, + const char *__restrict__ const Src, size_t Size, + size_t Offset) override; + void readDataFromDevice(char *__restrict__ const Dst, + pocl_mem_identifier *SrcMemId, size_t Size, + size_t Offset) override; + +private: + XilinxXrtExternalRegion *ExternalXRTMemory; + void *Kernel; + int XilinxXrtDeviceInitDone_ = 0; +}; + +#endif diff --git a/lib/CL/devices/almaif/XilinxXrtExternalRegion.cc b/lib/CL/devices/almaif/XilinxXrtExternalRegion.cc new file mode 100644 index 0000000000000000000000000000000000000000..0ff4be1025d2bab1ffad247cdc626868820c407d --- /dev/null +++ b/lib/CL/devices/almaif/XilinxXrtExternalRegion.cc @@ -0,0 +1,128 @@ +/* XilinxXrtExternalRegion.cc - Access external memory (DDR or HBM) of an XRT + device + * as AlmaIFRegion + + Copyright (c) 2023 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. 
+*/ + +#include <assert.h> +#include <fstream> +#include <stdlib.h> +#include <unistd.h> + +#include "xrt/xrt_bo.h" +#include "xrt/xrt_device.h" + +#include "XilinxXrtExternalRegion.hh" +#include "pocl_util.h" + +XilinxXrtExternalRegion::XilinxXrtExternalRegion(size_t Address, + size_t RegionSize, + void *Device) { + + POCL_MSG_PRINT_ALMAIF_MMAP( + "XRTMMAP: Initializing XilinxXrtExternalRegion with Address %zu " + "and Size %zu and device %p\n", + Address, RegionSize, Device); + PhysAddress_ = Address; + Size_ = RegionSize; + + XilinxXrtDeviceHandle_ = Device; +} + +void XilinxXrtExternalRegion::freeBuffer(pocl_mem_identifier *P) { + delete (xrt::bo *)(P->mem_ptr); + P->mem_ptr = NULL; +} + +uint64_t XilinxXrtExternalRegion::pointerDeviceOffset(pocl_mem_identifier *P) { + assert(P->mem_ptr); + return ((xrt::bo *)(P->mem_ptr))->address(); +} + +// Buffer allocation uses XRT buffer allocation API +cl_int XilinxXrtExternalRegion::allocateBuffer(pocl_mem_identifier *P, + size_t Size) { + xrt::bo *DeviceBuffer = new xrt::bo(*(xrt::device *)XilinxXrtDeviceHandle_, + Size, (xrt::memory_group)0); + + assert(DeviceBuffer != XRT_NULL_HANDLE && "xrtBufferHandle NULL"); + P->mem_ptr = DeviceBuffer; + uint64_t PhysAddress = pointerDeviceOffset(P); + POCL_MSG_PRINT_ALMAIF( + "XRTMMAP: Initialized XilinxXrtExternalRegion buffer with " + "physical address %" PRIu64 "\n", + PhysAddress); + return CL_SUCCESS; +} + +void XilinxXrtExternalRegion::CopyToMMAP(pocl_mem_identifier *DstMemId, + const void *Source, size_t Bytes, + size_t Offset) { + POCL_MSG_PRINT_ALMAIF_MMAP( + "XRTMMAP: Writing 0x%zx bytes to buffer at 0x%zx with " + "address 0x%zx\n", + Bytes, PhysAddress_, pointerDeviceOffset(DstMemId)); + auto src = (uint32_t *)Source; + assert(Offset < Size_ && "Attempt to access data outside XRT memory"); + + xrt::bo *b = (xrt::bo *)(DstMemId->mem_ptr); + assert(b != XRT_NULL_HANDLE && "No buffer handle?"); + b->write(Source, Bytes, Offset); + b->sync(XCL_BO_SYNC_BO_TO_DEVICE, 
Bytes, Offset); +} + +void XilinxXrtExternalRegion::CopyFromMMAP(void *Destination, + pocl_mem_identifier *SrcMemId, + size_t Bytes, size_t Offset) { + POCL_MSG_PRINT_ALMAIF_MMAP( + "XRTMMAP: Reading 0x%zx bytes from buffer at 0x%zx " + "with address 0x%zx\n", + Bytes, PhysAddress_, pointerDeviceOffset(SrcMemId)); + assert(Offset < Size_ && "Attempt to access data outside XRT memory"); + + xrt::bo *b = (xrt::bo *)(SrcMemId->mem_ptr); + assert(b != XRT_NULL_HANDLE && "No kernel handle?"); + b->sync(XCL_BO_SYNC_BO_FROM_DEVICE, Bytes, Offset); + b->read(Destination, Bytes, Offset); +} + +void XilinxXrtExternalRegion::CopyInMem(size_t Source, size_t Destination, + size_t Bytes) { + POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Copying 0x%zx bytes from 0x%zx " + "to 0x%zx\n", + Bytes, Source, Destination); + size_t SrcOffset = Source - PhysAddress_; + size_t DstOffset = Destination - PhysAddress_; + assert(SrcOffset < Size_ && (SrcOffset + Bytes) <= Size_ && + "Attempt to access data outside XRT memory"); + assert(DstOffset < Size_ && (DstOffset + Bytes) <= Size_ && + "Attempt to access data outside XRT memory"); +// assert(DeviceBuffer != XRT_NULL_HANDLE && +// "No kernel handle; write before mapping?"); +/* + xrt::bo *b = (xrt::bo *)DeviceBuffer; + auto b_mapped = b->map(); + + b->sync(XCL_BO_SYNC_BO_FROM_DEVICE, Bytes, SrcOffset); + memcpy((char *)b_mapped + DstOffset, (char *)b_mapped + SrcOffset, Bytes); + b->sync(XCL_BO_SYNC_BO_TO_DEVICE, Bytes, DstOffset); +*/} diff --git a/lib/CL/devices/almaif/XilinxXrtExternalRegion.hh b/lib/CL/devices/almaif/XilinxXrtExternalRegion.hh new file mode 100644 index 0000000000000000000000000000000000000000..bf7f71e8496f32418422e90ddaeca0a67ae42f3f --- /dev/null +++ b/lib/CL/devices/almaif/XilinxXrtExternalRegion.hh @@ -0,0 +1,63 @@ +/* XilinxXrtExternalRegion.hh - Access external memory (DDR or HBM) of an XRT + device + * as AlmaIFRegion + + Copyright (c) 2023 Topi Leppänen / Tampere University + + Permission is hereby granted, free of 
charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +*/ + +#ifndef POCL_XILINXXRTEXTERNALREGION_H +#define POCL_XILINXXRTEXTERNALREGION_H + +#include <stdlib.h> + +#include "pocl_cl.h" + +class XilinxXrtExternalRegion { +public: + XilinxXrtExternalRegion(size_t Address, size_t RegionSize, void *Device); + + void CopyToMMAP(pocl_mem_identifier *DstMemId, const void *Source, + size_t Bytes, size_t Offset); + void CopyFromMMAP(void *Destination, pocl_mem_identifier *SrcMemId, + size_t Bytes, size_t Offset); + void CopyInMem(size_t Source, size_t Destination, size_t Bytes); + + // Returns the offset of the allocated pointer in the Xrt address space + // used by the kernel + uint64_t pointerDeviceOffset(pocl_mem_identifier *P); + // Buffer allocation uses XRT buffer allocation API. + // This is done in order to support multiple distinct external memory + // types in Xilinx PCIe FPGAs (multiple HBM and DDR banks). 
+ // The alternative of using our bufalloc library to map the entire memory + // banks as bufalloc-regions was found to have significant performance + // issues when buffers were being read and written via the XRT API. + // (Possibly the entire bufalloc-regions were being read/flushed when only + // parts of it were read or written, or something to that effect.) + cl_int allocateBuffer(pocl_mem_identifier *P, size_t Size); + void freeBuffer(pocl_mem_identifier *P); + +private: + size_t Size_; + size_t PhysAddress_; + void *XilinxXrtDeviceHandle_; +}; + +#endif diff --git a/lib/CL/devices/almaif/XilinxXrtRegion.cc b/lib/CL/devices/almaif/XilinxXrtRegion.cc new file mode 100644 index 0000000000000000000000000000000000000000..6a7cbd9e8d7203dd7a1fc1f3dc7ac8ecb8569640 --- /dev/null +++ b/lib/CL/devices/almaif/XilinxXrtRegion.cc @@ -0,0 +1,263 @@ +/* XilinxXrtRegion.cc - Access on-chip memory of an XRT device as AlmaIFRegion + + Copyright (c) 2022 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. +*/ + +#include <assert.h> +#include <stdlib.h> +#include <unistd.h> +// #include <stdio.h> +#include <fstream> + +#include "experimental/xrt_ip.h" + +#include "XilinxXrtRegion.hh" +#include "pocl_util.h" + +XilinxXrtRegion::XilinxXrtRegion(size_t Address, size_t RegionSize, + void *kernel, size_t DeviceOffset) { + + POCL_MSG_PRINT_ALMAIF_MMAP( + "XRTMMAP: Initializing XilinxXrtRegion with Address %zu " + "and Size %zu and kernel %p and DeviceOffset 0x%zx\n", + Address, RegionSize, kernel, DeviceOffset); + PhysAddress_ = Address; + Size_ = RegionSize; + Kernel_ = kernel; + DeviceOffset_ = DeviceOffset; + assert(Kernel_ != XRT_NULL_HANDLE && + "xrtKernelHandle NULL, is the kernel opened properly?"); +} + +XilinxXrtRegion::XilinxXrtRegion(size_t Address, size_t RegionSize, + void *kernel, const std::string &init_file, + size_t DeviceOffset) + : XilinxXrtRegion(Address, RegionSize, kernel, DeviceOffset) { + + if (RegionSize == 0) { + return; // don't try to write to empty region + } + POCL_MSG_PRINT_ALMAIF_MMAP( + "XRTMMAP: Initializing XilinxXrtRegion with file %s\n", + init_file.c_str()); + std::ifstream inFile; + inFile.open(init_file, std::ios::binary); + unsigned int current; + int i = 0; + while (inFile.good()) { + inFile.read(reinterpret_cast<char *>(¤t), sizeof(current)); + + ((xrt::ip *)Kernel_)->write_register(Address + i - DeviceOffset_, current); + i += 4; + } + + POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Initialized region with %i bytes \n", + i - 4); +} + +void XilinxXrtRegion::initRegion(const std::string &init_file) { + std::ifstream inFile; + inFile.open(init_file, std::ios::binary); + unsigned int current; + int i = 0; + while (inFile.good()) { + inFile.read(reinterpret_cast<char *>(¤t), 
sizeof(current)); + Write32(i, current); + i += 4; + } + + POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Initialized region with %i bytes \n", + i - 4); +} + +uint32_t XilinxXrtRegion::Read32(size_t offset) { + POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading from region at 0x%zx with " + "offset 0x%zx\n", + PhysAddress_, + PhysAddress_ + offset - DeviceOffset_); + assert(Kernel_ != XRT_NULL_HANDLE && + "No kernel handle; read before mapping?"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); + uint32_t value = ((xrt::ip *)Kernel_) + ->read_register(PhysAddress_ + offset - DeviceOffset_); + return value; +} + +void XilinxXrtRegion::Write32(size_t offset, uint32_t value) { + POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Writing to region at 0x%zx with " + "offset 0x%zx\n", + PhysAddress_, + PhysAddress_ + offset - DeviceOffset_); + assert(Kernel_ != XRT_NULL_HANDLE && + "No kernel handle; write before mapping?"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); + ((xrt::ip *)Kernel_) + ->write_register(PhysAddress_ + offset - DeviceOffset_, value); +} + +void XilinxXrtRegion::Write64(size_t offset, uint64_t value) { + POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Writing 64b to region at 0x%zx with " + "offset 0x%zx\n", + PhysAddress_, + PhysAddress_ + offset - DeviceOffset_); + assert(Kernel_ != XRT_NULL_HANDLE && + "No kernel handle; write before mapping?"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); + ((xrt::ip *)Kernel_) + ->write_register(PhysAddress_ + offset - DeviceOffset_, value); + ((xrt::ip *)Kernel_) + ->write_register(PhysAddress_ + offset - DeviceOffset_ + 4, value >> 32); +} + +void XilinxXrtRegion::Write16(size_t offset, uint16_t value) { + POCL_MSG_PRINT_ALMAIF_MMAP( + "XRTMMAP: Writing 16b to region at 0x%zx with " + "offset 0x%zx, DeviceOffset 0x%zx and total offset 0x%zx\n", + PhysAddress_, offset, DeviceOffset_, + PhysAddress_ + offset - DeviceOffset_); + assert(Kernel_ != XRT_NULL_HANDLE && 
+ "No kernel handle; write before mapping?"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); + + uint32_t old_value = + ((xrt::ip *)Kernel_) + ->read_register(PhysAddress_ + (offset & 0xFFFFFFFC) - DeviceOffset_); + + uint32_t new_value = 0; + if ((offset & 0b10) == 0) { + new_value = (old_value & 0xFFFF0000) | (uint32_t)value; + } else { + new_value = ((uint32_t)value << 16) | (old_value & 0xFFFF); + } + ((xrt::ip *)Kernel_) + ->write_register(PhysAddress_ + (offset & 0xFFFFFFFC) - DeviceOffset_, + new_value); +} + +uint64_t XilinxXrtRegion::Read64(size_t offset) { + POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading 64b from region at 0x%zx with " + "offset 0x%zx\n", + PhysAddress_, + PhysAddress_ + offset - DeviceOffset_); + assert(Kernel_ != XRT_NULL_HANDLE && + "No kernel handle; write before mapping?"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); + uint32_t value_low = + ((xrt::ip *)Kernel_) + ->read_register(PhysAddress_ + offset - DeviceOffset_); + uint32_t value_high = + ((xrt::ip *)Kernel_) + ->read_register(PhysAddress_ + offset - DeviceOffset_ + 4); + uint64_t value = ((uint64_t)value_high << 32) | value_low; + return value; +} + +void XilinxXrtRegion::CopyToMMAP(size_t destination, const void *source, + size_t bytes) { + auto src = (uint32_t *)source; + size_t offset = destination - PhysAddress_; + POCL_MSG_PRINT_ALMAIF_MMAP( + "XRTMMAP: Writing 0x%zx bytes to buffer at region 0x%zx with " + "address 0x%zx and offset %zx\n", + bytes, PhysAddress_, destination, offset); + assert(offset < Size_ && "Attempt to access data outside XRT memory"); + + assert((offset & 0b11) == 0 && + "Xrt copytommap destination must be 4 byte aligned"); + assert(((size_t)src & 0b11) == 0 && + "Xrt copytommap source must be 4 byte aligned"); + assert((bytes % 4) == 0 && "Xrt copytommap size must be 4 byte multiple"); + + for (size_t i = 0; i < bytes / 4; ++i) { + ((xrt::ip *)Kernel_) + ->write_register(destination + 4 * i 
- DeviceOffset_, src[i]); + } +} + +void XilinxXrtRegion::CopyFromMMAP(void *destination, size_t source, + size_t bytes) { + auto dst = (uint32_t *)destination; + size_t offset = source - PhysAddress_; + POCL_MSG_PRINT_ALMAIF_MMAP( + "XRTMMAP: Reading 0x%zx bytes from region at 0x%zx " + "with address 0x%zx and offset\n", + bytes, PhysAddress_, source, offset); + assert(offset < Size_ && "Attempt to access data outside XRT memory"); + assert((offset & 0b11) == 0 && + "Xrt copyfrommmap source must be 4 byte aligned"); + + switch (bytes) { + case 1: { + uint32_t value = + ((xrt::ip *)Kernel_)->read_register(source - DeviceOffset_); + *((uint8_t *)destination) = value; + break; + } + case 2: { + uint32_t value = + ((xrt::ip *)Kernel_)->read_register(source - DeviceOffset_); + *((uint16_t *)destination) = value; + break; + } + default: { + assert(((size_t)dst & 0b11) == 0 && + "Xrt copyfrommmap destination must be 4 byte aligned"); + size_t i; + for (i = 0; i < bytes / 4; ++i) { + dst[i] = + ((xrt::ip *)Kernel_)->read_register(source - DeviceOffset_ + 4 * i); + } + if ((bytes % 4) != 0) { + union value { + char bytes[4]; + uint32_t full; + } value1; + value1.full = + ((xrt::ip *)Kernel_)->read_register(source - DeviceOffset_ + 4 * i); + for (int k = 0; k < (bytes % 4); k++) { + dst[i] = value1.bytes[k]; + } + } + } + } +} + +void XilinxXrtRegion::CopyInMem(size_t source, size_t destination, + size_t bytes) { + POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Copying 0x%zx bytes from 0x%zx " + "to 0x%zx\n", + bytes, source, destination); + size_t src_offset = source - PhysAddress_; + size_t dst_offset = destination - PhysAddress_; + assert(src_offset < Size_ && (src_offset + bytes) <= Size_ && + "Attempt to access data outside XRT memory"); + assert(dst_offset < Size_ && (dst_offset + bytes) <= Size_ && + "Attempt to access data outside XRT memory"); + assert((bytes % 4) == 0 && "Xrt copyinmem size must be 4 byte multiple"); + xrt::ip *k = (xrt::ip *)Kernel_; + + for (size_t i = 0; 
i < bytes / 4; ++i) { + uint32_t m = k->read_register(source - DeviceOffset_ + 4 * i); + k->write_register(destination - DeviceOffset_ + 4 * i, m); + } +} + +void XilinxXrtRegion::setKernelPtr(void *ptr) { Kernel_ = ptr; } diff --git a/lib/CL/devices/almaif/XrtRegion.hh b/lib/CL/devices/almaif/XilinxXrtRegion.hh similarity index 71% rename from lib/CL/devices/almaif/XrtRegion.hh rename to lib/CL/devices/almaif/XilinxXrtRegion.hh index acc585eb32099f29f48c4be76b13a21294964834..d619c2e8c7fae196ceb89ff961836a1dabb1a210 100644 --- a/lib/CL/devices/almaif/XrtRegion.hh +++ b/lib/CL/devices/almaif/XilinxXrtRegion.hh @@ -1,5 +1,4 @@ -/* XrtRegion.hh - basic way of accessing accelerator memory. - * as a memory mapped region +/* XilinxXrtRegion.hh - Access on-chip memory of an XRT device as AlmaIFRegion Copyright (c) 2022 Topi Leppänen / Tampere University @@ -22,8 +21,8 @@ IN THE SOFTWARE. */ -#ifndef XRTREGION_H -#define XRTREGION_H +#ifndef XILINXXRTREGION_H +#define XILINXXRTREGION_H #include <stdlib.h> @@ -31,24 +30,32 @@ #include "AlmaIFRegion.hh" -class XrtRegion : public AlmaIFRegion { +// Uses XRT's kernel IP API to abstract the on-chip memory +// of an Almaif device +class XilinxXrtRegion : public AlmaIFRegion { public: - XrtRegion(size_t Address, size_t RegionSize, void *kernel); - XrtRegion(size_t Address, size_t RegionSize, void *kernel, char *init_file); + XilinxXrtRegion(size_t Address, size_t RegionSize, void *kernel, + size_t DeviceOffset); + XilinxXrtRegion(size_t Address, size_t RegionSize, void *kernel, + const std::string &init_file, size_t DeviceOffset); uint32_t Read32(size_t offset) override; void Write32(size_t offset, uint32_t value) override; void Write16(size_t offset, uint16_t value) override; uint64_t Read64(size_t offset) override; + void Write64(size_t offset, uint64_t value) override; void CopyToMMAP(size_t destination, const void *source, size_t bytes) override; void CopyFromMMAP(void *destination, size_t source, size_t bytes) override; void 
CopyInMem(size_t source, size_t destination, size_t bytes) override; + void initRegion(const std::string &init_file); + void setKernelPtr(void *ptr); + private: - void *Kernel; - void *DeviceHandle; + void *Kernel_; + size_t DeviceOffset_; }; #endif diff --git a/lib/CL/devices/almaif/XrtDevice.cc b/lib/CL/devices/almaif/XrtDevice.cc deleted file mode 100644 index 0cde026ea9f7e6200b22466f82c96fb3019de6c3..0000000000000000000000000000000000000000 --- a/lib/CL/devices/almaif/XrtDevice.cc +++ /dev/null @@ -1,75 +0,0 @@ -/* XrtDevice.cc - accessing accelerator memory as memory mapped region. - - Copyright (c) 2022 Topi Leppänen / Tampere University - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to - deal in the Software without restriction, including without limitation the - rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - IN THE SOFTWARE. 
-*/ - -#include "XrtDevice.hh" - -#include "XrtRegion.hh" -#include "AlmaifShared.hh" - -#include "experimental/xrt_kernel.h" - -XrtDevice::XrtDevice(char *xrt_kernel_name) { - - unsigned xclbin_char_length = strlen(xrt_kernel_name) + 8; - char *xclbin_char = (char *)malloc(xclbin_char_length); - assert(xclbin_char); - snprintf(xclbin_char, xclbin_char_length, "%s.xclbin", xrt_kernel_name); - - // TODO: Fix the case when the kernel name contains a path - // Needs to tokenize the last part of the path and use that - // as the kernel name - unsigned xrt_kernel_name_length = 2 * strlen(xrt_kernel_name) + 6; - char *xrt_kernel_name = (char *)malloc(xrt_kernel_name_length); - assert(xrt_kernel_name); - snprintf(xrt_kernel_name, xrt_kernel_name_length, "%s:{%s_1}", - xrt_kernel_name, xrt_kernel_name); - - auto devicehandle = new xrt::device(0); - assert(devicehandle != NULL && "devicehandle null\n"); - - auto uuid = devicehandle->load_xclbin(xclbin_char); - auto kernel = new xrt::kernel(*devicehandle, uuid, xrt_kernel_name, - xrt::kernel::cu_access_mode::exclusive); - - free(xclbin_char); - free(xrt_kernel_name); - - assert(kernel != XRT_NULL_HANDLE && - "xrtKernelHandle NULL, is the kernel opened properly?"); - - Kernel = (void *)kernel; - DeviceHandle = (void *)devicehandle; - - ControlMemory = new XrtRegion(0, ALMAIF_DEFAULT_CTRL_SIZE, Kernel); - - discoverDeviceParameters(); - - InstructionMemory = new XrtRegion(imem_start, imem_size, Kernel); - CQMemory = new XrtRegion(cq_start, cq_size, Kernel); - DataMemory = new XrtRegion(dmem_start, dmem_size, Kernel); -} - -XrtDevice::~XrtDevice() { - delete ((xrt::kernel *)Kernel); - delete ((xrt::device *)DeviceHandle); -} diff --git a/lib/CL/devices/almaif/XrtDevice.hh b/lib/CL/devices/almaif/XrtDevice.hh deleted file mode 100644 index 110b8ab11ca79ffd17e2c5316a1b823cb35f41f9..0000000000000000000000000000000000000000 --- a/lib/CL/devices/almaif/XrtDevice.hh +++ /dev/null @@ -1,40 +0,0 @@ -/* XrtDevice.hh - basic way of 
accessing accelerator memory. - * as a memory mapped region - - Copyright (c) 2022 Topi Leppänen / Tampere University - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to - deal in the Software without restriction, including without limitation the - rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - IN THE SOFTWARE. -*/ - -#ifndef XrtDevice_H -#define XrtDevice_H - -#include "AlmaIFDevice.hh" - -class XrtDevice : public AlmaIFDevice { -public: - XrtDevice(char *xrt_kernel_name); - ~XrtDevice() override; - -private: - void *Kernel; - void *DeviceHandle; -}; - -#endif diff --git a/lib/CL/devices/almaif/XrtRegion.cc b/lib/CL/devices/almaif/XrtRegion.cc deleted file mode 100644 index e8a59402a83ede075ff4f5ad11a1ff5447e8dc8c..0000000000000000000000000000000000000000 --- a/lib/CL/devices/almaif/XrtRegion.cc +++ /dev/null @@ -1,184 +0,0 @@ -/* XrtRegion.cc - accessing accelerator memory as memory mapped region. 
- - Copyright (c) 2022 Topi Leppänen / Tampere University - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to - deal in the Software without restriction, including without limitation the - rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - IN THE SOFTWARE. 
-*/ - -#include <assert.h> -#include <stdlib.h> -#include <unistd.h> -//#include <stdio.h> -#include <fstream> - -#include "experimental/xrt_kernel.h" - -#include "XrtRegion.hh" -#include "pocl_util.h" - -XrtRegion::XrtRegion(size_t Address, size_t RegionSize, void *kernel) { - - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Initializing XrtRegion with Address %zu " - "and Size %zu and kernel %p\n", - Address, RegionSize, kernel); - PhysAddress = Address; - Size = RegionSize; - Kernel = kernel; - assert(Kernel != XRT_NULL_HANDLE && - "xrtKernelHandle NULL, is the kernel opened properly?"); -} - -XrtRegion::XrtRegion(size_t Address, size_t RegionSize, void *kernel, - char *init_file) - : XrtRegion(Address, RegionSize, kernel) { - - if (RegionSize == 0) { - return; // don't try to write to empty region - } - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Initializing XrtRegion with file %s\n", - init_file); - std::ifstream inFile; - inFile.open(init_file, std::ios::binary); - unsigned int current; - int i = 0; - while (inFile.good()) { - inFile.read(reinterpret_cast<char *>(&current), sizeof(current)); - - ((xrt::kernel *)Kernel)->write_register(Address + i, current); - i += 4; - } - - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Initialized region with %i bytes \n", - i - 4); -} - -uint32_t XrtRegion::Read32(size_t offset) { - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading from physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); - assert(Kernel != XRT_NULL_HANDLE && "No kernel handle; read before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); - uint32_t value = ((xrt::kernel *)Kernel)->read_register(PhysAddress + offset); - return value; -} - -void XrtRegion::Write32(size_t offset, uint32_t value) { - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Writing to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); - assert(Kernel != XRT_NULL_HANDLE && - "No kernel handle; write before mapping?"); - assert(offset < Size && "Attempt 
to access data outside MMAP'd buffer"); - ((xrt::kernel *)Kernel)->write_register(PhysAddress + offset, value); -} - -void XrtRegion::Write16(size_t offset, uint16_t value) { - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Writing to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); - assert(Kernel != XRT_NULL_HANDLE && - "No kernel handle; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); - - uint32_t old_value = ((xrt::kernel *)Kernel) - ->read_register(PhysAddress + (offset & 0xFFFFFFFC)); - - uint32_t new_value = 0; - if ((offset & 0b10) == 0) { - new_value = (old_value & 0xFFFF0000) | (uint32_t)value; - } else { - new_value = ((uint32_t)value << 16) | (old_value & 0xFFFF); - } - ((xrt::kernel *)Kernel) - ->write_register(PhysAddress + (offset & 0xFFFFFFFC), new_value); -} - -uint64_t XrtRegion::Read64(size_t offset) { - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading from physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); - assert(Kernel != XRT_NULL_HANDLE && - "No kernel handle; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); - uint32_t value_low = - ((xrt::kernel *)Kernel)->read_register(PhysAddress + offset); - uint32_t value_high = - ((xrt::kernel *)Kernel)->read_register(PhysAddress + offset + 4); - uint64_t value = ((uint64_t)value_high << 32) | value_low; - return value; -} - -void XrtRegion::CopyToMMAP(size_t destination, const void *source, - size_t bytes) { - POCL_MSG_PRINT_ALMAIF_MMAP( - "XRTMMAP: Writing 0x%zx bytes to buffer at 0x%zx with " - "address 0x%zx\n", - bytes, PhysAddress, destination); - auto src = (uint32_t *)source; - size_t offset = destination - PhysAddress; - assert(offset < Size && "Attempt to access data outside XRT memory"); - - assert((offset & 0b11) == 0 && - "Xrt copytommap destination must be 4 byte aligned"); - assert(((size_t)src & 0b11) == 0 && - "Xrt copytommap source must be 4 byte 
aligned"); - assert((bytes % 4) == 0 && "Xrt copytommap size must be 4 byte multiple"); - - for (size_t i = 0; i < bytes / 4; ++i) { - ((xrt::kernel *)Kernel)->write_register(destination + 4 * i, src[i]); - } -} - -void XrtRegion::CopyFromMMAP(void *destination, size_t source, size_t bytes) { - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading 0x%zx bytes from buffer at 0x%zx " - "with address 0x%zx\n", - bytes, PhysAddress, source); - auto dst = (uint32_t *)destination; - size_t offset = source - PhysAddress; - assert(offset < Size && "Attempt to access data outside XRT memory"); - - assert((offset & 0b11) == 0 && - "Xrt copyfrommmap source must be 4 byte aligned"); - assert(((size_t)dst & 0b11) == 0 && - "Xrt copyfrommmap destination must be 4 byte aligned"); - assert((bytes % 4) == 0 && "Xrt copyfrommmap size must be 4 byte multiple"); - - for (size_t i = 0; i < bytes / 4; ++i) { - dst[i] = ((xrt::kernel *)Kernel)->read_register(source + 4 * i); - } -} - -void XrtRegion::CopyInMem(size_t source, size_t destination, size_t bytes) { - POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Copying 0x%zx bytes from 0x%zx " - "to 0x%zx\n", - bytes, source, destination); - size_t src_offset = source - PhysAddress; - size_t dst_offset = destination - PhysAddress; - assert(src_offset < Size && (src_offset + bytes) <= Size && - "Attempt to access data outside XRT memory"); - assert(dst_offset < Size && (dst_offset + bytes) <= Size && - "Attempt to access data outside XRT memory"); - assert((bytes % 4) == 0 && "Xrt copyinmem size must be 4 byte multiple"); - xrt::kernel *k = (xrt::kernel *)Kernel; - - for (size_t i = 0; i < bytes / 4; ++i) { - uint32_t m = k->read_register(source + 4 * i); - k->write_register(destination + 4 * i, m); - } -} diff --git a/lib/CL/devices/almaif/almaif.cc b/lib/CL/devices/almaif/almaif.cc index dbac35348e31ffc01a5e4ad5ac935f737074a3ef..14703511281a462792d389fe515a37e03449f484 100644 --- a/lib/CL/devices/almaif/almaif.cc +++ b/lib/CL/devices/almaif/almaif.cc @@ 
-27,9 +27,17 @@ #include "AlmaIFRegion.hh" #include "MMAPDevice.hh" #include "config.h" + #ifdef HAVE_XRT -#include "XrtDevice.hh" +#include "XilinxXrtDevice.hh" +#define HAVE_DBDEVICE +#endif + +#ifdef HAVE_DBDEVICE +#include "AlmaifDB/AlmaIFBitstreamDatabaseManager.hh" +#include "AlmaifDB/DBDevice.hh" #endif + #include "EmulationDevice.hh" #ifdef TCE_AVAILABLE @@ -42,7 +50,7 @@ #include "common.h" #include "common_driver.h" #include "devices.h" -#include "openasip/AlmaifCompileTCE.hh" +#include "openasip/AlmaifCompileOpenasip.hh" #include "pocl_cl.h" #include "pocl_timing.h" #include "pocl_util.h" @@ -131,21 +139,19 @@ void pocl_almaif_init_device_ops(struct pocl_device_ops *ops) { void pocl_almaif_write(void *data, const void *__restrict__ src_host_ptr, pocl_mem_identifier *dst_mem_id, cl_mem dst_buf, size_t offset, size_t size) { - chunk_info_t *chunk = (chunk_info_t *)dst_mem_id->mem_ptr; - size_t dst = chunk->start_address + offset; AlmaifData *d = (AlmaifData *)data; - d->Dev->writeDataToDevice(dst, (const char *__restrict)src_host_ptr, size); + d->Dev->writeDataToDevice(dst_mem_id, (const char *__restrict)src_host_ptr, + size, offset); } void pocl_almaif_read(void *data, void *__restrict__ dst_host_ptr, pocl_mem_identifier *src_mem_id, cl_mem src_buf, size_t offset, size_t size) { - chunk_info_t *chunk = (chunk_info_t *)src_mem_id->mem_ptr; - size_t src = chunk->start_address + offset; AlmaifData *d = (AlmaifData *)data; - d->Dev->readDataFromDevice((char *__restrict__)dst_host_ptr, src, size); + d->Dev->readDataFromDevice((char *__restrict__)dst_host_ptr, src_mem_id, size, + offset); } void pocl_almaif_copy(void *data, pocl_mem_identifier *dst_mem_id, @@ -157,6 +163,9 @@ void pocl_almaif_copy(void *data, pocl_mem_identifier *dst_mem_id, chunk_info_t *dst_chunk = (chunk_info_t *)dst_mem_id->mem_ptr; size_t src = src_chunk->start_address + src_offset; size_t dst = dst_chunk->start_address + dst_offset; + if (src == dst) { + return; + } AlmaifData *d = 
(AlmaifData *)data; if (d->Dev->DataMemory->isInRange(dst)) { @@ -195,41 +204,24 @@ cl_int pocl_almaif_alloc_mem_obj(cl_device_id device, cl_mem mem_obj, void *host_ptr) { AlmaifData *data = (AlmaifData *)device->data; - pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id]; - assert(p->mem_ptr == NULL); - chunk_info_t *chunk = NULL; /* almaif driver doesn't preallocate */ if ((mem_obj->flags & CL_MEM_ALLOC_HOST_PTR) && (mem_obj->mem_host_ptr == NULL)) return CL_MEM_OBJECT_ALLOCATION_FAILURE; - chunk = pocl_alloc_buffer(data->Dev->AllocRegions, mem_obj->size); - if (chunk == NULL) - return CL_MEM_OBJECT_ALLOCATION_FAILURE; - - POCL_MSG_PRINT_MEMORY("almaif: allocated %zu bytes from 0x%zx\n", - mem_obj->size, chunk->start_address); - - p->mem_ptr = chunk; - p->version = 0; + pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id]; + cl_int alloc_success = data->Dev->allocateBuffer(p, mem_obj->size); - return CL_SUCCESS; + return alloc_success; } void pocl_almaif_free(cl_device_id device, cl_mem mem) { pocl_mem_identifier *p = &mem->device_ptrs[device->global_mem_id]; - //AlmaifData *data = (AlmaifData *)device->data; - - chunk_info_t *chunk = - (chunk_info_t *)p->mem_ptr; - - POCL_MSG_PRINT_MEMORY("almaif: freed %zu bytes from 0x%zx\n", mem->size, - chunk->start_address); + AlmaifData *data = (AlmaifData *)device->data; - assert(chunk != NULL); - pocl_free_chunk(chunk); + data->Dev->freeBuffer(p); p->mem_ptr = NULL; p->version = 0; @@ -264,9 +256,9 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) { SETUP_DEVICE_CL_VERSION(1, 2); dev->type = CL_DEVICE_TYPE_CUSTOM; dev->long_name = (char *)"memory mapped custom device"; + dev->short_name = "almaif"; dev->vendor = "pocl"; dev->version = "1.2"; - dev->available = CL_TRUE; dev->extensions = ""; dev->profile = "FULL_PROFILE"; @@ -305,6 +297,8 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) { dev->preferred_wg_size_multiple = 8; 
AlmaifData *D = new AlmaifData; + D->Available = CL_TRUE; + dev->available = &(D->Available); dev->data = (void *)D; char *scanParams; @@ -322,74 +316,51 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) { D->BaseAddress = strtoull(paramToken, NULL, 0); std::string supportedList; - char *device_init_file = NULL; - if (D->BaseAddress != 0xE) { + std::string device_init_file = ""; + if (D->BaseAddress != POCL_ALMAIFDEVICE_EMULATION) { paramToken = strtok_r(NULL, ",", &savePtr); assert(paramToken); - device_init_file = (char *)malloc(strlen(paramToken) + 1); - assert(device_init_file); - strcpy(device_init_file, paramToken); - POCL_MSG_PRINT_ALMAIF("Enabling device with device init file name %s", - device_init_file); + device_init_file = paramToken; + POCL_MSG_PRINT_ALMAIF("Enabling device with device init file name %s\n", + device_init_file.c_str()); } bool enable_compilation = false; - while ((paramToken = strtok_r(NULL, ",", &savePtr))) { - auto token = strtoul(paramToken, NULL, 0); - BuiltinKernelId kernelId = static_cast<BuiltinKernelId>(token); - - bool found = false; - for (size_t i = 0; i < BIKERNELS; ++i) { - if (pocl_BIDescriptors[i].KernelId == kernelId) { - if (supportedList.size() > 0) - supportedList += ";"; - supportedList += pocl_BIDescriptors[i].name; - D->SupportedKernels.insert(&pocl_BIDescriptors[i]); - found = true; - break; - } - } - if (kernelId == POCL_CDBI_JIT_COMPILER) { - enable_compilation = true; - } else if (!found) { - POCL_ABORT("almaif: Unknown Kernel ID (%lu) given\n", token); - } - } - free(scanParams); - // almaif devices are little endian by default, but the emulation device is // host dependant - dev->endian_little = D->BaseAddress == 0xE ? !(WORDS_BIGENDIAN) : CL_TRUE; - if (D->BaseAddress == 0xE) { + dev->endian_little = D->BaseAddress == POCL_ALMAIFDEVICE_EMULATION + ? 
!(WORDS_BIGENDIAN) + : CL_TRUE; + if (D->BaseAddress == POCL_ALMAIFDEVICE_EMULATION) { dev->long_name = (char *)"almaif emulation device"; } - dev->builtin_kernel_list = strdup(supportedList.c_str()); - dev->num_builtin_kernels = D->SupportedKernels.size(); - pocl_setup_builtin_kernels_with_version(dev); - if (!pocl_offline_compile) { - POCL_MSG_PRINT_ALMAIF( - "almaif: accelerator at 0x%zx with %zu builtin kernels (%s)\n", - D->BaseAddress, D->SupportedKernels.size(), dev->builtin_kernel_list); // Recognize whether we are emulating or not - if (D->BaseAddress == EMULATING_ADDRESS) { + if (D->BaseAddress == POCL_ALMAIFDEVICE_EMULATION) { D->Dev = new EmulationDevice(); - } + } else if (D->BaseAddress == POCL_ALMAIFDEVICE_XRT) { #ifdef HAVE_XRT - else if (D->BaseAddress == 0xA) { - D->Dev = new XrtDevice(device_init_file); - } + D->Dev = new XilinxXrtDevice(device_init_file, j); +#else + POCL_ABORT( + "Almaif: tried enabling XilinxXrtDevice but it's not available\n"); #endif - else if (D->BaseAddress == 0xB) { + } else if (D->BaseAddress == POCL_ALMAIFDEVICE_TTASIM) { #ifdef TCE_AVAILABLE D->Dev = new TTASimDevice(device_init_file); enable_compilation = true; #else POCL_ABORT("almaif: Tried enabling TTASim device, but it's not available. 
" "Did you set ENABLE_TCE=1?\n"); +#endif + } else if (D->BaseAddress == POCL_ALMAIFDEVICE_BITSTREAMDATABASE) { +#ifdef HAVE_DBDEVICE + D->Dev = new DBDevice(device_init_file); +#else + POCL_ABORT("Almaif: tried enabling DBDevice but it's not available\n"); #endif } else { D->Dev = new MMAPDevice(D->BaseAddress, device_init_file); @@ -405,26 +376,81 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) { } POCL_UNLOCK(globalMemIDLock); } - dev->global_mem_size = D->Dev->DataMemory->Size; + dev->global_mem_size = D->Dev->DataMemory->Size(); if (D->Dev->ExternalMemory != nullptr && - D->Dev->ExternalMemory->Size > D->Dev->DataMemory->Size) - dev->global_mem_size = D->Dev->ExternalMemory->Size; + D->Dev->ExternalMemory->Size() > D->Dev->DataMemory->Size()) + dev->global_mem_size = D->Dev->ExternalMemory->Size(); } else { POCL_MSG_PRINT_ALMAIF( "Starting offline compilation device initialization\n"); } + if (D->Dev->isDBDevice()) { +#ifdef HAVE_DBDEVICE + std::vector<BuiltinKernelId> bik_list = + ((DBDevice *)(D->Dev))->supportedBuiltinKernels(); + + for (const BuiltinKernelId &kernelId : bik_list) { + + bool found = false; + for (size_t i = 0; i < BIKERNELS; ++i) { + if (pocl_BIDescriptors[i].KernelId == kernelId) { + if (supportedList.size() > 0) + supportedList += ";"; + supportedList += pocl_BIDescriptors[i].name; + D->SupportedKernels.insert(&pocl_BIDescriptors[i]); + found = true; + break; + } + } + if (kernelId == POCL_CDBI_JIT_COMPILER) { + enable_compilation = true; + } else if (!found) { + POCL_ABORT("almaif: Unknown Kernel ID (%lu) coming from database\n", + kernelId); + } + } +#endif + } else { + while ((paramToken = strtok_r(NULL, ",", &savePtr))) { + auto token = strtoul(paramToken, NULL, 0); + BuiltinKernelId kernelId = static_cast<BuiltinKernelId>(token); + + bool found = false; + for (size_t i = 0; i < BIKERNELS; ++i) { + if (pocl_BIDescriptors[i].KernelId == kernelId) { + if (supportedList.size() > 0) + supportedList += ";"; 
+ supportedList += pocl_BIDescriptors[i].name; + D->SupportedKernels.insert(&pocl_BIDescriptors[i]); + found = true; + break; + } + } + if (kernelId == POCL_CDBI_JIT_COMPILER) { + enable_compilation = true; + } else if (!found) { + POCL_ABORT("almaif: Unknown Kernel ID (%lu) given\n", token); + } + } + } + + dev->builtin_kernel_list = strdup(supportedList.c_str()); + dev->num_builtin_kernels = D->SupportedKernels.size(); + pocl_setup_builtin_kernels_with_version(dev); + POCL_MSG_PRINT_ALMAIF( + "almaif: accelerator at 0x%zx with %zu builtin kernels (%s)\n", + D->BaseAddress, D->SupportedKernels.size(), dev->builtin_kernel_list); + + free(scanParams); + if (enable_compilation) { dev->compiler_available = CL_TRUE; dev->linker_available = CL_TRUE; - unsigned adf_file_length = strlen(device_init_file) + 5; - char *adf_file = (char *)malloc(adf_file_length); - assert(adf_file); - snprintf(adf_file, adf_file_length, "%s.adf", device_init_file); + std::string adf_file = device_init_file + ".adf"; pocl_almaif_compile_init(j, dev, adf_file); - free(adf_file); } else { D->compilationData = NULL; @@ -432,26 +458,45 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) { dev->linker_available = CL_FALSE; } - free(device_init_file); + dev->device_side_printf = 1; + dev->printf_buffer_size = PRINTF_BUFFER_SIZE / 4; + chunk_info_t *chunk = NULL; + chunk = pocl_alloc_buffer(D->Dev->AllocRegions, dev->printf_buffer_size); + if (chunk == NULL) { + POCL_MSG_WARN("Almaif: Can't allocate %d bytes for printf buffer\n", + dev->printf_buffer_size); + dev->device_side_printf = 0; + } else { + POCL_MSG_PRINT_ALMAIF("Allocated printf buffer of size %d from %d\n", + dev->printf_buffer_size, chunk->start_address); + D->PrintfBuffer = chunk; + + D->PrintfPosition = pocl_alloc_buffer(D->Dev->AllocRegions, 4); + if (D->PrintfPosition == NULL) { + POCL_ABORT("Almaif: Can't allocate 4 bytes for printf index\n"); + } + } POCL_MSG_PRINT_ALMAIF("almaif: mmap done\n"); if 
(pocl_offline_compile) { std::cout << "Offline compilation device initialized" << std::endl; return CL_SUCCESS; } - for (unsigned i = 0; i < (D->Dev->DataMemory->Size >> 2); i++) { + for (unsigned i = 0; i < (D->Dev->DataMemory->Size() >> 2); i++) { // D->Dev->DataMemory->Write32(4 * i, 0); } - for (unsigned i = 0; i < (D->Dev->CQMemory->Size >> 2); i++) { + for (unsigned i = 0; i < (D->Dev->CQMemory->Size() >> 2); i++) { // D->Dev->CQMemory->Write32(4 * i, 0); } // Initialize AQL queue by setting all headers to invalid POCL_MSG_PRINT_ALMAIF("Initializing AQL Packet cqmemory size=%zu\n", - D->Dev->CQMemory->Size); - for (uint32_t i = AQL_PACKET_LENGTH; i < D->Dev->CQMemory->Size; + D->Dev->CQMemory->Size()); + for (uint32_t i = AQL_PACKET_LENGTH; i < D->Dev->CQMemory->Size(); i += AQL_PACKET_LENGTH) { D->Dev->CQMemory->Write16(i, AQL_PACKET_INVALID); } + D->Dev->CQMemory->Write32(ALMAIF_CQ_WRITE, 0); + D->Dev->CQMemory->Write32(ALMAIF_CQ_READ, 0); #ifdef ALMAIF_DUMP_MEMORY POCL_MSG_PRINT_ALMAIF("INIT MEMORY DUMP\n"); @@ -475,7 +520,7 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) { runningDeviceCount++; POCL_UNLOCK(runningDeviceLock); - if (D->BaseAddress == EMULATING_ADDRESS) { + if (D->BaseAddress == POCL_ALMAIFDEVICE_EMULATION) { POCL_MSG_PRINT_ALMAIF("Custom emulation device %d initialized \n", j); } else { POCL_MSG_PRINT_ALMAIF("Custom device %d initialized \n", j); @@ -539,7 +584,7 @@ void pocl_almaif_update_event(cl_device_id device, cl_event event) { assert(ed); size_t commandMetaAddress = ed->chunk->start_address; assert(commandMetaAddress); - commandMetaAddress -= D->Dev->DataMemory->PhysAddress; + commandMetaAddress -= D->Dev->DataMemory->PhysAddress(); timestamp.u32.a = D->Dev->DataMemory->Read32( commandMetaAddress + offsetof(CommandMetadata, start_timestamp)); @@ -570,20 +615,22 @@ void pocl_almaif_update_event(cl_device_id device, cl_event event) { } if (device->device_side_printf) { - chunk_info_t 
*printf_buffer_chunk = (chunk_info_t *)D->printf_buffer; - assert(printf_buffer_chunk); - chunk_info_t *printf_position_chunk = - (chunk_info_t *)D->printf_position; - assert(printf_position_chunk); - unsigned position = 0; - D->Dev->readDataFromDevice((char *)&position, - printf_position_chunk->start_address, 4); + chunk_info_t *PrintfBufferChunk = (chunk_info_t *)D->PrintfBuffer; + assert(PrintfBufferChunk); + chunk_info_t *PrintfPositionChunk = (chunk_info_t *)D->PrintfPosition; + assert(PrintfPositionChunk); + unsigned position = + D->Dev->DataMemory->Read32(PrintfPositionChunk->start_address - + D->Dev->DataMemory->PhysAddress()); POCL_MSG_PRINT_ALMAIF( "Device wrote %u bytes to stdout. Printing them now:\n", position); if (position > 0) { char *tmp_printf_buf = (char *)malloc(position); - D->Dev->readDataFromDevice( - tmp_printf_buf, printf_buffer_chunk->start_address, position); + D->Dev->DataMemory->CopyFromMMAP( + tmp_printf_buf, PrintfBufferChunk->start_address, position); + D->Dev->DataMemory->Write32(PrintfPositionChunk->start_address - + D->Dev->DataMemory->PhysAddress(), + 0); write(STDOUT_FILENO, tmp_printf_buf, position); free(tmp_printf_buf); } @@ -786,6 +833,14 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size, break; } } +#ifdef HAVE_DBDEVICE + if (data->Dev->isDBDevice()) { + ((DBDevice *)(data->Dev)) + ->programBIKernelBitstream((BuiltinKernelId)kernelID); + ((DBDevice *)(data->Dev)) + ->programBIKernelFirmware((BuiltinKernelId)kernelID); + } +#endif if (kernelID == -1) { if (data->compilationData == NULL) { @@ -824,7 +879,23 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size, // clear the timestamps and initial signal value for (unsigned offset = 0; offset < sizeof(CommandMetadata); offset += 4) data->Dev->DataMemory->Write32( - commandMetaAddress - data->Dev->DataMemory->PhysAddress + offset, 0); + commandMetaAddress - data->Dev->DataMemory->PhysAddress() + offset, 0); + if 
(cmd->device->device_side_printf) { + data->Dev->DataMemory->Write32( + commandMetaAddress - data->Dev->DataMemory->PhysAddress() + + offsetof(CommandMetadata, reserved0), + ((chunk_info_t *)data->PrintfBuffer)->start_address); + data->Dev->DataMemory->Write32(commandMetaAddress - + data->Dev->DataMemory->PhysAddress() + + offsetof(CommandMetadata, reserved1), + cmd->device->printf_buffer_size); + + data->Dev->DataMemory->Write32( + commandMetaAddress - data->Dev->DataMemory->PhysAddress() + + offsetof(CommandMetadata, reserved1) + 4, + ((chunk_info_t *)data->PrintfPosition)->start_address); + } + // Set arguments data->Dev->DataMemory->CopyToMMAP(argsAddress, arguments, arg_size); @@ -862,19 +933,19 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size, pc.global_var_buffer = 0; if (cmd->device->device_side_printf) { - pc.printf_buffer = ((chunk_info_t *)data->printf_buffer)->start_address; + pc.printf_buffer = ((chunk_info_t *)data->PrintfBuffer)->start_address; pc.printf_buffer_capacity = cmd->device->printf_buffer_size; assert(pc.printf_buffer_capacity); pc.printf_buffer_position = - ((chunk_info_t *)data->printf_position)->start_address; + ((chunk_info_t *)data->PrintfPosition)->start_address; POCL_MSG_PRINT_ALMAIF( "Device side printf buffer=%d, position: %d and capacity %d \n", pc.printf_buffer, pc.printf_buffer_position, pc.printf_buffer_capacity); data->Dev->DataMemory->Write32( - pc.printf_buffer_position - data->Dev->DataMemory->PhysAddress, 0); + pc.printf_buffer_position - data->Dev->DataMemory->PhysAddress(), 0); } size_t pc_start_addr = data->compilationData->pocl_context->start_address; @@ -882,7 +953,7 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size, sizeof(pocl_context32)); if (data->Dev->RelativeAddressing) { - pc_start_addr -= data->Dev->DataMemory->PhysAddress; + pc_start_addr -= data->Dev->DataMemory->PhysAddress(); } packet.reserved = pc_start_addr; @@ -895,9 +966,9 @@ void 
scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size, } if (data->Dev->RelativeAddressing) { - packet.kernarg_address = argsAddress - data->Dev->DataMemory->PhysAddress; + packet.kernarg_address = argsAddress - data->Dev->DataMemory->PhysAddress(); packet.command_meta_address = - commandMetaAddress - data->Dev->DataMemory->PhysAddress; + commandMetaAddress - data->Dev->DataMemory->PhysAddress(); } else { packet.kernarg_address = argsAddress; packet.command_meta_address = commandMetaAddress; @@ -908,20 +979,24 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size, packet.kernarg_address, packet.command_meta_address); POCL_LOCK(data->AQLQueueLock); - uint32_t queue_length = data->Dev->CQMemory->Size / AQL_PACKET_LENGTH - 1; + uint32_t queue_length = data->Dev->CQMemory->Size() / AQL_PACKET_LENGTH - 1; uint32_t write_iter = data->Dev->CQMemory->Read32(ALMAIF_CQ_WRITE); uint32_t read_iter = data->Dev->CQMemory->Read32(ALMAIF_CQ_READ); while (write_iter >= read_iter + queue_length) { - // POCL_MSG_PRINT_ALMAIF("write_iter=%u, read_iter=%u length=%u", write_iter, - // read_iter, queue_length); + POCL_MSG_PRINT_ALMAIF("write_iter=%u, read_iter=%u length=%u", write_iter, + read_iter, queue_length); usleep(ALMAIF_DRIVER_SLEEP); read_iter = data->Dev->CQMemory->Read32(ALMAIF_CQ_READ); +#ifdef ALMAIF_DUMP_MEMORY + POCL_MSG_PRINT_ALMAIF("WAITING FOR CQMEMORY TO EMPTY DUMP\n"); + data->Dev->printMemoryDump(); +#endif } uint32_t packet_loc = (write_iter % queue_length) * AQL_PACKET_LENGTH + AQL_PACKET_LENGTH; - data->Dev->CQMemory->CopyToMMAP(packet_loc + data->Dev->CQMemory->PhysAddress, - &packet, 64); + data->Dev->CQMemory->CopyToMMAP( + packet_loc + data->Dev->CQMemory->PhysAddress(), &packet, 64); #ifdef ALMAIF_DUMP_MEMORY POCL_MSG_PRINT_ALMAIF("PRELAUNCH MEMORY DUMP\n"); @@ -956,7 +1031,7 @@ bool isEventDone(AlmaifData *data, cl_event event) { assert(commandMetaAddress); size_t signalAddress = commandMetaAddress + 
offsetof(CommandMetadata, completion_signal); - signalAddress -= data->Dev->DataMemory->PhysAddress; + signalAddress -= data->Dev->DataMemory->PhysAddress(); uint32_t status = data->Dev->DataMemory->Read32(signalAddress); @@ -1049,19 +1124,19 @@ void submit_and_barrier(AlmaifData *D, _cl_command_node *cmd) { packet.signal_count = i + 1; POCL_LOCK(D->AQLQueueLock); - uint32_t queue_length = D->Dev->CQMemory->Size / AQL_PACKET_LENGTH - 1; + uint32_t queue_length = D->Dev->CQMemory->Size() / AQL_PACKET_LENGTH - 1; uint32_t write_iter = D->Dev->CQMemory->Read32(ALMAIF_CQ_WRITE); uint32_t read_iter = D->Dev->CQMemory->Read32(ALMAIF_CQ_READ); while (write_iter >= read_iter + queue_length) { - // POCL_MSG_PRINT_ALMAIF("write_iter=%u, read_iter=%u length=%u", - // write_iter, read_iter, queue_length); + POCL_MSG_PRINT_ALMAIF("write_iter=%u, read_iter=%u length=%u", write_iter, + read_iter, queue_length); read_iter = D->Dev->CQMemory->Read32(ALMAIF_CQ_READ); usleep(ALMAIF_DRIVER_SLEEP); } uint32_t packet_loc = (write_iter % queue_length) * AQL_PACKET_LENGTH + AQL_PACKET_LENGTH; - D->Dev->CQMemory->CopyToMMAP(packet_loc + D->Dev->CQMemory->PhysAddress, + D->Dev->CQMemory->CopyToMMAP(packet_loc + D->Dev->CQMemory->PhysAddress(), &packet, 64); D->Dev->CQMemory->Write16(packet_loc, (1 << AQL_PACKET_BARRIER_AND) | @@ -1120,15 +1195,14 @@ void submit_kernel_packet(AlmaifData *D, _cl_command_node *cmd) { // almaif doesn't support SVM pointers assert(al->is_svm == 0); cl_mem m = (*(cl_mem *)(al->value)); - auto chunk = - (chunk_info_t *)m->device_ptrs[cmd->device->global_mem_id].mem_ptr; - size_t buffer = (size_t)chunk->start_address; + size_t buffer = D->Dev->pointerDeviceOffset( + &(m->device_ptrs[cmd->device->global_mem_id])); buffer += al->offset; if (D->Dev->RelativeAddressing) { if (D->Dev->DataMemory->isInRange(buffer)) { - buffer -= D->Dev->DataMemory->PhysAddress; + buffer -= D->Dev->DataMemory->PhysAddress(); } else if (D->Dev->ExternalMemory->isInRange(buffer)) { - 
buffer -= D->Dev->ExternalMemory->PhysAddress; + buffer -= D->Dev->ExternalMemory->PhysAddress(); } else { POCL_ABORT("almaif: buffer outside of memory"); } @@ -1165,6 +1239,7 @@ void pocl_almaif_free_event_data(cl_event event) { } void *runningThreadFunc(void *) { + int counter = 0; while (!runningJoinRequested) { POCL_LOCK(runningLock); if (runningList) { @@ -1183,6 +1258,43 @@ void *runningThreadFunc(void *) { POCL_UPDATE_EVENT_COMPLETE_MSG(E, "Almaif, asynchronous NDRange "); POCL_LOCK(runningLock); } + +#ifdef ALMAIF_DUMP_MEMORY + if ((counter % 3) == 0) { + if (Node->device->device_side_printf) { + chunk_info_t *PrintfBufferChunk = (chunk_info_t *)AD->PrintfBuffer; + assert(PrintfBufferChunk); + chunk_info_t *PrintfPositionChunk = + (chunk_info_t *)AD->PrintfPosition; + assert(PrintfPositionChunk); + unsigned position = + AD->Dev->DataMemory->Read32(PrintfPositionChunk->start_address - + AD->Dev->DataMemory->PhysAddress()); + POCL_MSG_PRINT_ALMAIF( + "Device wrote %u bytes to stdout. 
Printing them now:\n", + position); + if (position > 0) { + char *tmp_printf_buf = (char *)malloc(position); + AD->Dev->DataMemory->CopyFromMMAP( + tmp_printf_buf, PrintfBufferChunk->start_address, position); + write(STDOUT_FILENO, tmp_printf_buf, position); + free(tmp_printf_buf); + } + } + } else { + uint32_t pc = AD->Dev->ControlMemory->Read32(ALMAIF_STATUS_REG_PC); + uint64_t cc = + AD->Dev->ControlMemory->Read64(ALMAIF_STATUS_REG_CC_LOW); + uint64_t sc = + AD->Dev->ControlMemory->Read64(ALMAIF_STATUS_REG_SC_LOW); + POCL_MSG_PRINT_ALMAIF( + "PC:%" PRId32 " CC:%" PRId64 " SC:%" PRId64 "\n", pc, cc, sc); + + POCL_MSG_PRINT_ALMAIF("RUNNING MEMORY DUMP\n"); + AD->Dev->printMemoryDump(); + } +#endif + counter++; } } POCL_UNLOCK(runningLock); @@ -1202,8 +1314,6 @@ void pocl_almaif_copy_rect(void *data, pocl_mem_identifier *dst_mem_id, size_t const src_row_pitch, size_t const src_slice_pitch) { AlmaifData *d = (AlmaifData *)data; - chunk_info_t *src_chunk = (chunk_info_t *)src_mem_id->mem_ptr; - chunk_info_t *dst_chunk = (chunk_info_t *)dst_mem_id->mem_ptr; size_t src_offset = src_origin[0] + src_row_pitch * src_origin[1] + src_slice_pitch * src_origin[2]; @@ -1218,15 +1328,12 @@ void pocl_almaif_copy_rect(void *data, pocl_mem_identifier *dst_mem_id, for (j = 0; j < region[1]; ++j) for (i = 0; i < region[0]; i++) { char val; - d->Dev->readDataFromDevice(&val, - src_chunk->start_address + src_offset + - src_row_pitch * j + src_slice_pitch * k + - i, - 1); - d->Dev->writeDataToDevice(dst_chunk->start_address + dst_offset + - dst_row_pitch * j + dst_slice_pitch * k + - i, - &val, 1); + d->Dev->readDataFromDevice(&val, src_mem_id, 1, + src_offset + src_row_pitch * j + + src_slice_pitch * k + i); + d->Dev->writeDataToDevice(dst_mem_id, &val, 1, + dst_offset + dst_row_pitch * j + + dst_slice_pitch * k + i); } } @@ -1240,10 +1347,9 @@ void pocl_almaif_write_rect(void *data, const void *__restrict__ src_host_ptr, size_t const host_row_pitch, size_t const host_slice_pitch) { 
AlmaifData *d = (AlmaifData *)data; - chunk_info_t *dst_chunk = (chunk_info_t *)dst_mem_id->mem_ptr; - size_t adjusted_dst_ptr = dst_chunk->start_address + buffer_origin[0] + - buffer_row_pitch * buffer_origin[1] + - buffer_slice_pitch * buffer_origin[2]; + size_t adjusted_dst_offset = buffer_origin[0] + + buffer_row_pitch * buffer_origin[1] + + buffer_slice_pitch * buffer_origin[2]; char const *__restrict__ const adjusted_host_ptr = (char const *)src_host_ptr + host_origin[0] + @@ -1259,8 +1365,8 @@ void pocl_almaif_write_rect(void *data, const void *__restrict__ src_host_ptr, size_t d_offset = buffer_row_pitch * j + buffer_slice_pitch * k; - d->Dev->writeDataToDevice(adjusted_dst_ptr + d_offset, - adjusted_host_ptr + s_offset, region[0]); + d->Dev->writeDataToDevice(dst_mem_id, adjusted_host_ptr + s_offset, + region[0], adjusted_dst_offset + d_offset); } } @@ -1274,10 +1380,9 @@ void pocl_almaif_read_rect(void *data, void *__restrict__ dst_host_ptr, size_t const host_row_pitch, size_t const host_slice_pitch) { AlmaifData *d = (AlmaifData *)data; - chunk_info_t *src_chunk = (chunk_info_t *)src_mem_id->mem_ptr; - size_t adjusted_src_ptr = src_chunk->start_address + buffer_origin[0] + - buffer_row_pitch * buffer_origin[1] + - buffer_slice_pitch * buffer_origin[2]; + size_t adjusted_src_offset = buffer_origin[0] + + buffer_row_pitch * buffer_origin[1] + + buffer_slice_pitch * buffer_origin[2]; char *__restrict__ const adjusted_host_ptr = (char *)dst_host_ptr + host_origin[0] + host_row_pitch * host_origin[1] + @@ -1291,7 +1396,7 @@ void pocl_almaif_read_rect(void *data, void *__restrict__ dst_host_ptr, for (j = 0; j < region[1]; ++j) { size_t d_offset = host_row_pitch * j + host_slice_pitch * k; size_t s_offset = buffer_row_pitch * j + buffer_slice_pitch * k; - d->Dev->readDataFromDevice(adjusted_host_ptr + d_offset, - adjusted_src_ptr + s_offset, region[0]); + d->Dev->readDataFromDevice(adjusted_host_ptr + d_offset, src_mem_id, + region[0], adjusted_src_offset + 
s_offset); } } diff --git a/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.cc b/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.cc similarity index 69% rename from lib/CL/devices/almaif/openasip/AlmaifCompileTCE.cc rename to lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.cc index d49aad3a029ababde7881ef46dd14c9f651a1e4e..ec2d9fc07e0f861e171eb4091157b4d39a4475e5 100644 --- a/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.cc +++ b/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.cc @@ -1,4 +1,4 @@ -/* AlmaifCompileTCE.cc - compiler support for custom devices +/* AlmaifCompileOpenasip.cc - compiler support for custom devices Copyright (c) 2022 Topi Leppänen / Tampere University @@ -21,7 +21,6 @@ IN THE SOFTWARE. */ - #include "stdint.h" #include "unistd.h" @@ -50,41 +49,40 @@ #include "../AlmaifCompile.hh" #include "../AlmaifShared.hh" -#include "AlmaifCompileTCE.hh" +#include "AlmaifCompileOpenasip.hh" #include "TTASimDevice.hh" -int pocl_almaif_tce_initialize(cl_device_id device, const char *parameters) { +int pocl_almaif_openasip_initialize(cl_device_id device, + const std::string &parameters) { AlmaifData *d = (AlmaifData *)(device->data); - tce_backend_data_t *bd = (tce_backend_data_t *)pocl_aligned_malloc( - HOST_CPU_CACHELINE_SIZE, sizeof(tce_backend_data_t)); + openasip_backend_data_t *bd = new openasip_backend_data_t(); if (bd == NULL) { - POCL_MSG_WARN("couldn't allocate tce_backend_data\n"); + POCL_MSG_WARN("couldn't allocate openasip_backend_data\n"); return CL_OUT_OF_HOST_MEMORY; } - POCL_INIT_LOCK(bd->tce_compile_lock); + POCL_INIT_LOCK(bd->openasip_compile_lock); if (1) // pocl_offline_compile { - assert(parameters); + assert(parameters != ""); /* Convert the filename from env variable to absolute filename.
* This is required, since generatebits must be run in * destination (output) directory with ADF argument */ - bd->machine_file = realpath(parameters, NULL); - if ((bd->machine_file == NULL) || (!pocl_exists(bd->machine_file))) - POCL_ABORT("Can't find ADF file: %s\n", bd->machine_file); + char *tmp_path = realpath(parameters.c_str(), NULL); + if ((tmp_path == NULL) || (!pocl_exists(tmp_path))) + POCL_ABORT("Can't find ADF file: %s\n", tmp_path); + bd->machine_file.assign(tmp_path); + free(tmp_path); - size_t len = strlen(bd->machine_file); - assert(len > 0); - // char* dev_name = malloc (len+20); - // snprintf (dev_name, 1024, "ALMAIF TCE: %s", bd->machine_file); + // snprintf (dev_name, 1024, "ALMAIF openasip: %s", bd->machine_file); /* grep the ADF file for endiannes flag */ char *content = NULL; uint64_t size = 0; - pocl_read_file(bd->machine_file, &content, &size); + pocl_read_file(bd->machine_file.c_str(), &content, &size); if ((size == 0) || (content == NULL)) POCL_ABORT("Can't read ADF file: %s\n", bd->machine_file); @@ -100,12 +98,12 @@ int pocl_almaif_tce_initialize(cl_device_id device, const char *parameters) { bd->core_count); POCL_MEM_FREE(content); } else { - bd->machine_file = NULL; + bd->machine_file = ""; device->max_compute_units = d->Dev->ControlMemory->Read32(ALMAIF_INFO_CORE_COUNT); } - device->long_name = device->short_name = "ALMAIF TCE"; + device->long_name = device->short_name = "ALMAIF OPENASIP"; device->vendor = "pocl"; device->extensions = TCE_DEVICE_EXTENSIONS; if (device->endian_little) { @@ -121,42 +119,24 @@ int pocl_almaif_tce_initialize(cl_device_id device, const char *parameters) { d->compilationData->backend_data = (void *)bd; device->builtins_sources_path = "tce_builtins.cl"; - device->device_side_printf = 1; - device->printf_buffer_size = PRINTF_BUFFER_SIZE; - chunk_info_t *chunk = NULL; - chunk = pocl_alloc_buffer(d->Dev->AllocRegions, device->printf_buffer_size); - if (chunk == NULL) { - POCL_ABORT("Almaif: Can't allocate %z 
bytes for printf buffer\n", - device->printf_buffer_size); - } else { - POCL_MSG_PRINT_ALMAIF("Allocated printf buffer of size %d from %d\n", - device->printf_buffer_size, chunk->start_address); - d->printf_buffer = chunk; - } - - d->printf_position = pocl_alloc_buffer(d->Dev->AllocRegions, 4); - if (d->printf_position == NULL) { - POCL_ABORT("Almaif: Can't allocate 4 bytes for printf index\n"); - } - return 0; } -int pocl_almaif_tce_cleanup(cl_device_id device) { +int pocl_almaif_openasip_cleanup(cl_device_id device) { void *data = device->data; AlmaifData *d = (AlmaifData *)data; - pocl_free_chunk((chunk_info_t *)d->printf_buffer); - pocl_free_chunk((chunk_info_t *)d->printf_position); - - tce_backend_data_t *bd = - (tce_backend_data_t *)d->compilationData->backend_data; + if (device->device_side_printf) { + pocl_free_chunk((chunk_info_t *)d->PrintfBuffer); + pocl_free_chunk((chunk_info_t *)d->PrintfPosition); + } - POCL_DESTROY_LOCK(bd->tce_compile_lock); + openasip_backend_data_t *bd = + (openasip_backend_data_t *)d->compilationData->backend_data; - POCL_MEM_FREE(bd->machine_file); + POCL_DESTROY_LOCK(bd->openasip_compile_lock); - pocl_aligned_free(bd); + delete bd; return 0; } @@ -165,77 +145,74 @@ int pocl_almaif_tce_cleanup(cl_device_id device) { #define OFFSET_ARG(c) SUBST(c) #define MAX_CMDLINE_LEN (32 * POCL_MAX_PATHNAME_LENGTH) -void tceccCommandLine(char *commandline, size_t max_cmdline_len, - _cl_command_run *run_cmd, AlmaifData *D, - const char *tempDir, const char *inputSrc, - const char *outputTpef, const char *machine_file, - int is_multicore, int little_endian, - const char *extraParams, bool standalone_mode) { - - const char *mainC; +std::string oaccCommandLine(_cl_command_run *run_cmd, AlmaifData *D, + const std::string &tempDir, + const std::string &inputSrc, + const std::string &outputTpef, + const std::string &machine_file, int is_multicore, + int little_endian, const std::string &extraParams, + bool standalone_mode) { + std::string mainC; if 
(is_multicore) mainC = "tta_device_main_dthread.c"; else mainC = "tta_device_main.c"; - char deviceMainSrc[POCL_MAX_PATHNAME_LENGTH]; - const char *poclIncludePathSwitch; + std::string deviceMainSrc; + std::string poclIncludePathSwitch; if (pocl_get_bool_option("POCL_BUILDING", 0)) { - snprintf(deviceMainSrc, POCL_MAX_PATHNAME_LENGTH, "%s%s%s", SRCDIR, - "/lib/CL/devices/almaif/openasip/", mainC); - assert(access(deviceMainSrc, R_OK) == 0); - poclIncludePathSwitch = " -I " SRCDIR "/include" - " -I " SRCDIR "/lib/CL/devices/almaif/openasip"; + deviceMainSrc = + std::string(SRCDIR) + "/lib/CL/devices/almaif/openasip/" + mainC; + assert(access(deviceMainSrc.c_str(), R_OK) == 0); + poclIncludePathSwitch = " -I " + std::string(SRCDIR) + "/include" + " -I " + + std::string(SRCDIR) + + "/lib/CL/devices/almaif/openasip"; } else { - snprintf(deviceMainSrc, POCL_MAX_PATHNAME_LENGTH, "%s%s%s", - POCL_INSTALL_PRIVATE_DATADIR, "/", mainC); - assert(access(deviceMainSrc, R_OK) == 0); - poclIncludePathSwitch = " -I " POCL_INSTALL_PRIVATE_DATADIR "/include"; + deviceMainSrc = std::string(POCL_INSTALL_PRIVATE_DATADIR) + "/" + mainC; + assert(access(deviceMainSrc.c_str(), R_OK) == 0); + poclIncludePathSwitch = + " -I " + std::string(POCL_INSTALL_PRIVATE_DATADIR) + "/include"; } - char extraFlags[MAX_CMDLINE_LEN]; - const char *multicoreFlags = ""; + std::string extraFlags; + std::string multicoreFlags = ""; if (is_multicore) multicoreFlags = " -ldthread -lsync-lu -llockunit"; - char preprocessor_directives[MAX_CMDLINE_LEN]; - set_preprocessor_directives(preprocessor_directives, D, machine_file, - standalone_mode); + std::string preprocessor_directives = + set_preprocessor_directives(D, machine_file, standalone_mode); - const char *userFlags = pocl_get_string_option("POCL_TCECC_EXTRA_FLAGS", ""); - const char *endianFlags = little_endian ? 
"--little-endian" : ""; - snprintf(extraFlags, MAX_CMDLINE_LEN, "%s %s %s %s %s -k dummy_argbuffer", - extraParams, multicoreFlags, userFlags, endianFlags, - preprocessor_directives); + const std::string userFlags = + pocl_get_string_option("POCL_TCECC_EXTRA_FLAGS", ""); + const std::string endianFlags = little_endian ? "--little-endian" : ""; + extraFlags = extraParams + " " + multicoreFlags + " " + userFlags + " " + + endianFlags + " " + preprocessor_directives + + " -k dummy_argbuffer"; - char kernelObjSrc[POCL_MAX_PATHNAME_LENGTH]; - snprintf(kernelObjSrc, POCL_MAX_PATHNAME_LENGTH, "%s%s", tempDir, - "/../descriptor.so.kernel_obj.c"); + std::string kernelObjSrc = tempDir + "/../descriptor.so.kernel_obj.c"; - char kernelMdSymbolName[POCL_MAX_PATHNAME_LENGTH]; - snprintf(kernelMdSymbolName, POCL_MAX_PATHNAME_LENGTH, "_%s_md", - run_cmd->kernel->name); + std::string kernelMdSymbolName = + "_" + std::string(run_cmd->kernel->name) + "_md"; - char programBcFile[POCL_MAX_PATHNAME_LENGTH]; - snprintf(programBcFile, POCL_MAX_PATHNAME_LENGTH, "%s%s", tempDir, - "/program.bc"); + std::string programBcFile = tempDir + "/program.bc"; /* Compile in steps to save the program.bc for automated exploration use case when producing the kernel capture scripts. 
*/ - snprintf(commandline, max_cmdline_len, - "tcecc -llwpr %s %s %s %s -k %s -g -O3 --emit-llvm -o %s %s;" - "tcecc -a %s %s -O3 -o %s %s\n", - poclIncludePathSwitch, deviceMainSrc, kernelObjSrc, inputSrc, - kernelMdSymbolName, programBcFile, extraFlags, - - machine_file, programBcFile, outputTpef, extraFlags); + std::string commandline = + "oacc -llwpr " + poclIncludePathSwitch + " " + deviceMainSrc + " " + + kernelObjSrc + " " + inputSrc + " -k " + kernelMdSymbolName + + " -g -O3 --emit-llvm" + " -o " + programBcFile + " " + extraFlags + ";" + + "oacc -a " + machine_file + " " + programBcFile + " -O3 -o " + + outputTpef + " " + extraFlags + "\n"; + return commandline; } -void pocl_tce_write_kernel_descriptor(char *content, size_t content_size, - _cl_command_node *command, - cl_kernel kernel, cl_device_id device, - int specialize) { +void pocl_openasip_write_kernel_descriptor(char *content, size_t content_size, + _cl_command_node *command, + cl_kernel kernel, + cl_device_id device, + int specialize) { // Generate the kernel_obj.c file. 
This should be optional // and generated only for the heterogeneous standalone devices which // need the definitions to accompany the kernels, for the launcher @@ -280,8 +257,8 @@ void pocl_tce_write_kernel_descriptor(char *content, size_t content_size, content_len); } -void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, - cl_device_id device, int specialize) { +void pocl_almaif_openasip_compile(_cl_command_node *cmd, cl_kernel kernel, + cl_device_id device, int specialize) { if (cmd->type != CL_COMMAND_NDRANGE_KERNEL) { POCL_ABORT("Almaif: trying to compile non-ndrange command\n"); @@ -289,8 +266,8 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, void *data = cmd->device->data; AlmaifData *d = (AlmaifData *)data; - tce_backend_data_t *bd = - (tce_backend_data_t *)d->compilationData->backend_data; + openasip_backend_data_t *bd = + (openasip_backend_data_t *)d->compilationData->backend_data; if (!kernel) kernel = cmd->command.run.kernel; @@ -299,13 +276,13 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, assert(kernel); assert(device); POCL_MSG_PRINT_ALMAIF("COMPILATION BEFORE WG FUNC\n"); - POCL_LOCK(bd->tce_compile_lock); + POCL_LOCK(bd->openasip_compile_lock); int error = pocl_llvm_generate_workgroup_function( cmd->program_device_i, device, kernel, cmd, specialize); POCL_MSG_PRINT_ALMAIF("COMPILATION AFTER WG FUNC\n"); if (error) { - POCL_UNLOCK(bd->tce_compile_lock); + POCL_UNLOCK(bd->openasip_compile_lock); POCL_ABORT("TCE: pocl_llvm_generate_workgroup_function()" " failed for kernel %s\n", kernel->name); @@ -333,29 +310,29 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, if (!pocl_exists(assemblyFileName)) { char descriptor_content[64 * 1024]; - pocl_tce_write_kernel_descriptor(descriptor_content, (64 * 1024), cmd, - kernel, device, specialize); + pocl_openasip_write_kernel_descriptor(descriptor_content, (64 * 1024), cmd, + kernel, device, specialize); error = 
snprintf(inputBytecode, POCL_MAX_PATHNAME_LENGTH, "%s%s", cachedir, POCL_PARALLEL_BC_FILENAME); - char commandLine[MAX_CMDLINE_LEN]; - tceccCommandLine(commandLine, MAX_CMDLINE_LEN, &cmd->command.run, d, - tempDir, - inputBytecode, // inputSrc - assemblyFileName, bd->machine_file, bd->core_count > 1, - device->endian_little, "", false); + std::string commandLine = + oaccCommandLine(&cmd->command.run, d, tempDir, + inputBytecode, // inputSrc + assemblyFileName, bd->machine_file, bd->core_count > 1, + device->endian_little, "", false); - POCL_MSG_PRINT_ALMAIF("build command: \n%s", commandLine); + POCL_MSG_PRINT_ALMAIF("build command: \n%s", commandLine.c_str()); - error = system(commandLine); + error = system(commandLine.c_str()); if (error != 0) - POCL_ABORT("Error while running tcecc.\n"); + POCL_ABORT("Error while running oacc.\n"); // Dump disassembled tpef for debugging - char tcedisasmCmd[MAX_CMDLINE_LEN]; - snprintf(tcedisasmCmd, MAX_CMDLINE_LEN, "tcedisasm -n %s %s", bd->machine_file, assemblyFileName); - error = system(tcedisasmCmd); + char OpenasipDisAsmCmd[MAX_CMDLINE_LEN]; + snprintf(OpenasipDisAsmCmd, MAX_CMDLINE_LEN, "tcedisasm -n %s %s", + bd->machine_file.c_str(), assemblyFileName); + error = system(OpenasipDisAsmCmd); if (error != 0) POCL_MSG_WARN("Error while running tcedisasm.\n"); } @@ -383,16 +360,17 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, assemblyFileName); } - char wg_func_name[4 * POCL_MAX_PATHNAME_LENGTH]; - snprintf(wg_func_name, sizeof(wg_func_name), "%s_workgroup_argbuffer", - cmd->command.run.kernel->name); + std::string wg_func_name = + std::string(cmd->command.run.kernel->name) + "_workgroup_argbuffer"; if (prog->hasProcedure(wg_func_name)) { const TTAProgram::Procedure &proc = prog->procedure(wg_func_name); int kernel_address = proc.startAddress().location(); - char content[64]; - snprintf(content, 64, "kernel address = %d", kernel_address); - pocl_write_file(md_path, content, strlen(content), 0, 0); + 
std::string md_path = std::string(cachedir) + "/kernel_address.txt"; + std::string content = + "kernel address = " + std::to_string(kernel_address); + pocl_write_file(md_path.c_str(), content.c_str(), content.length(), 0, + 0); } else { POCL_ABORT("Couldn't find wg_function procedure %s from the program\n", wg_func_name); @@ -406,15 +384,13 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, "/parallel.img"); if (!pocl_exists(imem_file)) { - char genbits_command[POCL_MAX_PATHNAME_LENGTH * 8]; - // --dmemwidthinmaus 4 - snprintf(genbits_command, (POCL_MAX_PATHNAME_LENGTH * 8), - "SAVEDIR=$PWD; cd %s; generatebits --dmemwidthinmaus 4 " - "--piformat=bin2n --diformat=bin2n --program " - "parallel.tpef %s ; cd $SAVEDIR", - cachedir, bd->machine_file); - POCL_MSG_PRINT_ALMAIF("running genbits: \n %s \n", genbits_command); - error = system(genbits_command); + std::string genbits_command = + "SAVEDIR=$PWD; cd " + std::string(cachedir) + + "; generatebits --dmemwidthinmaus 4 " + + "--piformat=bin2n --diformat=bin2n --program " + "parallel.tpef " + + bd->machine_file + "; cd $SAVEDIR"; + POCL_MSG_PRINT_ALMAIF("running genbits: \n %s \n", genbits_command.c_str()); + error = system(genbits_command.c_str()); if (error != 0) POCL_ABORT("Error while running generatebits.\n"); } @@ -422,7 +398,7 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, error = pocl_exists(imem_file); assert(error != 0 && "parallel.img does not exist!"); - POCL_UNLOCK(bd->tce_compile_lock); + POCL_UNLOCK(bd->openasip_compile_lock); } /* This is a version number that is supposed to increase when there is @@ -430,8 +406,8 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, * incompatible (e.g. 
a change in generated device image file names, etc) */ #define POCL_TCE_ALMAIF_BINARY_VERSION "2" -int pocl_almaif_tce_device_hash(const char *adf_file, const char *llvm_triplet, - char *output) { +int pocl_almaif_openasip_device_hash(const char *adf_file, + const char *llvm_triplet, char *output) { SHA1_CTX ctx; uint8_t bin_dig[SHA1_DIGEST_SIZE]; @@ -464,10 +440,10 @@ int pocl_almaif_tce_device_hash(const char *adf_file, const char *llvm_triplet, return 0; } -char *pocl_tce_init_build(void *data) { +char *pocl_almaif_openasip_init_build(void *data) { AlmaifData *D = (AlmaifData *)data; - tce_backend_data_t *bd = - (tce_backend_data_t *)D->compilationData->backend_data; + openasip_backend_data_t *bd = + (openasip_backend_data_t *)D->compilationData->backend_data; assert(bd); TCEString mach_tmpdir = Environment::llvmtceCachePath(); @@ -491,11 +467,11 @@ char *pocl_tce_init_build(void *data) { char tempfile[POCL_MAX_PATHNAME_LENGTH]; pocl_mk_tempname(tempfile, mach_tmpdir.c_str(), ".devext", NULL); - std::string tceopgenCmd = std::string("tceopgen > ") + tempfile; + std::string OpenasipOpgenCmd = std::string("tceopgen > ") + tempfile; - POCL_MSG_PRINT_TCE("Running: %s \n", tceopgenCmd.c_str()); + POCL_MSG_PRINT_TCE("Running: %s \n", OpenasipOpgenCmd.c_str()); - error = system(tceopgenCmd.c_str()); + error = system(OpenasipOpgenCmd.c_str()); if (error == -1) return NULL; @@ -521,11 +497,11 @@ char *pocl_tce_init_build(void *data) { return include_switch; } -void pocl_almaif_tce_produce_standalone_program(AlmaifData *D, - _cl_command_node *cmd, - pocl_context32 *pc, - size_t arg_size, - void *arguments) { +void pocl_almaif_openasip_produce_standalone_program(AlmaifData *D, + _cl_command_node *cmd, + pocl_context32 *pc, + size_t arg_size, + void *arguments) { _cl_command_run *run_cmd = &cmd->command.run; static int runCounter = 0; @@ -538,8 +514,8 @@ void pocl_almaif_tce_produce_standalone_program(AlmaifData *D, TCEString fname = baseFname + ".c"; TCEString parallel_bc 
= tempDir + "/parallel.bc"; - tce_backend_data_t *bd = - (tce_backend_data_t *)D->compilationData->backend_data; + openasip_backend_data_t *bd = + (openasip_backend_data_t *)D->compilationData->backend_data; std::ofstream out(fname.c_str()); @@ -701,10 +677,9 @@ void pocl_almaif_tce_produce_standalone_program(AlmaifData *D, TCEString inputFiles = fname + " " + parallel_bc; std::ofstream scriptout(buildScriptFname.c_str()); - char commandLine[MAX_CMDLINE_LEN]; - tceccCommandLine(commandLine, MAX_CMDLINE_LEN, run_cmd, D, tempDir.c_str(), - inputFiles.c_str(), "standalone.tpef", bd->machine_file, - bd->core_count > 1, 1, " -D_STANDALONE_MODE=1", true); + std::string commandLine = oaccCommandLine( + run_cmd, D, tempDir.c_str(), inputFiles.c_str(), "standalone.tpef", + bd->machine_file, bd->core_count > 1, 1, " -D_STANDALONE_MODE=1", true); scriptout << commandLine; scriptout.close(); @@ -724,9 +699,10 @@ void pocl_almaif_tce_produce_standalone_program(AlmaifData *D, ++runCounter; } -void set_preprocessor_directives(char *output, AlmaifData *d, const char *adf, - bool standalone_mode) { +std::string set_preprocessor_directives(AlmaifData *d, const std::string &adf, + bool standalone_mode) { TTAMachine::Machine *mach = NULL; + std::string output = ""; try { mach = TTAMachine::Machine::loadFromADF(adf); } catch (Exception &e) { @@ -762,15 +738,15 @@ void set_preprocessor_directives(char *output, AlmaifData *d, const char *adf, POCL_ABORT("Couldn't find the global address space from machine\n"); } - int AQL_queue_length = d->Dev->CQMemory->Size / AQL_PACKET_LENGTH - 1; - unsigned dmem_size = d->Dev->DataMemory->Size; - unsigned cq_size = d->Dev->CQMemory->Size; + int AQL_queue_length = d->Dev->CQMemory->Size() / AQL_PACKET_LENGTH - 1; + unsigned DmemSize = d->Dev->DataMemory->Size(); + unsigned CQSize = d->Dev->CQMemory->Size(); bool relativeAddressing = d->Dev->RelativeAddressing; int i = 0; - i = snprintf(output, MAX_CMDLINE_LEN, "-DQUEUE_LENGTH=%i ", AQL_queue_length); 
+ output += "-DQUEUE_LENGTH=" + std::to_string(AQL_queue_length) + " "; if (!separatePrivateMem) { - unsigned initsp = dmem_size; + unsigned initsp = DmemSize; unsigned private_mem_start = 0; if (!standalone_mode) { // The standalone mode, cannot separate the automatic allocation of @@ -783,42 +759,47 @@ void set_preprocessor_directives(char *output, AlmaifData *d, const char *adf, int private_mem_size = pocl_get_int_option( "POCL_ALMAIF_PRIVATE_MEM_SIZE", ALMAIF_DEFAULT_PRIVATE_MEM_SIZE); initsp += private_mem_size; - private_mem_start += dmem_size; + private_mem_start += DmemSize; if (!separateCQMem) { - initsp += cq_size; - private_mem_start += cq_size; + initsp += CQSize; + private_mem_start += CQSize; } } if (!relativeAddressing) { - initsp += d->Dev->DataMemory->PhysAddress; - private_mem_start += d->Dev->DataMemory->PhysAddress; + initsp += d->Dev->DataMemory->PhysAddress(); + private_mem_start += d->Dev->DataMemory->PhysAddress(); } - i += - snprintf(output + i, MAX_CMDLINE_LEN, "--init-sp=%u --data-start=%s,%u", - initsp, private_as_name, private_mem_start); + output += "--init-sp="; + output += std::to_string(initsp); + output += " --data-start="; + output += private_as_name; + output += ","; + output += std::to_string(private_mem_start); } if (!relativeAddressing && standalone_mode) { // Appends to the data-start option - char data_start_option_string[MAX_CMDLINE_LEN]; + std::string data_start_option_string; if (!separatePrivateMem) { - strcpy(data_start_option_string, ","); + data_start_option_string = ","; } else { - strcpy(data_start_option_string, " --data-start="); + data_start_option_string = " --data-start="; } - i += snprintf(output + i, MAX_CMDLINE_LEN, - "%s%s,${STANDALONE_GLOBAL_AS_OFFSET}", - data_start_option_string, global_as_name); + output += data_start_option_string + global_as_name + + ",${STANDALONE_GLOBAL_AS_OFFSET}"; } if (!separateCQMem) { - unsigned queue_start = d->Dev->CQMemory->PhysAddress; + unsigned queue_start = 
d->Dev->CQMemory->PhysAddress(); if (relativeAddressing) { - queue_start -= d->Dev->DataMemory->PhysAddress; + queue_start -= d->Dev->DataMemory->PhysAddress(); } - i += snprintf(output + i, MAX_CMDLINE_LEN, " -DQUEUE_START=%u ", - queue_start); + output += " -DQUEUE_START="; + output += std::to_string(queue_start); + output += " "; } delete mach; + + return output; } diff --git a/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.hh b/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.hh new file mode 100644 index 0000000000000000000000000000000000000000..2fd35034359e960068f139b11e58cfcdfffac578 --- /dev/null +++ b/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.hh @@ -0,0 +1,69 @@ +/* AlmaifCompileOpenasip.hh - compiler support for custom devices + + Copyright (c) 2022 Topi Leppänen / Tampere University + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. 
+*/ + +#ifndef POCL_ALMAIFCOMPILETCE_H +#define POCL_ALMAIFCOMPILETCE_H + +#include "pocl_util.h" +// #include "AlmaifShared.hh" +// #include "AlmaifCompile.hh" + +int pocl_almaif_openasip_initialize(cl_device_id device, + const std::string &parameters); +int pocl_almaif_openasip_cleanup(cl_device_id device); +void pocl_almaif_openasip_compile(_cl_command_node *cmd, cl_kernel kernel, + cl_device_id device, int specialize); +void pocl_almaif_openasip_produce_standalone_program(AlmaifData *D, + _cl_command_node *cmd, + pocl_context32 *pc, + size_t arg_size, + void *arguments); + +char *pocl_almaif_openasip_init_build(void *data); + +typedef struct openasip_backend_data_s { + pocl_lock_t openasip_compile_lock + __attribute__((aligned(HOST_CPU_CACHELINE_SIZE))); + std::string machine_file; + int core_count; +} openasip_backend_data_t; + +std::string oaccCommandLine(_cl_command_run *run_cmd, AlmaifData *D, + const std::string &tempDir, + const std::string &inputSrc, + const std::string &outputTpef, + const std::string &machine_file, int is_multicore, + int little_endian, const std::string &extraParams, + bool standalone_mode); +void pocl_openasip_write_kernel_descriptor(char *content, size_t content_size, + _cl_command_node *command, + cl_kernel kernel, + cl_device_id device, int specialize); + +int pocl_almaif_openasip_device_hash(const char *adf_file, + const char *llvm_triplet, char *output); + +std::string set_preprocessor_directives(AlmaifData *d, const std::string &adf, + bool standalone_mode); + +#endif diff --git a/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.hh b/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.hh deleted file mode 100644 index d26728ebf4a7e4030eb4a75c698939eb954f2a67..0000000000000000000000000000000000000000 --- a/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.hh +++ /dev/null @@ -1,67 +0,0 @@ -/* AlmaifCompileTCE.hh - compiler support for custom devices - - Copyright (c) 2022 Topi Leppänen / Tampere University - - Permission is hereby granted,
free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to - deal in the Software without restriction, including without limitation the - rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - sell copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - IN THE SOFTWARE. 
-*/ - -#ifndef POCL_ALMAIFCOMPILETCE_H -#define POCL_ALMAIFCOMPILETCE_H - -#include "pocl_util.h" -//#include "AlmaifShared.hh" -//#include "AlmaifCompile.hh" - -int pocl_almaif_tce_initialize(cl_device_id device, const char *parameters); -int pocl_almaif_tce_cleanup(cl_device_id device); -void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel, - cl_device_id device, int specialize); -void pocl_almaif_tce_produce_standalone_program(AlmaifData *D, - _cl_command_node *cmd, - pocl_context32 *pc, - size_t arg_size, - void *arguments); - -char *pocl_tce_init_build(void *data); - -typedef struct tce_backend_data_s { - pocl_lock_t tce_compile_lock - __attribute__((aligned(HOST_CPU_CACHELINE_SIZE))); - char *machine_file; - int core_count; -} tce_backend_data_t; - -void tceccCommandLine(char *commandline, size_t max_cmdline_len, - _cl_command_run *run_cmd, AlmaifData *D, - const char *tempDir, const char *inputSrc, - const char *outputTpef, const char *machine_file, - int is_multicore, int little_endian, - const char *extraParams, bool standalone_mode); -void pocl_tce_write_kernel_descriptor(char *content, size_t content_size, - _cl_command_node *command, - cl_kernel kernel, cl_device_id device, - int specialize); - -int pocl_almaif_tce_device_hash(const char *adf_file, const char *llvm_triplet, - char *output); - -void set_preprocessor_directives(char *output, AlmaifData *d, const char *adf, - bool standalone_mode); - -#endif diff --git a/lib/CL/devices/almaif/openasip/TTASimControlRegion.cc b/lib/CL/devices/almaif/openasip/TTASimControlRegion.cc index aaad80984eaa878b91e3fb3335577a7fae459394..1687e15028fc022f52239620b118c66b8e4b103d 100644 --- a/lib/CL/devices/almaif/openasip/TTASimControlRegion.cc +++ b/lib/CL/devices/almaif/openasip/TTASimControlRegion.cc @@ -37,8 +37,8 @@ TTASimControlRegion::TTASimControlRegion(const TTAMachine::Machine &mach, TTASimDevice *parent) { POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Initializing TTASimControlRegion\n"); - PhysAddress 
= 0; - Size = ALMAIF_DEFAULT_CTRL_SIZE; + PhysAddress_ = 0; + Size_ = ALMAIF_DEFAULT_CTRL_SIZE; parent_ = parent; assert(parent_ != nullptr && "simulator parent handle NULL, is the sim opened properly?"); @@ -49,17 +49,17 @@ TTASimControlRegion::TTASimControlRegion(const TTAMachine::Machine &mach, uint32_t TTASimControlRegion::Read32(size_t offset) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading from physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + "offset 0x%zx\n", + PhysAddress_, offset); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); auto value = ControlRegisters_[offset / sizeof(uint32_t)]; return value; } void TTASimControlRegion::Write32(size_t offset, uint32_t value) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); if (offset == ALMAIF_CONTROL_REG_COMMAND) { switch (value) { @@ -84,9 +84,9 @@ void TTASimControlRegion::Write16(size_t offset, uint16_t value) { uint64_t TTASimControlRegion::Read64(size_t offset) { POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading from physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + "offset 0x%zx\n", + PhysAddress_, offset); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); auto value = reinterpret_cast<uint64_t *>( ControlRegisters_)[offset / sizeof(uint64_t)]; return value; @@ -114,66 +114,66 @@ void TTASimControlRegion::setupControlRegisters( bool hasPrivateMem = false; bool sharedDataAndCq = false; bool relativeAddressing = true; - int dmem_size = 0; - int cq_size = 0; - int imem_size = 0; + int DmemSize = 0; + int CQSize = 0; + int ImemSize = 0; const TTAMachine::Machine::AddressSpaceNavigator &nav = mach.addressSpaceNavigator(); for (int i = 0; i < nav.count(); 
i++) { TTAMachine::AddressSpace *as = nav.item(i); if (as->hasNumericalId(TTA_ASID_GLOBAL)) { if (as->end() == UINT32_MAX) { - dmem_size = pow(2, 15); // TODO magic number from almaifintegrator.cc + DmemSize = pow(2, 15); // TODO magic number from almaifintegrator.cc relativeAddressing = false; } else { - dmem_size = as->end() + 1; + DmemSize = as->end() + 1; } if (as->hasNumericalId(TTA_ASID_CQ)) { sharedDataAndCq = true; } } else if (as->hasNumericalId(TTA_ASID_CQ)) { - cq_size = as->end() + 1; + CQSize = as->end() + 1; } else if (as->hasNumericalId(TTA_ASID_PRIVATE)) { hasPrivateMem = true; } else if (as->name() == "instructions") { - imem_size = (as->end() + 1) * as->width(); + ImemSize = (as->end() + 1) * as->width(); } } - int segment_size = dmem_size > imem_size ? dmem_size : imem_size; + int segment_size = DmemSize > ImemSize ? DmemSize : ImemSize; - int dmem_start, cq_start; + int DmemStart, CQStart; if (relativeAddressing) { - dmem_start = 0; - cq_start = 0; + DmemStart = 0; + CQStart = 0; } else { - cq_start = 2 * segment_size; - dmem_start = 3 * segment_size; + CQStart = 2 * segment_size; + DmemStart = 3 * segment_size; } if (!hasPrivateMem) { // No private mem, so the latter half of the dmem is reserved for it int fallback_mem_size = pocl_get_int_option("POCL_ALMAIF_PRIVATE_MEM_SIZE", ALMAIF_DEFAULT_PRIVATE_MEM_SIZE); - dmem_size -= fallback_mem_size; + DmemSize -= fallback_mem_size; POCL_MSG_PRINT_ALMAIF( "Almaif: No separate private mem found. 
Setting it to %d\n", fallback_mem_size); } if (sharedDataAndCq) { // No separate Cq so reserve small slice of dmem for it - cq_size = 4 * AQL_PACKET_LENGTH; - dmem_size -= cq_size; - cq_start = dmem_start + dmem_size; + CQSize = 4 * AQL_PACKET_LENGTH; + DmemSize -= CQSize; + CQStart = DmemStart + DmemSize; } - int imem_start = 0; + int ImemStart = 0; if (!relativeAddressing) { unsigned default_baseaddress = 0x40000000; // TODO get from env variable - cq_start += default_baseaddress; - dmem_start += default_baseaddress; + CQStart += default_baseaddress; + DmemStart += default_baseaddress; } memset(ControlRegisters_, 0, ALMAIF_DEFAULT_CTRL_SIZE); @@ -183,12 +183,12 @@ void TTASimControlRegion::setupControlRegisters( ControlRegisters_[ALMAIF_INFO_IF_TYPE / 4] = 3; ControlRegisters_[ALMAIF_INFO_CORE_COUNT / 4] = 1; ControlRegisters_[ALMAIF_INFO_CTRL_SIZE / 4] = 1024; - ControlRegisters_[ALMAIF_INFO_IMEM_SIZE / 4] = imem_size; - ControlRegisters_[ALMAIF_INFO_IMEM_START_LOW / 4] = imem_start; - ControlRegisters_[ALMAIF_INFO_CQMEM_SIZE_LOW / 4] = cq_size; - ControlRegisters_[ALMAIF_INFO_CQMEM_START_LOW / 4] = cq_start; - ControlRegisters_[ALMAIF_INFO_DMEM_SIZE_LOW / 4] = dmem_size; - ControlRegisters_[ALMAIF_INFO_DMEM_START_LOW / 4] = dmem_start; + ControlRegisters_[ALMAIF_INFO_IMEM_SIZE / 4] = ImemSize; + ControlRegisters_[ALMAIF_INFO_IMEM_START_LOW / 4] = ImemStart; + ControlRegisters_[ALMAIF_INFO_CQMEM_SIZE_LOW / 4] = CQSize; + ControlRegisters_[ALMAIF_INFO_CQMEM_START_LOW / 4] = CQStart; + ControlRegisters_[ALMAIF_INFO_DMEM_SIZE_LOW / 4] = DmemSize; + ControlRegisters_[ALMAIF_INFO_DMEM_START_LOW / 4] = DmemStart; ControlRegisters_[ALMAIF_INFO_FEATURE_FLAGS_LOW / 4] = (relativeAddressing) ? 
0 : 1; ControlRegisters_[ALMAIF_INFO_PTR_SIZE / 4] = 4; diff --git a/lib/CL/devices/almaif/openasip/TTASimDevice.cc b/lib/CL/devices/almaif/openasip/TTASimDevice.cc index c38f728832f8ea19c0aacaa98d4f9f376cf2bb43..194ebf6a59bb7e8b5a066f0eb42181646cdbff8d 100644 --- a/lib/CL/devices/almaif/openasip/TTASimDevice.cc +++ b/lib/CL/devices/almaif/openasip/TTASimDevice.cc @@ -70,23 +70,18 @@ private: TTASimDevice *d_; }; -TTASimDevice::TTASimDevice(char *adf_name) { +TTASimDevice::TTASimDevice(const std::string &adf_name) { #ifdef ALMAIF_TTASimMMAP_DEBUG POCL_MSG_PRINT_ALMAIF("TTASimMMAP: Initializing TTASimMMAPregion with Address " "%zu and Size %zu\n", Address, RegionSize); #endif - unsigned adf_name_length = strlen(adf_name) + 5; - char *adf_char = (char *)malloc(adf_name_length); - assert(adf_char); - snprintf(adf_char, adf_name_length, "%s.adf", adf_name); + std::string adf_char = adf_name + ".adf"; simulator_ = new SimpleSimulatorFrontend(adf_char, false); assert(simulator_ != NULL && "simulator null\n"); - free(adf_char); - simulatorCLI_ = new SimulatorCLI(simulator_->frontend()); SigINTHandler *ctrlcHandler = new SigINTHandler(this); @@ -124,27 +119,24 @@ TTASimDevice::TTASimDevice(char *adf_name) { // Doesn't exist and should not ever be accessed InstructionMemory = nullptr; if ((global_as != cq_as) && !RelativeAddressing) { - CQMemory = new TTASimRegion(0, cq_size, cq_mem); + CQMemory = new TTASimRegion(0, CQSize, cq_mem); } else { - CQMemory = new TTASimRegion(cq_start, cq_size, cq_mem); + CQMemory = new TTASimRegion(CQStart, CQSize, cq_mem); } - DataMemory = new TTASimRegion(dmem_start, dmem_size, mem); + DataMemory = new TTASimRegion(DmemStart, DmemSize, mem); // For built-in kernel use-case. 
If the firmware.tpef exists, load it in - int tpef_file_length = strlen(adf_name) + 6; - char *tpef_file = (char *)malloc(tpef_file_length); - assert(tpef_file); - snprintf(tpef_file, tpef_file_length, "%s.tpef", adf_name); - if (pocl_exists(tpef_file)) { + std::string tpef_file = adf_name + ".tpef"; + if (pocl_exists(tpef_file.c_str())) { POCL_MSG_PRINT_ALMAIF( "Almaif: Found built-in kernel firmware for ttasim. Loading it in.\n"); loadProgram(tpef_file); } else { - POCL_MSG_PRINT_ALMAIF("File %s not found. Skipping program initialization\n", - tpef_file); + POCL_MSG_PRINT_ALMAIF( + "File %s not found. Skipping program initialization\n", + tpef_file.c_str()); } - free(tpef_file); if (!RelativeAddressing) { if (pocl_is_option_set("POCL_ALMAIF_EXTERNALREGION")) { @@ -191,7 +183,7 @@ TTASimDevice::~TTASimDevice() { delete simulatorCLI_; } -void TTASimDevice::loadProgram(char *tpef_file) { +void TTASimDevice::loadProgram(const std::string &tpef_file) { if (simulator_->isRunning()) ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_RESET_CMD); while (simulator_->isRunning()) @@ -200,7 +192,7 @@ void TTASimDevice::loadProgram(char *tpef_file) { over all the simulations. 
*/ // if (currentProgram != NULL) // globalCycleCount += simulator_.cycleCount(); - simulator_->loadProgram(tpef_file); + simulator_->loadProgram(tpef_file.c_str()); } void TTASimDevice::loadProgramToDevice(almaif_kernel_data_s *kd, @@ -234,9 +226,7 @@ void TTASimDevice::loadProgramToDevice(almaif_kernel_data_s *kd, loadProgram(tpef_file); - char wg_func_name[120]; - snprintf(wg_func_name, sizeof(wg_func_name), "%s_workgroup_argbuffer", - kernel->name); + std::string wg_func_name = std::string(kernel->name) + "_workgroup_argbuffer"; const TTAProgram::Program *prog = &simulator_->program(); if (prog->hasProcedure(wg_func_name)) { const TTAProgram::Procedure &proc = prog->procedure(wg_func_name); diff --git a/lib/CL/devices/almaif/openasip/TTASimDevice.hh b/lib/CL/devices/almaif/openasip/TTASimDevice.hh index f6f792942ff256084af1c7082a0dac461f5240f4..8d92ea7e80f3c1305e0e45f32c9822e63268890c 100644 --- a/lib/CL/devices/almaif/openasip/TTASimDevice.hh +++ b/lib/CL/devices/almaif/openasip/TTASimDevice.hh @@ -32,7 +32,7 @@ class SimulatorCLI; class TTASimDevice : public AlmaIFDevice { public: - TTASimDevice(char *adf_name); + TTASimDevice(const std::string &adf_name); ~TTASimDevice() override; virtual void loadProgramToDevice(almaif_kernel_data_s *kd, cl_kernel kernel, @@ -51,7 +51,7 @@ public: void stopProgram(); private: - void loadProgram(char *loadProgram); + void loadProgram(const std::string &loadProgram); }; #endif diff --git a/lib/CL/devices/almaif/openasip/TTASimRegion.cc b/lib/CL/devices/almaif/openasip/TTASimRegion.cc index 1de638734fedc1bf7c4c44532fd4d3b18bb6bc64..b2c8ba43785190982943079f082fd956b3df4ea3 100644 --- a/lib/CL/devices/almaif/openasip/TTASimRegion.cc +++ b/lib/CL/devices/almaif/openasip/TTASimRegion.cc @@ -37,8 +37,8 @@ TTASimRegion::TTASimRegion(size_t Address, size_t RegionSize, "TTASim: Initializing TTASimRegion with Address %zu " "and Size %zu and memptr %p\n", Address, RegionSize, (void*)mem.get()); - PhysAddress = Address; - Size = 
RegionSize; + PhysAddress_ = Address; + Size_ = RegionSize; mem_ = mem; assert(mem != nullptr && "memory handle NULL, is the sim opened properly?"); } @@ -46,57 +46,57 @@ TTASimRegion::TTASimRegion(size_t Address, size_t RegionSize, uint32_t TTASimRegion::Read32(size_t offset) { POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Reading from physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(mem_ != nullptr && "No memory handle; read before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); uint64_t result = 0; - mem_->read(PhysAddress + offset, 4, result); + mem_->read(PhysAddress_ + offset, 4, result); return result; } void TTASimRegion::Write32(size_t offset, uint32_t value) { POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Writing to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(mem_ != nullptr && "No memory handle; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); - mem_->writeDirectlyLE(PhysAddress + offset, 4, value); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); + mem_->writeDirectlyLE(PhysAddress_ + offset, 4, value); } void TTASimRegion::Write16(size_t offset, uint16_t value) { POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Writing to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(mem_ != nullptr && "No memory handle; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); - mem_->writeDirectlyLE(PhysAddress + offset, 2, value); + mem_->writeDirectlyLE(PhysAddress_ + offset, 2, value); } uint64_t TTASimRegion::Read64(size_t offset) { 
POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Reading from physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(mem_ != nullptr && "No memory handle; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); uint64_t result = 0; - mem_->read(PhysAddress + offset, 8, result); + mem_->read(PhysAddress_ + offset, 8, result); return result; } void TTASimRegion::Write64(size_t offset, uint64_t value) { POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Writing to physical address 0x%zx with " - "offset 0x%zx\n", - PhysAddress, offset); + "offset 0x%zx\n", + PhysAddress_, offset); assert(mem_ != nullptr && "No memory handle; write before mapping?"); - assert(offset < Size && "Attempt to access data outside MMAP'd buffer"); - mem_->writeDirectlyLE(PhysAddress + offset, 8, value); + assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer"); + mem_->writeDirectlyLE(PhysAddress_ + offset, 8, value); } void TTASimRegion::CopyToMMAP(size_t destination, const void *source, @@ -104,10 +104,10 @@ void TTASimRegion::CopyToMMAP(size_t destination, const void *source, POCL_MSG_PRINT_ALMAIF_MMAP( "TTASim: Writing 0x%zx bytes to buffer at 0x%zx with " "address 0x%zx\n", - bytes, PhysAddress, destination); + bytes, PhysAddress_, destination); auto src = (uint8_t *)source; - size_t offset = destination - PhysAddress; - assert(offset < Size && "Attempt to access data outside TTASim Region"); + size_t offset = destination - PhysAddress_; + assert(offset < Size_ && "Attempt to access data outside TTASim Region"); for (size_t i = 0; i < bytes; ++i) { mem_->writeDirectlyLE(destination + i, 1, (Memory::MAU)src[i]); @@ -117,11 +117,11 @@ void TTASimRegion::CopyToMMAP(size_t destination, const void *source, void TTASimRegion::CopyFromMMAP(void *destination, size_t source, size_t bytes) { 
POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Reading 0x%zx bytes from buffer at 0x%zx " - "with address 0x%zx\n", - bytes, PhysAddress, source); + "with address 0x%zx\n", + bytes, PhysAddress_, source); auto dst = (uint8_t *)destination; - size_t offset = source - PhysAddress; - assert(offset < Size && "Attempt to access data outside TTASim Region"); + size_t offset = source - PhysAddress_; + assert(offset < Size_ && "Attempt to access data outside TTASim Region"); for (size_t i = 0; i < bytes; ++i) { dst[i] = mem_->read(source + i); @@ -132,11 +132,11 @@ void TTASimRegion::CopyInMem(size_t source, size_t destination, size_t bytes) { POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Copying 0x%zx bytes from 0x%zx " "to 0x%zx\n", bytes, source, destination); - size_t src_offset = source - PhysAddress; - size_t dst_offset = destination - PhysAddress; - assert(src_offset < Size && (src_offset + bytes) <= Size && + size_t src_offset = source - PhysAddress_; + size_t dst_offset = destination - PhysAddress_; + assert(src_offset < Size_ && (src_offset + bytes) <= Size_ && "Attempt to access data outside TTASim Region"); - assert(dst_offset < Size && (dst_offset + bytes) <= Size && + assert(dst_offset < Size_ && (dst_offset + bytes) <= Size_ && "Attempt to access data outside TTASim Region"); for (size_t i = 0; i < bytes; ++i) { Memory::MAU m = mem_->read(source + i); diff --git a/lib/CL/devices/builtin_kernels.cc b/lib/CL/devices/builtin_kernels.cc index 06dabfc86bcd296618f6cc23542fc81761f4d36d..f8f7642238b9fa7cefe87c2b8c6dd7fbd913f23f 100644 --- a/lib/CL/devices/builtin_kernels.cc +++ b/lib/CL/devices/builtin_kernels.cc @@ -214,6 +214,44 @@ BIKD pocl_BIDescriptors[BIKERNELS] = { BIArg("unsigned int*", "minloc", WRITE_BUF), BIArg("unsigned int*", "maxloc", WRITE_BUF), }), + BIKD(POCL_CDBI_SOBEL3X3_U8, + "pocl.sobel3x3.u8", + { + BIArg("unsigned char*", "input", READ_BUF), + BIArg("unsigned short*", "sobel_x", WRITE_BUF), + BIArg("unsigned short*", "sobel_y", WRITE_BUF), + }), + 
BIKD(POCL_CDBI_PHASE_U8, + "pocl.phase.u8", + { + BIArg("unsigned short*", "in_x", READ_BUF), + BIArg("unsigned short*", "in_y", READ_BUF), + BIArg("unsigned char*", "output", WRITE_BUF), + }), + BIKD(POCL_CDBI_MAGNITUDE_U16, + "pocl.magnitude.u16", + { + BIArg("unsigned short*", "in_x", READ_BUF), + BIArg("unsigned short*", "in_y", READ_BUF), + BIArg("unsigned short*", "output", WRITE_BUF), + }), + BIKD(POCL_CDBI_ORIENTED_NONMAX_U16, + "pocl.oriented.nonmaxsuppression.u16", + { + BIArg("unsigned short*", "magnitude", READ_BUF), + BIArg("unsigned char*", "phase", READ_BUF), + BIArg("unsigned char*", "output", WRITE_BUF), + BIArg("unsigned short", "threshold_lower", POD_ARG_32b), + BIArg("unsigned short", "threshold_upper", POD_ARG_32b), + }), + BIKD(POCL_CDBI_CANNY_U8, + "pocl.canny.u8", + { + BIArg("unsigned char*", "input", READ_BUF), + BIArg("unsigned char*", "output", WRITE_BUF), + BIArg("unsigned short", "threshold_lower", POD_ARG_32b), + BIArg("unsigned short", "threshold_upper", POD_ARG_32b), + }), }; BIKD::BIKD(BuiltinKernelId KernelIdentifier, const char *KernelName, diff --git a/lib/CL/devices/builtin_kernels.hh b/lib/CL/devices/builtin_kernels.hh index a8603c141efe700c515aeea5153b54a04bdc0d4a..2f47b789ea9e7b90831a437006ac34c3bce0df61 100644 --- a/lib/CL/devices/builtin_kernels.hh +++ b/lib/CL/devices/builtin_kernels.hh @@ -38,8 +38,7 @@ #include <vector> -enum BuiltinKernelId : uint16_t -{ +enum BuiltinKernelId : uint16_t { // CD = custom device, BI = built-in // 1D array byte copy, get_global_size(0) defines the size of data to copy // kernel prototype: pocl.copy(char *input, char *output) @@ -68,6 +67,11 @@ enum BuiltinKernelId : uint16_t POCL_CDBI_OPENVX_SCALEIMAGE_BL_U8 = 22, POCL_CDBI_OPENVX_TENSORCONVERTDEPTH_WRAP_U8_F32 = 23, POCL_CDBI_OPENVX_MINMAXLOC_R1_U8 = 24, + POCL_CDBI_SOBEL3X3_U8 = 25, + POCL_CDBI_PHASE_U8 = 26, + POCL_CDBI_MAGNITUDE_U16 = 27, + POCL_CDBI_ORIENTED_NONMAX_U16 = 28, + POCL_CDBI_CANNY_U8 = 29, POCL_CDBI_LAST, 
POCL_CDBI_JIT_COMPILER = 0xFFFF }; diff --git a/tools/data/tta_test_machines/axim_sep.adf b/tools/data/tta_test_machines/axim_sep.adf index ee1eb7f37cc9df75cd3102f4cbff8376edb0ebd6..996e523544cd98caca1b2d94cf8e91dd05f0dbca 100644 --- a/tools/data/tta_test_machines/axim_sep.adf +++ b/tools/data/tta_test_machines/axim_sep.adf @@ -1000,7 +1000,7 @@ <address-space name="instructions"> <width>8</width> <min-address>0</min-address> - <max-address>4095</max-address> + <max-address>2047</max-address> </address-space> <address-space name="data"> diff --git a/tools/data/tta_test_machines/relative_sep.adf b/tools/data/tta_test_machines/relative_sep.adf index 604ee92c3af34593b913df7dc3241ca8dd2f0d06..4d275599656b04865dce33102f888efbc37a2217 100644 --- a/tools/data/tta_test_machines/relative_sep.adf +++ b/tools/data/tta_test_machines/relative_sep.adf @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8" standalone="no" ?> -<adf version="1.10"> +<adf version="1.20"> <little-endian/> @@ -835,7 +835,7 @@ <address-space name="param"> <width>8</width> <min-address>0</min-address> - <max-address>32767</max-address> + <max-address>16383</max-address> <numerical-id>1</numerical-id> <numerical-id>2</numerical-id> </address-space> @@ -843,7 +843,7 @@ <address-space name="instructions"> <width>8</width> <min-address>0</min-address> - <max-address>4095</max-address> + <max-address>2047</max-address> </address-space> <address-space name="data">