Skip to content
Snippets Groups Projects
Commit d17eb932, authored by Blaise Tine, committed by ncrouzet
Browse files

vortex 64-bit support fix

parent bafd5c47
Branches
No related tags found
No related merge requests found
...@@ -7,3 +7,7 @@ typedef struct { ...@@ -7,3 +7,7 @@ typedef struct {
uint32_t global_offset[3]; uint32_t global_offset[3];
uint32_t kernel_id; uint32_t kernel_id;
} kernel_args_t; } kernel_args_t;
/*
 * Round `offset` up to the next multiple of `alignment`.
 *
 * offset    - byte offset to round up.
 * alignment - required alignment in bytes; must be a power of two
 *             (the rounding is done with a bit mask, so any other value
 *             produces a meaningless result). An alignment of 0 is treated
 *             as "no alignment requirement" and returns `offset` unchanged.
 *
 * Returns the smallest multiple of `alignment` that is >= `offset`
 * (modulo 2^32 wrap-around if `offset` is within `alignment` of UINT32_MAX).
 *
 * Declared `static inline` so the header can be included from both C and
 * C++ translation units: a plain `inline` definition in C99 does not emit
 * an external definition, which leads to undefined-symbol link errors
 * whenever the compiler chooses not to inline the call.
 */
static inline uint32_t alignOffset(uint32_t offset, uint32_t alignment) {
  if (alignment == 0) {
    /* Mask would collapse to 0 and discard the offset; keep it instead. */
    return offset;
  }
  return (offset + alignment - 1) & ~(alignment - 1);
}
...@@ -18,8 +18,8 @@ int main(void) { ...@@ -18,8 +18,8 @@ int main(void) {
for (int i = 0, n = kargs->work_dim; i < 3; i++) { for (int i = 0, n = kargs->work_dim; i < 3; i++) {
g_global_offset.m[i] = (i < n) ? kargs->global_offset[i] : 0; g_global_offset.m[i] = (i < n) ? kargs->global_offset[i] : 0;
} }
uint32_t aligned_kernel_args_size = alignOffset(sizeof(kernel_args_t), sizeof(size_t));
void* arg = (void*)((uint8_t*)kargs + sizeof(kernel_args_t)); void* arg = (void*)((uint8_t*)kargs + aligned_kernel_args_size);
vx_kernel_func_cb kernel_func = (vx_kernel_func_cb)__vx_get_kernel_callback(kargs->kernel_id); vx_kernel_func_cb kernel_func = (vx_kernel_func_cb)__vx_get_kernel_callback(kargs->kernel_id);
return vx_spawn_threads(kargs->work_dim, kargs->num_groups, kargs->local_size, kernel_func, arg); return vx_spawn_threads(kargs->work_dim, kargs->num_groups, kargs->local_size, kernel_func, arg);
} }
...@@ -179,7 +179,7 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) ...@@ -179,7 +179,7 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters)
dev->llvm_cpu = NULL; dev->llvm_cpu = NULL;
dev->address_bits = is_64bit ? 64 : 32; dev->address_bits = is_64bit ? 64 : 32;
dev->llvm_target_triplet = is_64bit ? "riscv64-unknown-unknown" : "riscv32-unknown-unknown"; dev->llvm_target_triplet = is_64bit ? "riscv64-unknown-unknown-elf" : "riscv32-unknown-unknown-elf";
dev->llvm_abi = is_64bit ? "lp64d" : "ilp32f"; dev->llvm_abi = is_64bit ? "lp64d" : "ilp32f";
dev->llvm_cpu = is_64bit ? "generic-rv64" : "generic-rv32"; dev->llvm_cpu = is_64bit ? "generic-rv64" : "generic-rv32";
dev->kernellib_name = is_64bit ? "kernel-riscv64" : "kernel-riscv32"; dev->kernellib_name = is_64bit ? "kernel-riscv64" : "kernel-riscv32";
...@@ -424,9 +424,9 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { ...@@ -424,9 +424,9 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) {
struct pocl_context *pc = &cmd->command.run.pc; struct pocl_context *pc = &cmd->command.run.pc;
int vx_err; int vx_err;
int num_groups = 1; uint32_t num_groups = 1;
int group_size = 1; uint32_t group_size = 1;
for (int i = 0; i < pc->work_dim; ++i) { for (uint32_t i = 0; i < pc->work_dim; ++i) {
num_groups *= pc->num_groups[i]; num_groups *= pc->num_groups[i];
group_size *= pc->local_size[i]; group_size *= pc->local_size[i];
} }
...@@ -436,37 +436,39 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { ...@@ -436,37 +436,39 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) {
assert (data != NULL); assert (data != NULL);
dd = (vortex_device_data_t *)data; dd = (vortex_device_data_t *)data;
int ptr_size = dd->is_64bit ? 8 : 4; uint32_t ptr_size = dd->is_64bit ? 8 : 4;
uint32_t aligned_kernel_args_size = alignOffset(sizeof(kernel_args_t), ptr_size);
// calculate kernel arguments buffer size // calculate kernel arguments buffer size
int local_mem_size = 0; uint32_t local_mem_size = 0;
size_t abuf_size = 0; size_t abuf_size = 0;
for (int i = 0; i < meta->num_args; ++i) { for (int i = 0; i < meta->num_args; ++i) {
struct pocl_argument* al = &(cmd->command.run.arguments[i]); struct pocl_argument* al = &(cmd->command.run.arguments[i]);
if (ARG_IS_LOCAL(meta->arg_info[i])) { if (ARG_IS_LOCAL(meta->arg_info[i])) {
local_mem_size += al->size; local_mem_size += al->size;
abuf_size += 4; abuf_size = alignOffset(abuf_size + 4, ptr_size);
} else } else
if ((meta->arg_info[i].type == POCL_ARG_TYPE_POINTER) if ((meta->arg_info[i].type == POCL_ARG_TYPE_POINTER)
|| (meta->arg_info[i].type == POCL_ARG_TYPE_IMAGE) || (meta->arg_info[i].type == POCL_ARG_TYPE_IMAGE)
|| (meta->arg_info[i].type == POCL_ARG_TYPE_SAMPLER)) { || (meta->arg_info[i].type == POCL_ARG_TYPE_SAMPLER)) {
abuf_size += ptr_size; abuf_size = alignOffset(abuf_size + ptr_size, ptr_size);
} else { } else {
// scalar argument // scalar argument
abuf_size += al->size; abuf_size = alignOffset(abuf_size + al->size, ptr_size);
} }
} }
// local buffers // local buffers
for (int i = 0; i < meta->num_locals; ++i) { for (int i = 0; i < meta->num_locals; ++i) {
local_mem_size += meta->local_sizes[i]; local_mem_size += meta->local_sizes[i];
abuf_size += 4; abuf_size = alignOffset(abuf_size + 4, ptr_size);
} }
// add local size // add local size
if (local_mem_size != 0) { if (local_mem_size != 0) {
abuf_size += 4; abuf_size = alignOffset(abuf_size + 4, ptr_size);
} }
// check occupancy // check occupancy
...@@ -483,7 +485,7 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { ...@@ -483,7 +485,7 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) {
} }
// allocate arguments host buffer // allocate arguments host buffer
size_t kargs_buffer_size = sizeof(kernel_args_t) + abuf_size; size_t kargs_buffer_size = aligned_kernel_args_size + abuf_size;
uint8_t* const host_kargs_base_ptr = malloc(kargs_buffer_size); uint8_t* const host_kargs_base_ptr = malloc(kargs_buffer_size);
assert(host_kargs_base_ptr); assert(host_kargs_base_ptr);
...@@ -514,30 +516,31 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { ...@@ -514,30 +516,31 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) {
// write arguments // write arguments
uint8_t* host_args_ptr = host_kargs_base_ptr + sizeof(kernel_args_t); uint8_t* const host_args_ptr = host_kargs_base_ptr + aligned_kernel_args_size;
int local_mem_offset = 0; uint32_t host_args_offset = 0;
uint32_t local_mem_offset = 0;
for (int i = 0; i < meta->num_args; ++i) { for (int i = 0; i < meta->num_args; ++i) {
struct pocl_argument* al = &(cmd->command.run.arguments[i]); struct pocl_argument* al = &(cmd->command.run.arguments[i]);
if (ARG_IS_LOCAL(meta->arg_info[i])) { if (ARG_IS_LOCAL(meta->arg_info[i])) {
if (local_mem_offset == 0) { if (local_mem_offset == 0) {
memcpy(host_args_ptr, &local_mem_size, 4); // local_size memcpy(host_args_ptr + host_args_offset, &local_mem_size, 4); // local_size
host_args_ptr += 4; host_args_offset = alignOffset(host_args_offset + 4, ptr_size);
} }
memcpy(host_args_ptr, &local_mem_offset, 4); // arg offset memcpy(host_args_ptr + host_args_offset, &local_mem_offset, 4); // arg offset
host_args_ptr += 4; host_args_offset = alignOffset(host_args_offset + 4, ptr_size);
local_mem_offset += al->size; local_mem_offset += al->size;
} else } else
if (meta->arg_info[i].type == POCL_ARG_TYPE_POINTER) { if (meta->arg_info[i].type == POCL_ARG_TYPE_POINTER) {
if (al->value == NULL) { if (al->value == NULL) {
memset(host_args_ptr, 0, ptr_size); // NULL pointer value memset(host_args_ptr + host_args_offset, 0, ptr_size); // NULL pointer value
host_args_ptr += ptr_size; host_args_offset = alignOffset(host_args_offset + ptr_size, ptr_size);
} else { } else {
cl_mem m = (*(cl_mem *)(al->value)); cl_mem m = (*(cl_mem *)(al->value));
vortex_buffer_data_t* buf_data = (vortex_buffer_data_t *) m->device_ptrs[cmd->device->global_mem_id].mem_ptr; vortex_buffer_data_t* buf_data = (vortex_buffer_data_t *) m->device_ptrs[cmd->device->global_mem_id].mem_ptr;
uint64_t dev_mem_addr = buf_data->buf_address + al->offset; uint64_t dev_mem_addr = buf_data->buf_address + al->offset;
memcpy(host_args_ptr, &buf_data->buf_address, ptr_size); // pointer value memcpy(host_args_ptr + host_args_offset, &buf_data->buf_address, ptr_size); // pointer value
host_args_ptr += ptr_size; host_args_offset = alignOffset(host_args_offset + ptr_size, ptr_size);
} }
} else } else
if (meta->arg_info[i].type == POCL_ARG_TYPE_IMAGE) { if (meta->arg_info[i].type == POCL_ARG_TYPE_IMAGE) {
...@@ -547,19 +550,19 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { ...@@ -547,19 +550,19 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) {
POCL_ABORT("POCL_VORTEX_RUN\n"); POCL_ABORT("POCL_VORTEX_RUN\n");
} else { } else {
// scalar argument // scalar argument
memcpy(host_args_ptr, al->value, al->size); // scalar value memcpy(host_args_ptr + host_args_offset, al->value, al->size); // scalar value
host_args_ptr += al->size; host_args_offset = alignOffset(host_args_offset + al->size, ptr_size);
} }
} }
// write local arguments // write local arguments
for (int i = 0; i < meta->num_locals; ++i) { for (int i = 0; i < meta->num_locals; ++i) {
if (local_mem_offset == 0) { if (local_mem_offset == 0) {
memcpy(host_args_ptr, &local_mem_size, 4); // local_size memcpy(host_args_ptr + host_args_offset, &local_mem_size, 4); // local_size
host_args_ptr += 4; host_args_offset = alignOffset(host_args_offset + 4, ptr_size);
} }
memcpy(host_args_ptr, &local_mem_offset, 4); // arg offset memcpy(host_args_ptr + host_args_offset, &local_mem_offset, 4); // arg offset
host_args_ptr += 4; host_args_offset = alignOffset(host_args_offset + 4, ptr_size);
local_mem_offset += meta->local_sizes[i]; local_mem_offset += meta->local_sizes[i];
} }
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include "pocl_util.h" #include "pocl_util.h"
#include "LLVMUtils.h" #include "LLVMUtils.h"
#include "kernel_args.h"
static int exec(const char* cmd, std::ostream& out) { static int exec(const char* cmd, std::ostream& out) {
char buffer[128]; char buffer[128];
...@@ -99,6 +100,9 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module ...@@ -99,6 +100,9 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module
auto &Context = module->getContext(); auto &Context = module->getContext();
const llvm::DataLayout &DL = module->getDataLayout(); const llvm::DataLayout &DL = module->getDataLayout();
std::string TargetTriple = module->getTargetTriple();
bool is64Bit = TargetTriple.find("riscv64") != std::string::npos;
auto I32Ty = llvm::Type::getInt32Ty(Context); auto I32Ty = llvm::Type::getInt32Ty(Context);
auto I8Ty = llvm::Type::getInt8Ty(Context); auto I8Ty = llvm::Type::getInt8Ty(Context);
auto I8PtrTy = I8Ty->getPointerTo(); auto I8PtrTy = I8Ty->getPointerTo();
...@@ -124,6 +128,8 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module ...@@ -124,6 +128,8 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module
auto MDS = llvm::MDNode::get(Context, llvm::MDString::get(Context, "vortex.uniform")); auto MDS = llvm::MDNode::get(Context, llvm::MDString::get(Context, "vortex.uniform"));
uint32_t BaseAlignment = is64Bit ? 8 : 4;
for (auto& OldArg : function->args()) { for (auto& OldArg : function->args()) {
auto ArgType = OldArg.getType(); auto ArgType = OldArg.getType();
auto ArgOffset = llvm::ConstantInt::get(I32Ty, arg_offset); auto ArgOffset = llvm::ConstantInt::get(I32Ty, arg_offset);
...@@ -132,8 +138,8 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module ...@@ -132,8 +138,8 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module
if (allocated_local_mem == nullptr) { if (allocated_local_mem == nullptr) {
// Load __local_size // Load __local_size
auto local_size_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, "__local_size_ptr"); auto local_size_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, "__local_size_ptr");
arg_offset += 4;
auto local_size = Builder.CreateLoad(I32Ty, local_size_ptr, "__local_size"); auto local_size = Builder.CreateLoad(I32Ty, local_size_ptr, "__local_size");
arg_offset = alignOffset(arg_offset + 4, BaseAlignment);
// Call vx_local_alloc(__local_size) // Call vx_local_alloc(__local_size)
auto function_type = llvm::FunctionType::get(I8PtrTy, {I32Ty}, false); auto function_type = llvm::FunctionType::get(I8PtrTy, {I32Ty}, false);
auto vx_local_alloc_func = module->getOrInsertFunction("vx_local_alloc", function_type); auto vx_local_alloc_func = module->getOrInsertFunction("vx_local_alloc", function_type);
...@@ -142,13 +148,13 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module ...@@ -142,13 +148,13 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module
// Load argument __offset // Load argument __offset
auto offset_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, OldArg.getName() + "_offset_ptr"); auto offset_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, OldArg.getName() + "_offset_ptr");
auto offset = Builder.CreateLoad(I32Ty, offset_ptr, OldArg.getName() + "_offset"); auto offset = Builder.CreateLoad(I32Ty, offset_ptr, OldArg.getName() + "_offset");
arg_offset += 4; arg_offset = alignOffset(arg_offset + 4, BaseAlignment);
// Apply pointer offset // Apply pointer offset
Arg = Builder.CreateGEP(I8PtrTy, allocated_local_mem, offset, OldArg.getName() + "_byte_ptr"); Arg = Builder.CreateGEP(I8PtrTy, allocated_local_mem, offset, OldArg.getName() + "_byte_ptr");
} else { } else {
auto offset_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, OldArg.getName() + "_offset_ptr"); auto offset_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, OldArg.getName() + "_offset_ptr");
Arg = Builder.CreateLoad(ArgType, offset_ptr, OldArg.getName() + "_loaded"); Arg = Builder.CreateLoad(ArgType, offset_ptr, OldArg.getName() + "_loaded");
arg_offset += DL.getTypeAllocSize(ArgType); arg_offset = alignOffset(arg_offset + DL.getTypeAllocSize(ArgType), BaseAlignment);
} }
auto instr = llvm::cast<llvm::Instruction>(Arg); auto instr = llvm::cast<llvm::Instruction>(Arg);
assert(instr != nullptr); assert(instr != nullptr);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment