Skip to content
Snippets Groups Projects
Commit 00fd1a0d authored by Blaise Tine, committed by ncrouzet
Browse files

minor fixes

parent 445f2858
No related branches found
No related tags found
No related merge requests found
...@@ -70,6 +70,10 @@ static cl_bool vortex_available = CL_TRUE; ...@@ -70,6 +70,10 @@ static cl_bool vortex_available = CL_TRUE;
static const char *vortex_native_device_aux_funcs[] = {NULL}; static const char *vortex_native_device_aux_funcs[] = {NULL};
/* Build-initialisation hook for the Vortex device.
 *
 * Produces the extra compiler flags that switch on the "m", "a", "f" and "d"
 * target features. The result is a fresh heap copy made with strdup(), so it
 * is NULL if that allocation fails.
 * NOTE(review): presumably the caller releases the string with free() -
 * confirm against the code that invokes ops->init_build.
 *
 * data: not read by this implementation.
 */
char* pocl_vortex_init_build(void *data) {
  static const char target_feature_flags[] =
      "-target-feature +m -target-feature +a -target-feature +f -target-feature +d";

  (void)data; /* the hook signature takes it, but nothing here reads it */

  return strdup(target_feature_flags);
}
void pocl_vortex_init_device_ops(struct pocl_device_ops *ops) { void pocl_vortex_init_device_ops(struct pocl_device_ops *ops) {
ops->device_name = "vortex"; ops->device_name = "vortex";
...@@ -95,6 +99,7 @@ void pocl_vortex_init_device_ops(struct pocl_device_ops *ops) { ...@@ -95,6 +99,7 @@ void pocl_vortex_init_device_ops(struct pocl_device_ops *ops) {
ops->supports_binary = pocl_driver_supports_binary; ops->supports_binary = pocl_driver_supports_binary;
ops->build_poclbinary = pocl_driver_build_poclbinary; ops->build_poclbinary = pocl_driver_build_poclbinary;
ops->build_builtin = pocl_driver_build_opencl_builtins; ops->build_builtin = pocl_driver_build_opencl_builtins;
ops->init_build = pocl_vortex_init_build;
ops->post_build_program = pocl_vortex_post_build_program; ops->post_build_program = pocl_vortex_post_build_program;
ops->free_program = pocl_vortex_free_program; ops->free_program = pocl_vortex_free_program;
...@@ -175,6 +180,7 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) ...@@ -175,6 +180,7 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters)
dev->address_bits = VORTEX_XLEN; dev->address_bits = VORTEX_XLEN;
dev->llvm_target_triplet = is64bit ? "riscv64-unknown-unknown" : "riscv32-unknown-unknown"; dev->llvm_target_triplet = is64bit ? "riscv64-unknown-unknown" : "riscv32-unknown-unknown";
dev->llvm_abi = is64bit ? "lp64d" : "ilp32f"; dev->llvm_abi = is64bit ? "lp64d" : "ilp32f";
dev->llvm_cpu = is64bit ? "generic-rv64" : "generic-rv32";
dev->kernellib_name = is64bit ? "kernel-riscv64" : "kernel-riscv32"; dev->kernellib_name = is64bit ? "kernel-riscv64" : "kernel-riscv32";
dev->kernellib_fallback_name = NULL; dev->kernellib_fallback_name = NULL;
dev->kernellib_subdir = "vortex"; dev->kernellib_subdir = "vortex";
...@@ -214,9 +220,31 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters) ...@@ -214,9 +220,31 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters)
return CL_DEVICE_NOT_FOUND; return CL_DEVICE_NOT_FOUND;
} }
uint64_t num_warps;
vx_err = vx_dev_caps(vx_device, VX_CAPS_NUM_WARPS, &num_warps);
if (vx_err != 0) {
vx_dev_close(vx_device);
free(dd);
return CL_DEVICE_NOT_FOUND;
}
uint64_t num_threads;
vx_err = vx_dev_caps(vx_device, VX_CAPS_NUM_THREADS, &num_threads);
if (vx_err != 0) {
vx_dev_close(vx_device);
free(dd);
return CL_DEVICE_NOT_FOUND;
}
uint64_t max_work_group_size = num_warps * num_threads;
dev->global_mem_size = global_mem_size; dev->global_mem_size = global_mem_size;
dev->max_mem_alloc_size = global_mem_size; dev->max_mem_alloc_size = global_mem_size;
dev->local_mem_size = local_mem_size; dev->local_mem_size = local_mem_size;
dev->max_work_group_size = max_work_group_size;
dev->max_work_item_sizes[0] = max_work_group_size;
dev->max_work_item_sizes[1] = max_work_group_size;
dev->max_work_item_sizes[2] = max_work_group_size;
dev->max_compute_units = num_cores; dev->max_compute_units = num_cores;
dd->vx_kernel_buffer = NULL; dd->vx_kernel_buffer = NULL;
...@@ -345,7 +373,7 @@ int pocl_vortex_create_kernel (cl_device_id device, cl_program program, ...@@ -345,7 +373,7 @@ int pocl_vortex_create_kernel (cl_device_id device, cl_program program,
const char* current = pdata->kernel_names; const char* current = pdata->kernel_names;
int i = 0; int i = 0;
int found = 0; int found = 0;
for (int i = 0; i < pdata->num_kernels; ++i) { for (; i < pdata->num_kernels; ++i) {
if (strcmp(current, kernel->name) == 0) { if (strcmp(current, kernel->name) == 0) {
found = 1; found = 1;
break; break;
...@@ -439,7 +467,7 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) { ...@@ -439,7 +467,7 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) {
} }
// check occupancy // check occupancy
if (local_mem_size != 0) { if (group_size != 1) {
int available_localmem; int available_localmem;
vx_err = vx_check_occupancy(dd->vx_device, group_size, &available_localmem); vx_err = vx_check_occupancy(dd->vx_device, group_size, &available_localmem);
if (vx_err != 0) { if (vx_err != 0) {
......
...@@ -101,10 +101,10 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module ...@@ -101,10 +101,10 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module
auto I32Ty = llvm::Type::getInt32Ty(Context); auto I32Ty = llvm::Type::getInt32Ty(Context);
auto I8Ty = llvm::Type::getInt8Ty(Context); auto I8Ty = llvm::Type::getInt8Ty(Context);
auto I8PtrTy = I8Ty->getPointerTo();
// Create new function signature // Create new function signature
auto ArgBufferType = llvm::PointerType::get(llvm::Type::getInt8Ty(Context), 0); auto NewFuncType = llvm::FunctionType::get(function->getReturnType(), {I8PtrTy}, false);
auto NewFuncType = llvm::FunctionType::get(function->getReturnType(), {ArgBufferType}, false);
auto NewFunc = llvm::Function::Create(NewFuncType, function->getLinkage(), function->getName() + "_vortex"); auto NewFunc = llvm::Function::Create(NewFuncType, function->getLinkage(), function->getName() + "_vortex");
module->getFunctionList().insert(function->getIterator(), NewFunc); module->getFunctionList().insert(function->getIterator(), NewFunc);
NewFunc->takeName(function); NewFunc->takeName(function);
...@@ -116,13 +116,14 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module ...@@ -116,13 +116,14 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module
auto ai = NewFunc->arg_begin(); auto ai = NewFunc->arg_begin();
auto ArgBuffer = &*ai++; auto ArgBuffer = &*ai++;
ArgBuffer->setName("ArgBuffer"); ArgBuffer->setName("ArgBuffer");
auto I8PtrTy = I8Ty->getPointerTo();
unsigned arg_idx = 0; unsigned arg_idx = 0;
unsigned arg_offset = 0; unsigned arg_offset = 0;
llvm::Value* allocated_local_mem = nullptr; llvm::Value* allocated_local_mem = nullptr;
auto MDS = llvm::MDNode::get(Context, llvm::MDString::get(Context, "vortex.uniform"));
for (auto& OldArg : function->args()) { for (auto& OldArg : function->args()) {
auto ArgType = OldArg.getType(); auto ArgType = OldArg.getType();
auto ArgOffset = llvm::ConstantInt::get(I32Ty, arg_offset); auto ArgOffset = llvm::ConstantInt::get(I32Ty, arg_offset);
...@@ -149,6 +150,10 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module ...@@ -149,6 +150,10 @@ static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module
Arg = Builder.CreateLoad(ArgType, offset_ptr, OldArg.getName() + "_loaded"); Arg = Builder.CreateLoad(ArgType, offset_ptr, OldArg.getName() + "_loaded");
arg_offset += DL.getTypeAllocSize(ArgType); arg_offset += DL.getTypeAllocSize(ArgType);
} }
auto instr = llvm::cast<llvm::Instruction>(Arg);
assert(instr != nullptr);
instr->setMetadata("vortex.uniform", MDS);
OldArg.replaceAllUsesWith(Arg); OldArg.replaceAllUsesWith(Arg);
arg_idx += 1; arg_idx += 1;
} }
...@@ -186,9 +191,9 @@ static void addKernelSelect(llvm::SmallVector<std::string, 8>& funcNames, llvm:: ...@@ -186,9 +191,9 @@ static void addKernelSelect(llvm::SmallVector<std::string, 8>& funcNames, llvm::
auto& Context = module->getContext(); auto& Context = module->getContext();
auto I32Ty = llvm::Type::getInt32Ty(Context); auto I32Ty = llvm::Type::getInt32Ty(Context);
auto VoidTy = llvm::Type::getVoidTy(Context); auto I8Ty = llvm::Type::getInt8Ty(Context);
auto VoidPtrTy = llvm::PointerType::getUnqual(VoidTy); auto I8PtrTy = I8Ty->getPointerTo();
auto GetKernelCallbackTy = llvm::FunctionType::get(VoidPtrTy, {I32Ty}, false); auto GetKernelCallbackTy = llvm::FunctionType::get(I8PtrTy, {I32Ty}, false);
auto GetKernelCallbackFunc = llvm::Function::Create( auto GetKernelCallbackFunc = llvm::Function::Create(
GetKernelCallbackTy, llvm::Function::ExternalLinkage, "__vx_get_kernel_callback", module); GetKernelCallbackTy, llvm::Function::ExternalLinkage, "__vx_get_kernel_callback", module);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment