Skip to content
Snippets Groups Projects
Commit bafd5c47 authored by Blaise Tine's avatar Blaise Tine Committed by ncrouzet
Browse files

adding support for vortex 64-bit

parent 00fd1a0d
Branches
No related tags found
No related merge requests found
......@@ -1880,9 +1880,6 @@ if (ENABLE_VORTEX)
if(NOT DEFINED VORTEX_PREFIX)
message(FATAL_ERROR "should set 'VORTEX_PREFIX' option")
endif()
if(NOT DEFINED VORTEX_XLEN)
message(FATAL_ERROR "should set 'VORTEX_XLEN' option")
endif()
set(BUILD_VORTEX 1)
set(VORTEX_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store")
set(VORTEX_DEVICE_CL_VERSION_MAJOR 1)
......
......@@ -5,7 +5,7 @@
- export POCL_PATH=$TOOLDIR/pocl
- export VORTEX_PREFIX=$HOME/tools/vortex
- export LLVM_PREFIX=$TOOLDIR/llvm-vortex
- cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=$POCL_PATH -DWITH_LLVM_CONFIG=$LLVM_PREFIX/bin/llvm-config -DVORTEX_PREFIX=$VORTEX_PREFIX -DVORTEX_XLEN=32 -DENABLE_VORTEX=ON -DKERNEL_CACHE_DEFAULT=OFF -DENABLE_HOST_CPU_DEVICES=OFF -DENABLE_TESTS=OFF -DPOCL_DEBUG_MESSAGES=ON -DENABLE_ICD=OFF ..
- cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=$POCL_PATH -DWITH_LLVM_CONFIG=$LLVM_PREFIX/bin/llvm-config -DVORTEX_PREFIX=$VORTEX_PREFIX -DENABLE_VORTEX=ON -DKERNEL_CACHE_DEFAULT=OFF -DENABLE_HOST_CPU_DEVICES=OFF -DENABLE_TESTS=OFF -DPOCL_DEBUG_MESSAGES=ON -DENABLE_ICD=OFF ..
- make -j`nproc`
- make install
- cp -r ../include $POCL_RT_PATH
\ No newline at end of file
......@@ -123,8 +123,6 @@
#define VORTEX_DEVICE_EXTENSIONS "@VORTEX_DEVICE_EXTENSIONS@"
#define VORTEX_XLEN @VORTEX_XLEN@
#endif
#if defined(ENABLE_HOST_CPU_DEVICES)
......
......@@ -47,6 +47,8 @@ typedef struct {
pocl_lock_t compile_lock;
int is_64bit;
size_t ctx_refcount;
} vortex_device_data_t;
......@@ -120,15 +122,14 @@ void pocl_vortex_init_device_ops(struct pocl_device_ops *ops) {
ops->free_mapping_ptr = pocl_driver_free_mapping_ptr;
}
char * pocl_vortex_build_hash (cl_device_id device)
char * pocl_vortex_build_hash (cl_device_id dev)
{
char *res = (char *)calloc(1000, sizeof(char));
vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
#if (VORTEX_XLEN == 64)
snprintf(res, 1000, "vortex-riscv64-unknown-unknown-elf");
#else
snprintf(res, 1000, "vortex-riscv32-unknown-unknown-elf");
#endif
if (dev->address_bits == 64) {
snprintf(res, 1000, "vortex-riscv64-unknown-unknown-elf");
} else {
snprintf(res, 1000, "vortex-riscv32-unknown-unknown-elf");
}
return res;
}
......@@ -143,9 +144,9 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters)
int vx_err;
vortex_device_data_t *dd;
const char* sz_cflags = pocl_get_string_option("POCL_VORTEX_CFLAGS", "");
const char* sz_xlen = pocl_get_string_option("POCL_VORTEX_XLEN", "32");
int is64bit = (VORTEX_XLEN == 64);
int is_64bit = (strcmp(sz_xlen, "64") == 0);
assert (dev->data == NULL);
......@@ -169,19 +170,19 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters)
dev->run_workgroup_pass = CL_FALSE;
dev->execution_capabilities = CL_EXEC_KERNEL;
//dev->global_as_id = VX_ADDR_SPACE_GLOBAL;
//dev->local_as_id = VX_ADDR_SPACE_LOCAL;
//dev->local_as_id = VX_ADDR_SPACE_LOCAL;439
//dev->constant_as_id = VX_ADDR_SPACE_CONSTANT;
dev->autolocals_to_args = POCL_AUTOLOCALS_TO_ARGS_ALWAYS;
dev->device_alloca_locals = CL_FALSE;
dev->device_side_printf = 0;
dev->has_64bit_long = is64bit;
dev->has_64bit_long = is_64bit;
dev->llvm_cpu = NULL;
dev->address_bits = VORTEX_XLEN;
dev->llvm_target_triplet = is64bit ? "riscv64-unknown-unknown" : "riscv32-unknown-unknown";
dev->llvm_abi = is64bit ? "lp64d" : "ilp32f";
dev->llvm_cpu = is64bit ? "generic-rv64" : "generic-rv32";
dev->kernellib_name = is64bit ? "kernel-riscv64" : "kernel-riscv32";
dev->address_bits = is_64bit ? 64 : 32;
dev->llvm_target_triplet = is_64bit ? "riscv64-unknown-unknown" : "riscv32-unknown-unknown";
dev->llvm_abi = is_64bit ? "lp64d" : "ilp32f";
dev->llvm_cpu = is_64bit ? "generic-rv64" : "generic-rv32";
dev->kernellib_name = is_64bit ? "kernel-riscv64" : "kernel-riscv32";
dev->kernellib_fallback_name = NULL;
dev->kernellib_subdir = "vortex";
dev->device_aux_functions = vortex_native_device_aux_funcs;
......@@ -252,6 +253,8 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters)
dd->ctx_refcount = 0;
dd->is_64bit = is_64bit;
POCL_INIT_LOCK(dd->compile_lock);
POCL_INIT_LOCK(dd->cq_lock);
......@@ -261,8 +264,8 @@ pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters)
return CL_SUCCESS;
}
cl_int pocl_vortex_uninit (unsigned j, cl_device_id device) {
vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
cl_int pocl_vortex_uninit (unsigned j, cl_device_id dev) {
vortex_device_data_t *dd = (vortex_device_data_t *)dev->data;
if (NULL == dd)
return CL_SUCCESS;
......@@ -274,12 +277,12 @@ cl_int pocl_vortex_uninit (unsigned j, cl_device_id device) {
POCL_DESTROY_LOCK (dd->compile_lock);
POCL_DESTROY_LOCK (dd->cq_lock);
POCL_MEM_FREE(dd);
device->data = NULL;
dev->data = NULL;
return CL_SUCCESS;
}
int pocl_vortex_init_context (cl_device_id device, cl_context context) {
vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
int pocl_vortex_init_context (cl_device_id dev, cl_context context) {
vortex_device_data_t *dd = (vortex_device_data_t *)dev->data;
if (NULL == dd)
return CL_SUCCESS;
......@@ -288,13 +291,13 @@ int pocl_vortex_init_context (cl_device_id device, cl_context context) {
return CL_SUCCESS;
}
int pocl_vortex_free_context (cl_device_id device, cl_context context) {
vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
int pocl_vortex_free_context (cl_device_id dev, cl_context context) {
vortex_device_data_t *dd = (vortex_device_data_t *)dev->data;
if (NULL == dd)
return CL_SUCCESS;
if (--dd->ctx_refcount == 0) {
pocl_vortex_uninit(0, device);
pocl_vortex_uninit(0, dev);
}
return CL_SUCCESS;
......@@ -302,8 +305,8 @@ int pocl_vortex_free_context (cl_device_id device, cl_context context) {
int pocl_vortex_post_build_program (cl_program program, cl_uint device_i) {
int result;
cl_device_id device = program->devices[device_i];
vortex_device_data_t *ddata = (vortex_device_data_t *)device->data;
cl_device_id dev = program->devices[device_i];
vortex_device_data_t *ddata = (vortex_device_data_t *)dev->data;
vortex_program_data_t *pdata = NULL;
POCL_LOCK (ddata->compile_lock);
......@@ -339,14 +342,14 @@ int pocl_vortex_post_build_program (cl_program program, cl_uint device_i) {
return result;
}
int pocl_vortex_free_program (cl_device_id device, cl_program program,
int pocl_vortex_free_program (cl_device_id dev, cl_program program,
unsigned device_i) {
vortex_device_data_t *ddata = (vortex_device_data_t *)device->data;
vortex_device_data_t *dd = (vortex_device_data_t *)dev->data;
vortex_program_data_t *pdata = (vortex_program_data_t *)program->data[device_i];
if (pdata == NULL)
return CL_SUCCESS;
pocl_driver_free_program (device, program, device_i);
pocl_driver_free_program (dev, program, device_i);
POCL_MEM_FREE (pdata->kernel_names);
POCL_MEM_FREE (pdata);
......@@ -355,7 +358,7 @@ int pocl_vortex_free_program (cl_device_id device, cl_program program,
return CL_SUCCESS;
}
int pocl_vortex_create_kernel (cl_device_id device, cl_program program,
int pocl_vortex_create_kernel (cl_device_id dev, cl_program program,
cl_kernel kernel, unsigned device_i) {
int result = CL_SUCCESS;
pocl_kernel_metadata_t *meta = kernel->meta;
......@@ -392,7 +395,7 @@ int pocl_vortex_create_kernel (cl_device_id device, cl_program program,
return result;
}
int pocl_vortex_free_kernel (cl_device_id device, cl_program program,
int pocl_vortex_free_kernel (cl_device_id dev, cl_program program,
cl_kernel kernel, unsigned device_i) {
pocl_kernel_metadata_t *meta = kernel->meta;
assert(meta->data != NULL);
......@@ -433,7 +436,7 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) {
assert (data != NULL);
dd = (vortex_device_data_t *)data;
int ptr_size = VORTEX_XLEN / 8;
int ptr_size = dd->is_64bit ? 8 : 4;
// calculate kernel arguments buffer size
int local_mem_size = 0;
......@@ -602,9 +605,9 @@ void pocl_vortex_run (void *data, _cl_command_node *cmd) {
vx_mem_free(vx_kargs_buffer);
}
cl_int pocl_vortex_alloc_mem_obj(cl_device_id device, cl_mem mem_obj, void *host_ptr) {
cl_int pocl_vortex_alloc_mem_obj(cl_device_id dev, cl_mem mem_obj, void *host_ptr) {
int vx_err;
pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id];
pocl_mem_identifier *p = &mem_obj->device_ptrs[dev->global_mem_id];
/* let other drivers preallocate */
if ((mem_obj->flags & CL_MEM_ALLOC_HOST_PTR) && (mem_obj->mem_host_ptr == NULL))
......@@ -626,7 +629,7 @@ cl_int pocl_vortex_alloc_mem_obj(cl_device_id device, cl_mem mem_obj, void *host
if ((flags & CL_MEM_WRITE_ONLY) != 0)
vx_flags = VX_MEM_WRITE;
vortex_device_data_t* dd = (vortex_device_data_t *)device->data;
vortex_device_data_t* dd = (vortex_device_data_t *)dev->data;
vx_buffer_h vx_buffer;
vx_err = vx_mem_alloc(dd->vx_device, mem_obj->size, vx_flags, &vx_buffer);
......@@ -663,8 +666,8 @@ cl_int pocl_vortex_alloc_mem_obj(cl_device_id device, cl_mem mem_obj, void *host
return CL_SUCCESS;
}
void pocl_vortex_free(cl_device_id device, cl_mem mem_obj) {
pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id];
void pocl_vortex_free(cl_device_id dev, cl_mem mem_obj) {
pocl_mem_identifier *p = &mem_obj->device_ptrs[dev->global_mem_id];
cl_mem_flags flags = mem_obj->flags;
vortex_buffer_data_t* buf_data = (vortex_buffer_data_t*)p->mem_ptr;
......@@ -742,16 +745,16 @@ void pocl_vortex_submit (_cl_command_node *node, cl_command_queue cq) {
return;
}
void pocl_vortex_flush (cl_device_id device, cl_command_queue cq) {
vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
void pocl_vortex_flush (cl_device_id dev, cl_command_queue cq) {
vortex_device_data_t *dd = (vortex_device_data_t *)dev->data;
POCL_LOCK (dd->cq_lock);
vortex_command_scheduler (dd);
POCL_UNLOCK (dd->cq_lock);
}
void pocl_vortex_join (cl_device_id device, cl_command_queue cq) {
vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
void pocl_vortex_join (cl_device_id dev, cl_command_queue cq) {
vortex_device_data_t *dd = (vortex_device_data_t *)dev->data;
POCL_LOCK (dd->cq_lock);
vortex_command_scheduler (dd);
......@@ -760,8 +763,8 @@ void pocl_vortex_join (cl_device_id device, cl_command_queue cq) {
return;
}
void pocl_vortex_notify (cl_device_id device, cl_event event, cl_event finished) {
vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
void pocl_vortex_notify (cl_device_id dev, cl_event event, cl_event finished) {
vortex_device_data_t *dd = (vortex_device_data_t *)dev->data;
_cl_command_node * volatile node = event->command;
if (finished->status < CL_COMPLETE)
......
......@@ -25,17 +25,15 @@
include("bitcode_rules")
if( ${VORTEX_XLEN} EQUAL 64 )
set(LLVM_TARGET riscv64)
set(TARGET_MARCH rv64imafd)
else( ${VORTEX_XLEN} EQUAL 64 )
set(LLVM_TARGET riscv32)
set(TARGET_MARCH rv32imaf)
endif( ${VORTEX_XLEN} EQUAL 64 )
set(LLVM_TARGET_64 riscv64)
set(TARGET_MARCH_64 rv64imafd)
set(LLVM_TARGET_32 riscv32)
set(TARGET_MARCH_32 rv32imaf)
set(KERNEL_SOURCES ${SOURCES_GENERIC})
foreach(FILE printf.c print_base.c
foreach(FILE printf.c printf_base.c
get_work_dim.c get_num_groups.c get_local_size.c get_global_offset.c get_global_size.c
get_group_id.c get_local_id.c get_global_id.c get_linear_id.c
barrier.ll
......@@ -48,32 +46,38 @@ foreach(FILE workitems.c printf.c barrier.c)
list(APPEND KERNEL_SOURCES "vortex/${FILE}")
endforeach()
set(CLANG_FLAGS "-ffreestanding" "-target" "${LLVM_TARGET}" "-march=${TARGET_MARCH}" "-emit-llvm" "-D_CL_DISABLE_HALF" "-I${VORTEX_PREFIX}/kernel/include")
set(KERNEL_CL_FLAGS "-Xclang" "-cl-std=CL${VORTEX_DEVICE_CL_STD}" "-D__OPENCL_C_VERSION__=${VORTEX_DEVICE_CL_VERSION}" ${KERNEL_CL_FLAGS})
set(LLC_FLAGS "")
set(DEVICE_CL_FLAGS "-D__OPENCL_VERSION__=${VORTEX_DEVICE_CL_VERSION} -DPOCL_DEVICE_ADDRESS_BITS=${VORTEX_XLEN}")
separate_arguments(VORTEX_DEVICE_EXTENSIONS)
foreach(EXT ${VORTEX_DEVICE_EXTENSIONS})
set(DEVICE_CL_FLAGS "${DEVICE_CL_FLAGS} -D${EXT}")
endforeach()
separate_arguments(DEVICE_CL_FLAGS)
make_kernel_bc(KERNEL_BC "${LLVM_TARGET}" "BCs" 0 0 0 ${KERNEL_SOURCES})
set(CLANG_FLAGS "-ffreestanding" "-target" "${LLVM_TARGET_32}" "-march=${TARGET_MARCH_32}" "-emit-llvm" "-DPOCL_DEVICE_ADDRESS_BITS=32" "-D_CL_DISABLE_HALF" "-I${VORTEX_PREFIX}/kernel/include")
set(DEVICE_CL_FLAGS "-D__OPENCL_VERSION__=${VORTEX_DEVICE_CL_VERSION} -DPOCL_DEVICE_ADDRESS_BITS=32")
make_kernel_bc(KERNEL_BC_32 "${LLVM_TARGET_32}" "VX32-BCs" 0 0 0 ${KERNEL_SOURCES})
set(CLANG_FLAGS "-ffreestanding" "-target" "${LLVM_TARGET_64}" "-march=${TARGET_MARCH_64}" "-emit-llvm" "-DPOCL_DEVICE_ADDRESS_BITS=64" "-D_CL_DISABLE_HALF" "-I${VORTEX_PREFIX}/kernel/include")
set(DEVICE_CL_FLAGS "-D__OPENCL_VERSION__=${VORTEX_DEVICE_CL_VERSION} -DPOCL_DEVICE_ADDRESS_BITS=64")
make_kernel_bc(KERNEL_BC_64 "${LLVM_TARGET_64}" "VX64-BCs" 0 0 0 ${KERNEL_SOURCES})
# just debug
message(STATUS "${LLVM_TARGET} Kernel BC: ${KERNEL_BC}")
message(STATUS "${LLVM_TARGET_32} Kernel BC: ${KERNEL_BC_32}")
message(STATUS "${LLVM_TARGET_64} Kernel BC: ${KERNEL_BC_64}")
list(APPEND KERNEL_BC_LIST "${KERNEL_BC}")
list(APPEND KERNEL_BC_LIST "${KERNEL_BC_32}")
list(APPEND KERNEL_BC_LIST "${KERNEL_BC_64}")
set(KERNEL_BC_LIST "${KERNEL_BC_LIST}" PARENT_SCOPE)
# a target is needed...
add_custom_target("kernel_${LLVM_TARGET}" DEPENDS ${KERNEL_BC})
add_custom_target("kernel_${LLVM_TARGET_32}" DEPENDS ${KERNEL_BC_32})
add_custom_target("kernel_${LLVM_TARGET_64}" DEPENDS ${KERNEL_BC_64})
list(APPEND KERNEL_TARGET_LIST "kernel_${LLVM_TARGET}")
list(APPEND KERNEL_TARGET_LIST "kernel_${LLVM_TARGET_32}")
list(APPEND KERNEL_TARGET_LIST "kernel_${LLVM_TARGET_64}")
set(KERNEL_TARGET_LIST "${KERNEL_TARGET_LIST}" PARENT_SCOPE)
install(FILES "${KERNEL_BC}" DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}" COMPONENT "lib")
install(FILES "${KERNEL_BC_32}" "${KERNEL_BC_64}" DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}" COMPONENT "lib")
#include <vx_spawn.h>
#if __riscv_xlen == 64
typedef uint64_t SizeT;
#elif __riscv_xlen == 32
typedef uint32_t SizeT;
#else
#error "Unsupported RISC-V XLEN"
#endif
extern int g_work_dim;
extern dim3_t g_global_offset;
uint32_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
uint32_t _CL_OVERLOADABLE
get_work_dim (void) {
return g_work_dim;
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_num_groups(uint32_t dimindx) {
switch (dimindx) {
default: return gridDim.x;
......@@ -17,7 +25,7 @@ get_num_groups(uint32_t dimindx) {
}
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_local_size(uint32_t dimindx) {
switch (dimindx) {
default: return blockDim.x;
......@@ -26,7 +34,7 @@ get_local_size(uint32_t dimindx) {
}
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_global_offset(uint32_t dimindx) {
switch (dimindx) {
default: return g_global_offset.x;
......@@ -35,7 +43,7 @@ get_global_offset(uint32_t dimindx) {
}
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_group_id(uint32_t dimindx) {
switch (dimindx) {
default: return blockIdx.x;
......@@ -44,7 +52,7 @@ get_group_id(uint32_t dimindx) {
}
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_local_id(uint32_t dimindx) {
switch (dimindx) {
default: return threadIdx.x;
......@@ -53,7 +61,7 @@ get_local_id(uint32_t dimindx) {
}
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_global_size(uint32_t dimindx) {
switch (dimindx) {
default: return blockDim.x * gridDim.x;
......@@ -62,7 +70,7 @@ get_global_size(uint32_t dimindx) {
}
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_global_id(uint32_t dimindx) {
switch (dimindx) {
default: return blockIdx.x * blockDim.x + threadIdx.x + g_global_offset.x;
......@@ -71,14 +79,14 @@ get_global_id(uint32_t dimindx) {
}
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_global_linear_id(void) {
return ((blockIdx.z * blockDim.z + threadIdx.z) * blockDim.y * gridDim.y * blockDim.x * gridDim.x)
+ ((blockIdx.y * blockDim.y + threadIdx.y) * blockDim.x * gridDim.x)
+ ((blockIdx.x * blockDim.z + threadIdx.x));
}
size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
SizeT _CL_OVERLOADABLE
get_local_linear_id(void) {
return (threadIdx.z * blockDim.y * blockDim.x) + (threadIdx.y * blockDim.x) + threadIdx.x;
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment