diff --git a/CMakeLists.txt b/CMakeLists.txt
index 775c07d2ca33794855f7bbede8760df23aeab860..0b9c206bdfa3b36e415c43cb96fa1be7feca4e0c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -216,6 +216,8 @@ option(POCL_DEBUG_MESSAGES
 
 option(ENABLE_LOADABLE_DRIVERS "Enable drivers to be dlopen()-ed at pocl runtime, instead of being linked into libpocl" ON)
 
+option(ENABLE_VORTEX "Enable the Vortex OpenGPU device driver" OFF)
+
 option(ENABLE_HSA "Enable the HSA base profile runtime device driver" OFF)
 
 option(ENABLE_CUDA "Enable the CUDA device driver for NVIDIA devices" OFF)
@@ -1156,9 +1158,9 @@ endif()
 
 endif()
 
-setup_cached_var(ENABLE_ICD "Using an ICD loader"
-  "Requested build with icd, but ICD loader not found! some examples will not work.."
-  "ICD loader found, but requested build without it")
+  setup_cached_var(ENABLE_ICD "Using an ICD loader"
+    "Requested build with icd, but ICD loader not found! some examples will not work.."
+    "ICD loader found, but requested build without it")
 
 if(ENABLE_ICD)
   # only meaningful to link tests with ocl-icd
@@ -1860,7 +1862,7 @@ if(ENABLE_CUDA)
   endif()
 
   # FP64 is always enabled for CUDA, it should be available since Compute Capability 1.3
-  set(CUDA_DEVICE_EXTENSIONS "${CUDA_DEVICE_EXTENSIONS} cl_khr_fp64")
+    set(CUDA_DEVICE_EXTENSIONS "${CUDA_DEVICE_EXTENSIONS} cl_khr_fp64")
   set(CUDA_DEVICE_FEATURES_30 "${CUDA_DEVICE_FEATURES_30} __opencl_c_fp64")
 
   if(ENABLE_CUDNN)
@@ -1872,6 +1874,25 @@ endif()
 
 ##########################################################
 
+if (ENABLE_VORTEX)
+  set(OCL_DRIVERS "${OCL_DRIVERS} vortex")
+  set(OCL_TARGETS "${OCL_TARGETS} vortex")
+  if(NOT DEFINED VORTEX_PREFIX)
+    message(FATAL_ERROR "should set 'VORTEX_PREFIX' option")
+  endif()
+  if(NOT DEFINED VORTEX_XLEN)
+    message(FATAL_ERROR "should set 'VORTEX_XLEN' option")
+  endif()
+  set(BUILD_VORTEX 1)
+  set(VORTEX_DEVICE_EXTENSIONS "cl_khr_byte_addressable_store")
+  set(VORTEX_DEVICE_CL_VERSION_MAJOR 1)
+  set(VORTEX_DEVICE_CL_VERSION_MINOR 2)
+  set(VORTEX_DEVICE_CL_VERSION "120")
+  set(VORTEX_DEVICE_CL_STD "1.2")
+endif()
+
+##########################################################
+
 message(STATUS "Building the following device drivers: ${OCL_DRIVERS}")
 
 set(BUILDDIR "${CMAKE_BINARY_DIR}")
@@ -2371,6 +2392,7 @@ MESSAGE(STATUS "ENABLE_TCEMC: ${ENABLE_TCEMC}")
 MESSAGE(STATUS "ENABLE_HSA: ${ENABLE_HSA}")
 MESSAGE(STATUS "ENABLE_ALMAIF_DEVICE: ${ENABLE_ALMAIF_DEVICE}")
 MESSAGE(STATUS "ENABLE_CUDA: ${ENABLE_CUDA}")
+MESSAGE(STATUS "ENABLE_VORTEX: ${ENABLE_VORTEX}")
 MESSAGE(STATUS "ENABLE_CUDNN: ${ENABLE_CUDNN}")
 MESSAGE(STATUS "ENABLE_HOST_CPU_DEVICES: ${ENABLE_HOST_CPU_DEVICES}")
 MESSAGE(STATUS "ENABLE_VULKAN: ${ENABLE_VULKAN}")
diff --git a/README.vortex b/README.vortex
new file mode 100644
index 0000000000000000000000000000000000000000..ebdbcb1255d0a10729644f8952602bf544237adc
--- /dev/null
+++ b/README.vortex
@@ -0,0 +1,11 @@
+## build POCL runtime
+- git clone --branch vortex --recursive https://github.com/vortexgpgpu/pocl
+- cd pocl
+- mkdir build && cd build
+- export POCL_PATH=$TOOLDIR/pocl
+- export VORTEX_PREFIX=$HOME/tools/vortex
+- export LLVM_PREFIX=$TOOLDIR/llvm-vortex
+- cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DCMAKE_INSTALL_PREFIX=$POCL_PATH -DWITH_LLVM_CONFIG=$LLVM_PREFIX/bin/llvm-config -DVORTEX_PREFIX=$VORTEX_PREFIX -DVORTEX_XLEN=32 -DENABLE_VORTEX=ON -DKERNEL_CACHE_DEFAULT=OFF -DENABLE_HOST_CPU_DEVICES=OFF -DENABLE_TESTS=OFF -DPOCL_DEBUG_MESSAGES=ON -DENABLE_ICD=OFF ..
+- make -j`nproc`
+- make install
+- cp -r ../include $POCL_RT_PATH
\ No newline at end of file
diff --git a/cmake/LLVM.cmake b/cmake/LLVM.cmake
index eb3788518192b46578f72cc2855e37f2c4db0bfc..428ec61822a928667dd3e57cc19f455d4f8c4a90 100644
--- a/cmake/LLVM.cmake
+++ b/cmake/LLVM.cmake
@@ -297,9 +297,13 @@ endif()
 
 find_program_or_die(LLVM_OPT  "opt"       "LLVM optimizer")
 find_program_or_die(LLVM_LLC  "llc"       "LLVM static compiler")
+find_program_or_die(LLVM_AR   "llvm-ar"   "LLVM archiver")
 find_program_or_die(LLVM_AS   "llvm-as"   "LLVM assembler")
+find_program_or_die(LLVM_DIS  "llvm-dis"  "LLVM disassembler")
 find_program_or_die(LLVM_LINK "llvm-link" "LLVM IR linker")
 find_program_or_die(LLVM_LLI  "lli"       "LLVM interpreter")
+find_program_or_die(LLVM_OBJCOPY "llvm-objcopy" "LLVM object copying")
+find_program_or_die(LLVM_OBJDUMP "llvm-objdump" "LLVM object dump")
 
 if(ENABLE_LLVM_FILECHECKS)
   if(IS_ABSOLUTE "${LLVM_FILECHECK_BIN}" AND EXISTS "${LLVM_FILECHECK_BIN}")
diff --git a/config.h.in.cmake b/config.h.in.cmake
index c518bcc62af2120a9794c7db76f23bca58096ece..3c48360f3179d64590bdbae63fde014ea79d1eca 100644
--- a/config.h.in.cmake
+++ b/config.h.in.cmake
@@ -5,6 +5,7 @@
 
 #cmakedefine BUILD_HSA
 #cmakedefine BUILD_CUDA
+#cmakedefine BUILD_VORTEX
 #cmakedefine BUILD_BASIC
 #cmakedefine BUILD_TBB
 #cmakedefine BUILD_PTHREAD
@@ -118,6 +119,14 @@
 
 #endif
 
+#if defined(BUILD_VORTEX)
+
+#define VORTEX_DEVICE_EXTENSIONS "@VORTEX_DEVICE_EXTENSIONS@"
+
+#define VORTEX_XLEN @VORTEX_XLEN@
+
+#endif
+
 #if defined(ENABLE_HOST_CPU_DEVICES)
 
 #cmakedefine ENABLE_HOST_CPU_DEVICES_OPENMP
@@ -193,6 +202,16 @@
 
 #define LLVM_LLC "@LLVM_LLC@"
 
+#define LLVM_AR "@LLVM_AR@"
+
+#define LLVM_DIS "@LLVM_DIS@"
+
+#define LLVM_OBJCOPY "@LLVM_OBJCOPY@"
+
+#define LLVM_OBJDUMP "@LLVM_OBJDUMP@"
+
+#define LLVM_PREFIX "@LLVM_PREFIX@"
+
 #define LLVM_SPIRV "@LLVM_SPIRV@"
 
 #define LLVM_OPT "@LLVM_OPT@"
@@ -270,6 +289,9 @@
 #define CUDA_DEVICE_CL_VERSION_MAJOR @CUDA_DEVICE_CL_VERSION_MAJOR@
 #define CUDA_DEVICE_CL_VERSION_MINOR @CUDA_DEVICE_CL_VERSION_MINOR@
 
+#define VORTEX_DEVICE_CL_VERSION_MAJOR @VORTEX_DEVICE_CL_VERSION_MAJOR@
+#define VORTEX_DEVICE_CL_VERSION_MINOR @VORTEX_DEVICE_CL_VERSION_MINOR@
+
 #define HOST_DEVICE_CL_VERSION_MAJOR @HOST_DEVICE_CL_VERSION_MAJOR@
 #define HOST_DEVICE_CL_VERSION_MINOR @HOST_DEVICE_CL_VERSION_MINOR@
 
diff --git a/lib/CL/clGetPlatformInfo.c b/lib/CL/clGetPlatformInfo.c
index bd9a4c5461ea55677d29cbd169efc932adb84798..f8c24253aa63abf06a3a6d0b905d782886b4b567 100644
--- a/lib/CL/clGetPlatformInfo.c
+++ b/lib/CL/clGetPlatformInfo.c
@@ -108,6 +108,10 @@ static const char *pocl_version
                                      ", CUDA"
 #endif
 
+#ifdef BUILD_VORTEX
+                                     ", VORTEX"
+#endif
+
 #ifdef BUILD_HSA
                                      ", HSA"
 #endif
diff --git a/lib/CL/devices/CMakeLists.txt b/lib/CL/devices/CMakeLists.txt
index a6d40ad43ce56dfa6ba7403b429ca294e0215928..fed894c44bce55717982c8774261e74379c2b9e1 100644
--- a/lib/CL/devices/CMakeLists.txt
+++ b/lib/CL/devices/CMakeLists.txt
@@ -156,6 +156,15 @@ if(ENABLE_CUDA)
   endif()
 endif()
 
+if(ENABLE_VORTEX)
+  add_subdirectory("vortex")
+  set(POCL_DEVICES_OBJS "${POCL_DEVICES_OBJS}"
+    "$<TARGET_OBJECTS:pocl-devices-vortex>")
+
+  if(NOT ENABLE_LOADABLE_DRIVERS)
+      list(APPEND POCL_DEVICES_LINK_LIST vortex)
+  endif()
+endif()
 
 set(POCL_DEVICES_SOURCES
   devices.h  devices.c
diff --git a/lib/CL/devices/devices.c b/lib/CL/devices/devices.c
index b43ca5146dfad952aa80e105af9c72aa4606ecf2..155b580227302ff7410440a08023d9f8b49f5cf9 100644
--- a/lib/CL/devices/devices.c
+++ b/lib/CL/devices/devices.c
@@ -87,6 +87,10 @@
 #include "cuda/pocl-cuda.h"
 #endif
 
+#ifdef BUILD_VORTEX
+#include "vortex/pocl-vortex.h"
+#endif
+
 #if defined(BUILD_ALMAIF)
 #include "almaif/almaif.h"
 #endif
@@ -174,6 +178,9 @@ static init_device_ops pocl_devices_init_ops[] = {
 #ifdef BUILD_CUDA
   INIT_DEV (cuda),
 #endif
+#ifdef BUILD_VORTEX
+  INIT_DEV (vortex),
+#endif
 #ifdef BUILD_ALMAIF
   INIT_DEV (almaif),
 #endif
@@ -212,6 +219,9 @@ char pocl_device_types[POCL_NUM_DEVICE_TYPES][33] = {
 #ifdef BUILD_CUDA
   "cuda",
 #endif
+#ifdef BUILD_VORTEX
+  "vortex",
+#endif
 #ifdef BUILD_ALMAIF
   "almaif",
 #endif
diff --git a/lib/CL/devices/vortex/CMakeLists.txt b/lib/CL/devices/vortex/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc53db46e706d0f7ea91b71c9be1fa1539fa90c6
--- /dev/null
+++ b/lib/CL/devices/vortex/CMakeLists.txt
@@ -0,0 +1,40 @@
+#=============================================================================
+#   CMake build system files
+#
+#   Copyright (c) 2014-2021 pocl developers
+#
+#   Permission is hereby granted, free of charge, to any person obtaining a copy
+#   of this software and associated documentation files (the "Software"), to deal
+#   in the Software without restriction, including without limitation the rights
+#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#   copies of the Software, and to permit persons to whom the Software is
+#   furnished to do so, subject to the following conditions:
+#
+#   The above copyright notice and this permission notice shall be included in
+#   all copies or substantial portions of the Software.
+#
+#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+#   THE SOFTWARE.
+#
+#=============================================================================
+
+unset(CMAKE_CXX_STANDARD)
+unset(CMAKE_CXX_STANDARD_REQUIRED)
+set_source_files_properties(vortex_utils.cc vortex_utils.h PROPERTIES COMPILE_FLAGS "${LLVM_CXXFLAGS}")
+include_directories(${LLVM_INCLUDE_DIRS} ${VORTEX_PREFIX}/runtime/include ${CMAKE_CURRENT_SOURCE_DIR}/../../../llvmopencl)
+set(CMAKE_CXX_STANDARD ${POCL_CMAKE_CXX_STANDARD})
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+add_pocl_device_library("pocl-devices-vortex" pocl-vortex.c pocl-vortex.h vortex_utils.cc vortex_utils.h)
+
+if(ENABLE_LOADABLE_DRIVERS)
+  target_link_libraries(pocl-devices-vortex PRIVATE ${VORTEX_PREFIX}/runtime/lib/libvortex.so ${PTHREAD_LIBRARY})
+endif()
+
+install(FILES "kernel_main.c" "kernel_args.h"
+        DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}/vortex" COMPONENT "lib")
\ No newline at end of file
diff --git a/lib/CL/devices/vortex/kernel_args.h b/lib/CL/devices/vortex/kernel_args.h
new file mode 100644
index 0000000000000000000000000000000000000000..00cb384a0824e5d52c3eed3a00f523e47bfdd573
--- /dev/null
+++ b/lib/CL/devices/vortex/kernel_args.h
@@ -0,0 +1,9 @@
+#include <stdint.h>
+
+typedef struct {
+  uint32_t work_dim;
+  uint32_t num_groups[3];
+  uint32_t local_size[3];
+  uint32_t global_offset[3];
+  uint32_t kernel_id;
+} kernel_args_t;
diff --git a/lib/CL/devices/vortex/kernel_main.c b/lib/CL/devices/vortex/kernel_main.c
new file mode 100644
index 0000000000000000000000000000000000000000..0fbfa420d9f04c5634d01f936e6db03c3b66ac07
--- /dev/null
+++ b/lib/CL/devices/vortex/kernel_main.c
@@ -0,0 +1,25 @@
+#include <vx_spawn.h>
+#include <vx_print.h>
+#include "kernel_args.h"
+
+int g_work_dim;
+dim3_t g_global_offset;
+
+void* vx_local_alloc(uint32_t size) {
+  return __local_mem(size);
+}
+
+void* __vx_get_kernel_callback(int kernel_id);
+
+int main(void) {
+  kernel_args_t* kargs = (kernel_args_t*)csr_read(VX_CSR_MSCRATCH);
+
+  g_work_dim = kargs->work_dim;
+  for (int i = 0, n = kargs->work_dim; i < 3; i++) {
+    g_global_offset.m[i] = (i < n) ? kargs->global_offset[i] : 0;
+  }
+
+  void* arg = (void*)((uint8_t*)kargs + sizeof(kernel_args_t));
+  vx_kernel_func_cb kernel_func = (vx_kernel_func_cb)__vx_get_kernel_callback(kargs->kernel_id);
+  return vx_spawn_threads(kargs->work_dim, kargs->num_groups, kargs->local_size, kernel_func, arg);
+}
diff --git a/lib/CL/devices/vortex/pocl-vortex.c b/lib/CL/devices/vortex/pocl-vortex.c
new file mode 100644
index 0000000000000000000000000000000000000000..356e802d9b949f918a4a47c72939163b7be886ff
--- /dev/null
+++ b/lib/CL/devices/vortex/pocl-vortex.c
@@ -0,0 +1,761 @@
+#include "pocl-vortex.h"
+#include "builtin_kernels.hh"
+#include "common.h"
+#include "config.h"
+#include "config2.h"
+#include "cpuinfo.h"
+#include "devices.h"
+#include "pocl_local_size.h"
+#include "pocl_util.h"
+#include "topology/pocl_topology.h"
+#include "utlist.h"
+
+#include <stdint.h>
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <utlist.h>
+
+#include "pocl_context.h"
+#include "pocl_cache.h"
+#include "pocl_file_util.h"
+#include "pocl_mem_management.h"
+#include "pocl_timing.h"
+#include "pocl_workgroup_func.h"
+
+#include "common_driver.h"
+#include "pocl_llvm.h"
+
+#include "vortex_utils.h"
+#include "kernel_args.h"
+#include <vortex.h>
+
+typedef struct {
+  vx_device_h vx_device;
+  vx_buffer_h vx_kernel_buffer;
+
+  /* List of commands ready to be executed */
+  _cl_command_node *ready_list;
+
+  /* List of commands not yet ready to be executed */
+  _cl_command_node *command_list;
+
+  /* Lock for command list related operations */
+  pocl_lock_t cq_lock;
+
+  pocl_lock_t compile_lock;
+
+  size_t ctx_refcount;
+} vortex_device_data_t;
+
+typedef struct {
+  int num_kernels;
+  char* kernel_names;
+} vortex_program_data_t;
+
+typedef struct {
+  size_t refcount;
+  int kernel_id;
+} vortex_kernel_data_t;
+
+typedef struct {
+  vx_device_h vx_device;
+  vx_buffer_h vx_buffer;
+  uint64_t buf_address;
+} vortex_buffer_data_t;
+
+static cl_bool vortex_available = CL_TRUE;
+
+static const char *vortex_native_device_aux_funcs[] = {NULL};
+
+void pocl_vortex_init_device_ops(struct pocl_device_ops *ops) {
+
+  ops->device_name = "vortex";
+  ops->build_hash = pocl_vortex_build_hash;
+  ops->probe = pocl_vortex_probe;
+  ops->uninit = pocl_vortex_uninit;
+  ops->init = pocl_vortex_init;
+
+  ops->init_context = pocl_vortex_init_context;
+  ops->free_context = pocl_vortex_free_context;
+
+  ops->run = pocl_vortex_run;
+  ops->run_native = NULL;
+
+  ops->alloc_mem_obj = pocl_vortex_alloc_mem_obj;
+  ops->free = pocl_vortex_free;
+
+  ops->build_source = pocl_driver_build_source;
+  ops->link_program = pocl_driver_link_program;
+  ops->build_binary = pocl_driver_build_binary;
+  ops->free_program = pocl_driver_free_program;
+  ops->setup_metadata = pocl_driver_setup_metadata;
+  ops->supports_binary = pocl_driver_supports_binary;
+  ops->build_poclbinary = pocl_driver_build_poclbinary;
+  ops->build_builtin = pocl_driver_build_opencl_builtins;
+
+  ops->post_build_program = pocl_vortex_post_build_program;
+  ops->free_program = pocl_vortex_free_program;
+
+  ops->create_kernel = pocl_vortex_create_kernel;
+  ops->free_kernel = pocl_vortex_free_kernel;
+
+  ops->submit = pocl_vortex_submit;
+  ops->join = pocl_vortex_join;
+  ops->flush = pocl_vortex_flush;
+  ops->notify = pocl_vortex_notify;
+  ops->broadcast = pocl_broadcast;
+
+  ops->read = pocl_vortex_read;
+  ops->write = pocl_vortex_write;
+
+  ops->get_mapping_ptr = pocl_driver_get_mapping_ptr;
+  ops->free_mapping_ptr = pocl_driver_free_mapping_ptr;
+}
+
+char * pocl_vortex_build_hash (cl_device_id device)
+{
+  char *res = (char *)calloc(1000, sizeof(char));
+  vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
+#if (VORTEX_XLEN == 64)
+  snprintf(res, 1000, "vortex-riscv64-unknown-unknown-elf");
+#else
+  snprintf(res, 1000, "vortex-riscv32-unknown-unknown-elf");
+#endif
+  return res;
+}
+
+unsigned int pocl_vortex_probe(struct pocl_device_ops *ops)
+{
+  return (0 == strcmp(ops->device_name, "vortex"));
+}
+
+cl_int
+pocl_vortex_init (unsigned j, cl_device_id dev, const char* parameters)
+{
+  int vx_err;
+  vortex_device_data_t *dd;
+
+  const char* sz_cflags = pocl_get_string_option("POCL_VORTEX_CFLAGS", "");
+
+  int is64bit = (VORTEX_XLEN == 64);
+
+  assert (dev->data == NULL);
+
+  pocl_init_default_device_infos(dev, VORTEX_DEVICE_EXTENSIONS);
+
+  SETUP_DEVICE_CL_VERSION (dev, VORTEX_DEVICE_CL_VERSION_MAJOR,
+                           VORTEX_DEVICE_CL_VERSION_MINOR);
+
+  dd = (vortex_device_data_t *)calloc(1, sizeof(vortex_device_data_t));
+  if (dd == NULL){
+    return CL_OUT_OF_HOST_MEMORY;
+  }
+
+  dev->vendor = "Vortex Group";
+  dev->long_name = "Vortex OpenGPU";
+  dev->short_name = "Vortex";
+  dev->vendor_id = 0;
+  dev->type = CL_DEVICE_TYPE_GPU;
+
+  dev->spmd = CL_TRUE;
+  dev->run_workgroup_pass = CL_FALSE;
+  dev->execution_capabilities = CL_EXEC_KERNEL;
+  //dev->global_as_id = VX_ADDR_SPACE_GLOBAL;
+  //dev->local_as_id = VX_ADDR_SPACE_LOCAL;
+  //dev->constant_as_id = VX_ADDR_SPACE_CONSTANT;
+  dev->autolocals_to_args = POCL_AUTOLOCALS_TO_ARGS_ALWAYS;
+  dev->device_alloca_locals = CL_FALSE;
+  dev->device_side_printf = 0;
+  dev->has_64bit_long = is64bit;
+
+  dev->llvm_cpu = NULL;
+  dev->address_bits = VORTEX_XLEN;
+  dev->llvm_target_triplet = is64bit ? "riscv64-unknown-unknown" : "riscv32-unknown-unknown";
+  dev->llvm_abi = is64bit ? "lp64d" : "ilp32f";
+  dev->kernellib_name = is64bit ? "kernel-riscv64" : "kernel-riscv32";
+  dev->kernellib_fallback_name = NULL;
+  dev->kernellib_subdir = "vortex";
+  dev->device_aux_functions = vortex_native_device_aux_funcs;
+
+  dev->image_support = CL_FALSE;
+
+  vx_device_h vx_device;
+
+  vx_err = vx_dev_open(&vx_device);
+  if (vx_err != 0) {
+    free(dd);
+    return CL_DEVICE_NOT_FOUND;
+  }
+
+  uint64_t num_cores;
+  vx_err = vx_dev_caps(vx_device, VX_CAPS_NUM_CORES, &num_cores);
+  if (vx_err != 0) {
+    vx_dev_close(vx_device);
+    free(dd);
+    return CL_DEVICE_NOT_FOUND;
+  }
+
+  uint64_t global_mem_size;
+  vx_err = vx_dev_caps(vx_device, VX_CAPS_GLOBAL_MEM_SIZE, &global_mem_size);
+  if (vx_err != 0) {
+    vx_dev_close(vx_device);
+    free(dd);
+    return CL_DEVICE_NOT_FOUND;
+  }
+
+  uint64_t local_mem_size;
+  vx_err = vx_dev_caps(vx_device, VX_CAPS_LOCAL_MEM_SIZE, &local_mem_size);
+  if (vx_err != 0) {
+    vx_dev_close(vx_device);
+    free(dd);
+    return CL_DEVICE_NOT_FOUND;
+  }
+
+  dev->global_mem_size = global_mem_size;
+  dev->max_mem_alloc_size = global_mem_size;
+  dev->local_mem_size = local_mem_size;
+  dev->max_compute_units = num_cores;
+
+  dd->vx_kernel_buffer = NULL;
+  dd->vx_device = vx_device;
+
+  dd->ctx_refcount = 0;
+
+  POCL_INIT_LOCK(dd->compile_lock);
+  POCL_INIT_LOCK(dd->cq_lock);
+
+  dev->data = dd;
+  dev->available = &vortex_available;
+
+  return CL_SUCCESS;
+}
+
+cl_int pocl_vortex_uninit (unsigned j, cl_device_id device) {
+  vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
+  if (NULL == dd)
+    return CL_SUCCESS;
+
+  if (dd->vx_kernel_buffer != NULL) {
+    vx_mem_free(dd->vx_kernel_buffer);
+  }
+  vx_dev_close(dd->vx_device);
+
+  POCL_DESTROY_LOCK (dd->compile_lock);
+  POCL_DESTROY_LOCK (dd->cq_lock);
+  POCL_MEM_FREE(dd);
+  device->data = NULL;
+  return CL_SUCCESS;
+}
+
+int pocl_vortex_init_context (cl_device_id device, cl_context context) {
+  vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
+  if (NULL == dd)
+    return CL_SUCCESS;
+
+  dd->ctx_refcount++;
+
+  return CL_SUCCESS;
+}
+
+int pocl_vortex_free_context (cl_device_id device, cl_context context) {
+  vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
+  if (NULL == dd)
+    return CL_SUCCESS;
+
+  if (--dd->ctx_refcount == 0) {
+    pocl_vortex_uninit(0, device);
+  }
+
+  return CL_SUCCESS;
+}
+
+int pocl_vortex_post_build_program (cl_program program, cl_uint device_i) {
+  int result;
+  cl_device_id device = program->devices[device_i];
+  vortex_device_data_t *ddata = (vortex_device_data_t *)device->data;
+  vortex_program_data_t *pdata = NULL;
+
+  POCL_LOCK (ddata->compile_lock);
+
+  do {
+    result = pocl_llvm_run_passes_on_program (program, device_i);
+    if (result != 0)
+      break;
+
+    pdata = (vortex_program_data_t *)calloc (1, sizeof (vortex_program_data_t));
+    pdata->kernel_names = NULL;
+
+    char sz_program_bc[POCL_MAX_PATHNAME_LENGTH];
+    char sz_program_vxbin[POCL_MAX_PATHNAME_LENGTH];
+
+    pocl_cache_program_bc_path(sz_program_bc, program, device_i);
+    remove_extension(sz_program_bc);
+
+    strcpy(sz_program_vxbin, sz_program_bc);
+    strncat(sz_program_vxbin, ".vxbin", POCL_MAX_PATHNAME_LENGTH - 1);
+
+    result = compile_vortex_program(&pdata->kernel_names, &pdata->num_kernels,
+        sz_program_vxbin, program->llvm_irs[device_i]);
+    if (result != 0)
+      break;
+
+  } while (0);
+
+  program->data[device_i] = pdata;
+
+  POCL_UNLOCK (ddata->compile_lock);
+
+  return result;
+}
+
+int pocl_vortex_free_program (cl_device_id device, cl_program program,
+                              unsigned device_i) {
+  vortex_device_data_t *ddata = (vortex_device_data_t *)device->data;
+  vortex_program_data_t *pdata = (vortex_program_data_t *)program->data[device_i];
+  if (pdata == NULL)
+    return CL_SUCCESS;
+
+  pocl_driver_free_program (device, program, device_i);
+
+  POCL_MEM_FREE (pdata->kernel_names);
+  POCL_MEM_FREE (pdata);
+  program->data[device_i] = NULL;
+
+  return CL_SUCCESS;
+}
+
+int pocl_vortex_create_kernel (cl_device_id device, cl_program program,
+                               cl_kernel kernel, unsigned device_i) {
+  int result = CL_SUCCESS;
+  pocl_kernel_metadata_t *meta = kernel->meta;
+  assert(meta->data != NULL);
+  vortex_kernel_data_t *kdata  = (vortex_kernel_data_t *)meta->data[device_i];
+  if (kdata != NULL) {
+    ++kdata->refcount;
+    return CL_SUCCESS;
+  }
+
+  do {
+    vortex_program_data_t *pdata = (vortex_program_data_t *)program->data[device_i];
+    assert(pdata != NULL);
+
+    const char* current = pdata->kernel_names;
+    int i = 0;
+    int found = 0;
+    for (int i = 0; i < pdata->num_kernels; ++i) {
+      if (strcmp(current, kernel->name) == 0) {
+        found = 1;
+        break;
+      }
+      current += strlen(current) + 1;
+    }
+    assert(found);
+    kdata = (void *)calloc (1, sizeof (vortex_kernel_data_t));
+    kdata->kernel_id = i;
+    ++kdata->refcount;
+
+  } while (0);
+
+  meta->data[device_i] = kdata;
+
+  return result;
+}
+
+int pocl_vortex_free_kernel (cl_device_id device, cl_program program,
+                             cl_kernel kernel, unsigned device_i) {
+  pocl_kernel_metadata_t *meta = kernel->meta;
+  assert(meta->data != NULL);
+  vortex_kernel_data_t *kdata = (vortex_kernel_data_t *)meta->data[device_i];
+  if (kdata == NULL)
+    return CL_SUCCESS;
+
+  --kdata->refcount;
+  if (kdata->refcount == 0) {
+    POCL_MEM_FREE (kdata);
+    meta->data[device_i] = NULL;
+  }
+
+  return CL_SUCCESS;
+}
+
+void pocl_vortex_run (void *data, _cl_command_node *cmd) {
+  vortex_device_data_t *dd;
+  struct pocl_argument *al;
+  cl_uint device_i = cmd->program_device_i;
+  cl_kernel kernel = cmd->command.run.kernel;
+  cl_program program = kernel->program;
+  pocl_kernel_metadata_t *meta = kernel->meta;
+  vortex_program_data_t *pdata = (vortex_program_data_t *)program->data[device_i];
+  vortex_kernel_data_t *kdata = (vortex_kernel_data_t *)meta->data[device_i];
+  struct pocl_context *pc = &cmd->command.run.pc;
+  int vx_err;
+
+  int num_groups = 1;
+  int group_size = 1;
+  for (int i = 0; i < pc->work_dim; ++i) {
+    num_groups *= pc->num_groups[i];
+    group_size *= pc->local_size[i];
+  }
+  if (num_groups == 0 || group_size == 0)
+    return;
+
+  assert (data != NULL);
+  dd = (vortex_device_data_t *)data;
+
+  int ptr_size = VORTEX_XLEN / 8;
+
+  // calculate kernel arguments buffer size
+  int local_mem_size = 0;
+  size_t abuf_size = 0;
+
+  for (int i = 0; i < meta->num_args; ++i) {
+    struct pocl_argument* al = &(cmd->command.run.arguments[i]);
+    if (ARG_IS_LOCAL(meta->arg_info[i])) {
+      local_mem_size += al->size;
+      abuf_size += 4;
+    } else
+    if ((meta->arg_info[i].type == POCL_ARG_TYPE_POINTER)
+     || (meta->arg_info[i].type == POCL_ARG_TYPE_IMAGE)
+     || (meta->arg_info[i].type == POCL_ARG_TYPE_SAMPLER)) {
+      abuf_size += ptr_size;
+    } else {
+      // scalar argument
+      abuf_size += al->size;
+    }
+  }
+
+  // local buffers
+  for (int i = 0; i < meta->num_locals; ++i) {
+    local_mem_size += meta->local_sizes[i];
+    abuf_size += 4;
+  }
+
+  // add local size
+  if (local_mem_size != 0) {
+    abuf_size += 4;
+  }
+
+  // check occupancy
+  if (local_mem_size != 0) {
+    int available_localmem;
+    vx_err = vx_check_occupancy(dd->vx_device, group_size, &available_localmem);
+    if (vx_err != 0) {
+      POCL_ABORT("POCL_VORTEX_RUN\n");
+    }
+    if (local_mem_size > available_localmem) {
+      POCL_ABORT("out of local memory: needed=%d bytes, available=%d bytes\n",
+        local_mem_size, available_localmem);
+    }
+  }
+
+  // allocate arguments host buffer
+  size_t kargs_buffer_size = sizeof(kernel_args_t) + abuf_size;
+  uint8_t* const host_kargs_base_ptr = malloc(kargs_buffer_size);
+  assert(host_kargs_base_ptr);
+
+  // allocate kernel arguments buffer
+  vx_buffer_h vx_kargs_buffer;
+  vx_err = vx_mem_alloc(dd->vx_device, kargs_buffer_size, VX_MEM_READ, &vx_kargs_buffer);
+  if (vx_err != 0) {
+    POCL_ABORT("POCL_VORTEX_RUN\n");
+  }
+
+  uint64_t dev_kargs_base_addr;
+  vx_err = vx_mem_address(vx_kargs_buffer, &dev_kargs_base_addr);
+  if (vx_err != 0) {
+    POCL_ABORT("POCL_VORTEX_RUN\n");
+  }
+
+  // write context data
+  {
+    kernel_args_t* const kargs = (kernel_args_t*)host_kargs_base_ptr;
+    kargs->work_dim = pc->work_dim;
+    for (int i = 0; i < 3; ++i) {
+      kargs->num_groups[i] = pc->num_groups[i];
+      kargs->local_size[i] = pc->local_size[i];
+      kargs->global_offset[i] = pc->global_offset[i];
+    }
+    kargs->kernel_id = kdata->kernel_id;
+  }
+
+  // write arguments
+
+  uint8_t* host_args_ptr = host_kargs_base_ptr + sizeof(kernel_args_t);
+  int local_mem_offset = 0;
+
+  for (int i = 0; i < meta->num_args; ++i) {
+    struct pocl_argument* al = &(cmd->command.run.arguments[i]);
+    if (ARG_IS_LOCAL(meta->arg_info[i])) {
+      if (local_mem_offset == 0) {
+        memcpy(host_args_ptr, &local_mem_size, 4); // local_size
+        host_args_ptr += 4;
+      }
+      memcpy(host_args_ptr, &local_mem_offset, 4); // arg offset
+      host_args_ptr += 4;
+      local_mem_offset += al->size;
+    } else
+    if (meta->arg_info[i].type == POCL_ARG_TYPE_POINTER) {
+      if (al->value == NULL) {
+        memset(host_args_ptr, 0, ptr_size); // NULL pointer value
+        host_args_ptr += ptr_size;
+      } else {
+        cl_mem m = (*(cl_mem *)(al->value));
+        vortex_buffer_data_t* buf_data = (vortex_buffer_data_t *) m->device_ptrs[cmd->device->global_mem_id].mem_ptr;
+        uint64_t dev_mem_addr = buf_data->buf_address + al->offset;
+        memcpy(host_args_ptr, &buf_data->buf_address, ptr_size); // pointer value
+        host_args_ptr += ptr_size;
+      }
+    } else
+    if (meta->arg_info[i].type == POCL_ARG_TYPE_IMAGE) {
+        POCL_ABORT("POCL_VORTEX_RUN\n");
+    } else
+    if (meta->arg_info[i].type == POCL_ARG_TYPE_SAMPLER) {
+        POCL_ABORT("POCL_VORTEX_RUN\n");
+    } else {
+      // scalar argument
+      memcpy(host_args_ptr, al->value, al->size); // scalar value
+      host_args_ptr += al->size;
+    }
+  }
+
+  // write local arguments
+  for (int i = 0; i < meta->num_locals; ++i) {
+    if (local_mem_offset == 0) {
+      memcpy(host_args_ptr, &local_mem_size, 4); // local_size
+      host_args_ptr += 4;
+    }
+    memcpy(host_args_ptr, &local_mem_offset, 4); // arg offset
+    host_args_ptr += 4;
+    local_mem_offset += meta->local_sizes[i];
+  }
+
+  // upload kernel arguments buffer
+  vx_err = vx_copy_to_dev(vx_kargs_buffer, host_kargs_base_ptr, 0, kargs_buffer_size);
+  if (vx_err != 0) {
+    POCL_ABORT("POCL_VORTEX_RUN\n");
+  }
+
+  // release argument host buffer
+  free(host_kargs_base_ptr);
+
+  // upload kernel to device
+  if (NULL == dd->vx_kernel_buffer) {
+    char sz_program_bc[POCL_MAX_PATHNAME_LENGTH];
+    char sz_program_vxbin[POCL_MAX_PATHNAME_LENGTH];
+
+    pocl_cache_program_bc_path(sz_program_bc, program, device_i);
+    remove_extension(sz_program_bc);
+
+    strcpy(sz_program_vxbin, sz_program_bc);
+    strncat(sz_program_vxbin, ".vxbin", POCL_MAX_PATHNAME_LENGTH - 1);
+
+    vx_err = vx_upload_kernel_file(dd->vx_device, sz_program_vxbin, &dd->vx_kernel_buffer);
+    if (vx_err != 0) {
+      POCL_ABORT("POCL_VORTEX_RUN\n");
+    }
+  }
+
+  // launch kernel execution
+  vx_err = vx_start(dd->vx_device, dd->vx_kernel_buffer, vx_kargs_buffer);
+  if (vx_err != 0) {
+    POCL_ABORT("POCL_VORTEX_RUN\n");
+  }
+
+  // wait for the execution to complete
+  vx_err = vx_ready_wait(dd->vx_device, -1);
+  if (vx_err != 0) {
+    POCL_ABORT("POCL_VORTEX_RUN\n");
+  }
+
+  // release arguments device buffer
+  vx_mem_free(vx_kargs_buffer);
+}
+
+cl_int pocl_vortex_alloc_mem_obj(cl_device_id device, cl_mem mem_obj, void *host_ptr) {
+  int vx_err;
+  pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id];
+
+  /* let other drivers preallocate */
+  if ((mem_obj->flags & CL_MEM_ALLOC_HOST_PTR) && (mem_obj->mem_host_ptr == NULL))
+    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+
+  p->extra_ptr = NULL;
+  p->version = 0;
+  p->extra = 0;
+  cl_mem_flags flags = mem_obj->flags;
+
+  if (flags & CL_MEM_USE_HOST_PTR) {
+    POCL_ABORT("POCL_VORTEX_MALLOC\n");
+  } else {
+    int vx_flags = 0;
+    if ((flags & CL_MEM_READ_WRITE) != 0)
+      vx_flags = VX_MEM_READ_WRITE;
+    if ((flags & CL_MEM_READ_ONLY) != 0)
+      vx_flags = VX_MEM_READ;
+    if ((flags & CL_MEM_WRITE_ONLY) != 0)
+      vx_flags = VX_MEM_WRITE;
+
+    vortex_device_data_t* dd = (vortex_device_data_t *)device->data;
+
+    vx_buffer_h vx_buffer;
+    vx_err = vx_mem_alloc(dd->vx_device, mem_obj->size, vx_flags, &vx_buffer);
+    if (vx_err != 0) {
+      return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+    }
+
+    uint64_t buf_address;
+    vx_err = vx_mem_address(vx_buffer, &buf_address);
+    if (vx_err != 0) {
+      POCL_ABORT("POCL_VORTEX_RUN\n");
+    }
+
+    if (host_ptr && (flags & CL_MEM_COPY_HOST_PTR)) {
+      vx_err = vx_copy_to_dev(vx_buffer, host_ptr, 0, mem_obj->size);
+      if (vx_err != 0) {
+        return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+      }
+    }
+
+    if (flags & CL_MEM_ALLOC_HOST_PTR) {
+      /* malloc mem_host_ptr then increase refcount */
+      pocl_alloc_or_retain_mem_host_ptr (mem_obj);
+    }
+
+    vortex_buffer_data_t* buf_data = (vortex_buffer_data_t *)malloc(sizeof(vortex_buffer_data_t));
+    buf_data->vx_device = dd->vx_device;
+    buf_data->vx_buffer = vx_buffer;
+    buf_data->buf_address = buf_address;
+
+    p->mem_ptr = buf_data;
+  }
+
+  return CL_SUCCESS;
+}
+
+void pocl_vortex_free(cl_device_id device, cl_mem mem_obj) {
+  pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id];
+  cl_mem_flags flags = mem_obj->flags;
+  vortex_buffer_data_t* buf_data = (vortex_buffer_data_t*)p->mem_ptr;
+
+  if (flags & CL_MEM_USE_HOST_PTR) {
+    POCL_ABORT("POCL_VORTEX_FREE\n");
+  } else {
+    if (flags & CL_MEM_ALLOC_HOST_PTR) {
+      pocl_release_mem_host_ptr(mem_obj);
+    }
+    if (buf_data->vx_buffer) {
+      vx_mem_free(buf_data->vx_buffer);
+    }
+  }
+  free(buf_data);
+  p->mem_ptr = NULL;
+  p->version = 0;
+}
+
+void pocl_vortex_write(void *data,
+                       const void *__restrict__ host_ptr,
+                       pocl_mem_identifier *dst_mem_id,
+                       cl_mem dst_buf,
+                       size_t offset,
+                       size_t size) {
+  int vx_err;
+  vortex_buffer_data_t *buf_data = (vortex_buffer_data_t *)dst_mem_id->mem_ptr;
+  vx_err = vx_copy_to_dev(buf_data->vx_buffer, host_ptr, offset, size);
+  if (vx_err != 0) {
+    POCL_ABORT("POCL_VORTEX_WRITE\n");
+  }
+}
+
+void pocl_vortex_read(void *data,
+                      void *__restrict__ host_ptr,
+                      pocl_mem_identifier *src_mem_id,
+                      cl_mem src_buf,
+                      size_t offset,
+                      size_t size) {
+  int vx_err;
+  vortex_buffer_data_t* buf_data = (vortex_buffer_data_t*)src_mem_id->mem_ptr;
+  vx_err = vx_copy_from_dev(host_ptr, buf_data->vx_buffer, offset, size);
+  if (vx_err != 0) {
+    POCL_ABORT("POCL_VORTEX_READ\n");
+  }
+}
+
+static void vortex_command_scheduler (vortex_device_data_t *dd) {
+  _cl_command_node *node;
+
+  /* execute commands from ready list */
+  while ((node = dd->ready_list))
+    {
+      assert (pocl_command_is_ready (node->sync.event.event));
+      assert (node->sync.event.event->status == CL_SUBMITTED);
+      CDL_DELETE (dd->ready_list, node);
+      POCL_UNLOCK (dd->cq_lock);
+      pocl_exec_command (node);
+      POCL_LOCK (dd->cq_lock);
+    }
+
+  return;
+}
+
+void pocl_vortex_submit (_cl_command_node *node, cl_command_queue cq) {
+  vortex_device_data_t *dd = (vortex_device_data_t *)node->device->data;
+
+  node->ready = 1;
+  POCL_LOCK (dd->cq_lock);
+  pocl_command_push(node, &dd->ready_list, &dd->command_list);
+
+  POCL_UNLOCK_OBJ (node->sync.event.event);
+  vortex_command_scheduler (dd);
+  POCL_UNLOCK (dd->cq_lock);
+
+  return;
+}
+
+void pocl_vortex_flush (cl_device_id device, cl_command_queue cq) {
+  vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
+
+  POCL_LOCK (dd->cq_lock);
+  vortex_command_scheduler (dd);
+  POCL_UNLOCK (dd->cq_lock);
+}
+
+void pocl_vortex_join (cl_device_id device, cl_command_queue cq) {
+  vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
+
+  POCL_LOCK (dd->cq_lock);
+  vortex_command_scheduler (dd);
+  POCL_UNLOCK (dd->cq_lock);
+
+  return;
+}
+
+void pocl_vortex_notify (cl_device_id device, cl_event event, cl_event finished) {
+  vortex_device_data_t *dd = (vortex_device_data_t *)device->data;
+  _cl_command_node * volatile node = event->command;
+
+  if (finished->status < CL_COMPLETE)
+    {
+      pocl_update_event_failed (event);
+      return;
+    }
+
+  if (!node->ready)
+    return;
+
+  if (pocl_command_is_ready (event))
+    {
+      if (event->status == CL_QUEUED)
+        {
+          pocl_update_event_submitted (event);
+          POCL_LOCK (dd->cq_lock);
+          CDL_DELETE (dd->command_list, node);
+          CDL_PREPEND (dd->ready_list, node);
+          vortex_command_scheduler (dd);
+          POCL_UNLOCK (dd->cq_lock);
+        }
+      return;
+    }
+}
\ No newline at end of file
diff --git a/lib/CL/devices/vortex/pocl-vortex.h b/lib/CL/devices/vortex/pocl-vortex.h
new file mode 100644
index 0000000000000000000000000000000000000000..16c5f2e58ce3473f1b8133b50346f79863d3a78e
--- /dev/null
+++ b/lib/CL/devices/vortex/pocl-vortex.h
@@ -0,0 +1,9 @@
+#ifndef POCL_VORTEX_H
+#define POCL_VORTEX_H
+
+#include "pocl_cl.h"
+
+#include "prototypes.inc"
+GEN_PROTOTYPES (vortex)
+
+#endif /* POCL_VORTEX_H */
diff --git a/lib/CL/devices/vortex/vortex_utils.cc b/lib/CL/devices/vortex/vortex_utils.cc
new file mode 100644
index 0000000000000000000000000000000000000000..08c77da4109c71f664d81654178fbaae5adfe295
--- /dev/null
+++ b/lib/CL/devices/vortex/vortex_utils.cc
@@ -0,0 +1,339 @@
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <cstdarg>
+#include <vector>
+#include <cstdio>
+#include <ostream>
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <regex>
+
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Alignment.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+#include <llvm/Analysis/TargetLibraryInfo.h>
+#include <llvm/Analysis/TargetTransformInfo.h>
+
+#if LLVM_MAJOR >= 17
+#include <llvm/Transforms/IPO/Internalize.h>
+#endif
+
+#include "vortex_utils.h"
+
+#include <llvm/IR/Module.h>
+#include <llvm/Support/raw_ostream.h>
+#include <llvm/Support/FileSystem.h>
+#include <llvm/Bitcode/BitcodeWriter.h>
+
+#include "pocl.h"
+#include "pocl_file_util.h"
+#include "pocl_util.h"
+
+#include "LLVMUtils.h"
+
+static int exec(const char* cmd, std::ostream& out) {
+  char buffer[128];
+  auto pipe = popen(cmd, "r");
+  if (!pipe) {
+      //throw std::runtime_error("popen() failed!");
+      return -1;
+  }
+  while (!feof(pipe)) {
+      if (fgets(buffer, 128, pipe) != nullptr)
+          out << buffer;
+  }
+  return pclose(pipe);
+}
+
+void remove_extension(char* filename) {
+  char *last_dot = strrchr(filename, '.');
+  if (last_dot != NULL) {
+    *last_dot = '\0';
+  }
+}
+
+static char* convertToCharArray(const llvm::SmallVector<std::string, 8>& names) {
+  // Calculate the total length required for the buffer
+  size_t totalLength = 0;
+  for (const auto& name : names) {
+    totalLength += name.size() + 1; // +1 for the null terminator
+  }
+
+  // Allocate buffer
+  char* buffer = (char*)malloc(totalLength * sizeof(char));
+  if (buffer == nullptr) {
+    std::cerr << "Memory allocation failed" << std::endl;
+    return nullptr;
+  }
+
+  // Copy names into buffer with null separation
+  size_t position = 0;
+  for (const auto& name : names) {
+    std::strcpy(buffer + position, name.c_str());
+    position += name.size();
+    buffer[position] = '\0'; // Null terminator
+    position += 1;
+  }
+
+  return buffer;
+}
+
+// Store function arguments in a single argument buffer.
+static bool createArgumentsBuffer(llvm::Function *function, llvm::Module *module, llvm::SmallVector<std::string, 8>& funcNames) {
+  auto &Context = module->getContext();
+  const llvm::DataLayout &DL = module->getDataLayout();
+
+  auto I32Ty = llvm::Type::getInt32Ty(Context);
+  auto I8Ty = llvm::Type::getInt8Ty(Context);
+
+  // Create new function signature
+  auto ArgBufferType = llvm::PointerType::get(llvm::Type::getInt8Ty(Context), 0);
+  auto NewFuncType = llvm::FunctionType::get(function->getReturnType(), {ArgBufferType}, false);
+  auto NewFunc = llvm::Function::Create(NewFuncType, function->getLinkage(), function->getName() + "_vortex");
+  module->getFunctionList().insert(function->getIterator(), NewFunc);
+  NewFunc->takeName(function);
+
+  auto EntryBlock = llvm::BasicBlock::Create(Context, "entry", NewFunc);
+  llvm::IRBuilder<> Builder(EntryBlock);
+
+  // Access function arguments
+  auto ai = NewFunc->arg_begin();
+  auto ArgBuffer = &*ai++;
+  ArgBuffer->setName("ArgBuffer");
+  auto I8PtrTy = I8Ty->getPointerTo();
+
+  unsigned arg_idx = 0;
+  unsigned arg_offset = 0;
+
+  llvm::Value* allocated_local_mem = nullptr;
+
+  for (auto& OldArg : function->args()) {
+    auto ArgType = OldArg.getType();
+    auto ArgOffset = llvm::ConstantInt::get(I32Ty, arg_offset);
+    llvm::Value* Arg;
+    if (pocl::isLocalMemFunctionArg(function, arg_idx)) {
+      if (allocated_local_mem == nullptr) {
+        // Load __local_size
+        auto local_size_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, "__local_size_ptr");
+        arg_offset += 4;
+        auto local_size = Builder.CreateLoad(I32Ty, local_size_ptr, "__local_size");
+        // Call vx_local_alloc(__local_size)
+        auto function_type = llvm::FunctionType::get(I8PtrTy, {I32Ty}, false);
+        auto vx_local_alloc_func = module->getOrInsertFunction("vx_local_alloc", function_type);
+        allocated_local_mem = Builder.CreateCall(vx_local_alloc_func, {local_size}, "__local_mem");
+      }
+      // Load argument __offset
+      auto offset_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, OldArg.getName() + "_offset_ptr");
+      auto offset = Builder.CreateLoad(I32Ty, offset_ptr, OldArg.getName() + "_offset");
+      arg_offset += 4;
+      // Apply pointer offset
+      Arg = Builder.CreateGEP(I8PtrTy, allocated_local_mem, offset, OldArg.getName() + "_byte_ptr");
+    } else {
+      auto offset_ptr = Builder.CreateGEP(I8Ty, ArgBuffer, ArgOffset, OldArg.getName() + "_offset_ptr");
+      Arg = Builder.CreateLoad(ArgType, offset_ptr, OldArg.getName() + "_loaded");
+      arg_offset += DL.getTypeAllocSize(ArgType);
+    }
+    OldArg.replaceAllUsesWith(Arg);
+    arg_idx += 1;
+  }
+
+  // Move the body of the old function to the new function
+  NewFunc->splice(NewFunc->end(), function);
+
+  // Connect the entry block to the first block of the old function
+  for (auto& BB : *NewFunc) {
+    if (&BB != EntryBlock) {
+      Builder.CreateBr(&BB);
+      break;
+    }
+  }
+
+  funcNames.push_back(NewFunc->getName().str());
+
+  return true;
+}
+
+static void processKernels(llvm::SmallVector<std::string, 8>& funcNames, llvm::Module *module) {
+  llvm::SmallVector<llvm::Function *, 8> functionsToErase;
+  for (auto& function : module->functions()) {
+    if (!pocl::isKernelToProcess(function))
+      continue;
+    if (createArgumentsBuffer(&function, module, funcNames))
+      functionsToErase.push_back(&function);
+  }
+  for (auto function : functionsToErase) {
+    function->eraseFromParent();
+  }
+}
+
+static void addKernelSelect(llvm::SmallVector<std::string, 8>& funcNames, llvm::Module *module) {
+  auto& Context = module->getContext();
+
+  auto I32Ty = llvm::Type::getInt32Ty(Context);
+  auto VoidTy = llvm::Type::getVoidTy(Context);
+  auto VoidPtrTy = llvm::PointerType::getUnqual(VoidTy);
+  auto GetKernelCallbackTy = llvm::FunctionType::get(VoidPtrTy, {I32Ty}, false);
+
+  auto GetKernelCallbackFunc = llvm::Function::Create(
+    GetKernelCallbackTy, llvm::Function::ExternalLinkage, "__vx_get_kernel_callback", module);
+
+  llvm::IRBuilder<> Builder(Context);
+
+  auto EntryBB = llvm::BasicBlock::Create(Context, "entry", GetKernelCallbackFunc);
+  Builder.SetInsertPoint(EntryBB);
+
+  // Get the function argument (kernel_index)
+  auto Args = GetKernelCallbackFunc->arg_begin();
+  auto KernelIndex = Args++;
+  KernelIndex->setName("kernel_index");
+
+  // Prepare the switch instruction
+  auto Switch = Builder.CreateSwitch(KernelIndex, EntryBB);
+
+  // Iterate through the functions in the module and create cases for the switch
+  int FunctionIndex = 0;
+  for (llvm::Function& F : module->functions()) {
+    if (std::find(funcNames.begin(), funcNames.end(), F.getName().str()) == funcNames.end())
+      continue;
+    // Create a basic block for this function index
+    auto CaseBB = llvm::BasicBlock::Create(Context, "case_" + std::to_string(FunctionIndex), GetKernelCallbackFunc);
+    Builder.SetInsertPoint(CaseBB);
+    // Return the function pointer
+    Builder.CreateRet(Builder.CreateBitCast(&F, GetKernelCallbackTy->getReturnType()));
+    // Add the case to the switch statement
+    Switch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Context), FunctionIndex), CaseBB);
+    ++FunctionIndex;
+  }
+  // Create a default case block for out-of-bounds indices
+  auto DefaultBB = llvm::BasicBlock::Create(Context, "default", GetKernelCallbackFunc);
+  Builder.SetInsertPoint(DefaultBB);
+  Builder.CreateRet(llvm::ConstantPointerNull::get(llvm::cast<llvm::PointerType>(GetKernelCallbackTy->getReturnType())));
+  Switch->setDefaultDest(DefaultBB);
+}
+
+int compile_vortex_program(char**kernel_names, int* num_kernels, char* sz_program_vxbin, void* llvm_module) {
+  int err;
+
+  const char* llvm_install_path = getenv("LLVM_PREFIX");
+  if (llvm_install_path) {
+    if (!pocl_exists(llvm_install_path)) {
+      POCL_MSG_ERR("$LLVM_PREFIX: '%s' doesn't exist\n", llvm_install_path);
+      return -1;
+    }
+    POCL_MSG_PRINT_INFO("using $LLVM_PREFIX=%s!\n", llvm_install_path);
+  }
+
+  std::string build_cflags = pocl_get_string_option("POCL_VORTEX_CFLAGS", "");
+  if (build_cflags == "") {
+    POCL_MSG_ERR("'POCL_VORTEX_CFLAGS' need to be set\n");
+    return -1;
+  }
+
+  std::string build_ldflags = pocl_get_string_option ("POCL_VORTEX_LDFLAGS", "");
+  if(build_ldflags == ""){
+    POCL_MSG_ERR("'POCL_VORTEX_LDFLAGS' need to be set\n");
+    return -1;
+  }
+
+  char sz_program_bc[POCL_MAX_PATHNAME_LENGTH + 1];
+  err = pocl_mk_tempname(sz_program_bc, "/tmp/pocl_vortex_program", ".bc", nullptr);
+  if (err != 0)
+    return err;
+
+  char sz_program_elf[POCL_MAX_PATHNAME_LENGTH + 1];
+  err = pocl_mk_tempname(sz_program_elf, "/tmp/pocl_vortex_program", ".elf", nullptr);
+  if (err != 0)
+    return err;
+
+  auto module = (llvm::Module *)llvm_module;
+  llvm::SmallVector<std::string, 8> kernelNames;
+  processKernels(kernelNames, module);
+  addKernelSelect(kernelNames, module);
+
+  *num_kernels = kernelNames.size();
+  *kernel_names = convertToCharArray(kernelNames);
+
+  {
+    std::error_code EC;
+    llvm::raw_fd_ostream file(sz_program_bc, EC, llvm::sys::fs::OF_None);
+    llvm::WriteBitcodeToFile(*module, file);
+    file.close();
+  }
+
+  if (POCL_DEBUGGING_ON) {
+    std::error_code EC;
+    llvm::raw_fd_ostream file("program.ll", EC, llvm::sys::fs::OF_None);
+    module->print(file, nullptr);
+    file.close();
+  }
+
+  {
+    std::string clang_path(CLANG);
+    if (llvm_install_path) {
+      clang_path.replace(0, strlen(LLVM_PREFIX), llvm_install_path);
+    }
+
+    char sz_kernel_main[POCL_MAX_PATHNAME_LENGTH];
+    pocl_get_srcdir_or_datadir (sz_kernel_main, "/lib/CL/devices", "", "/vortex/kernel_main.c");
+
+    std::stringstream ss_cmd, ss_out;
+    ss_cmd << clang_path.c_str() << " " << build_cflags << " " << sz_program_bc << " " << sz_kernel_main << " " << build_ldflags << " -o " << sz_program_elf;
+    POCL_MSG_PRINT_LLVM("running \"%s\"\n", ss_cmd.str().c_str());
+    int err = exec(ss_cmd.str().c_str(), ss_out);
+    if (err != 0) {
+      POCL_MSG_ERR("%s\n", ss_out.str().c_str());
+      return err;
+    }
+  }
+
+  if (POCL_DEBUGGING_ON) {
+    std::string objdump_path(LLVM_OBJDUMP);
+    if (llvm_install_path) {
+      objdump_path.replace(0, strlen(LLVM_PREFIX), llvm_install_path);
+    }
+
+    std::stringstream ss_cmd, ss_out;
+    ss_cmd << objdump_path.c_str() << " -D " << sz_program_elf << " > program.dump";
+
+    POCL_MSG_PRINT_LLVM("running \"%s\"\n", ss_cmd.str().c_str());
+    int err = exec(ss_cmd.str().c_str(), ss_out);
+    if (err != 0) {
+      POCL_MSG_ERR("%s\n", ss_out.str().c_str());
+      return err;
+    }
+  }
+
+  {
+    std::string vxbintool_path = pocl_get_string_option ("POCL_VORTEX_BINTOOL", "");
+    if (vxbintool_path == ""){
+      POCL_MSG_ERR("'POCL_VORTEX_BINTOOL' need to be set\n");
+      return -1;
+    }
+    std::stringstream ss_cmd, ss_out;
+    ss_cmd << vxbintool_path << " " << sz_program_elf << " " << sz_program_vxbin;
+    POCL_MSG_PRINT_LLVM("running \"%s\"\n", ss_cmd.str().c_str());
+    int err = exec(ss_cmd.str().c_str(), ss_out);
+    if (err != 0) {
+      POCL_MSG_ERR("%s\n", ss_out.str().c_str());
+      return err;
+    }
+  }
+
+  return 0;
+}
\ No newline at end of file
diff --git a/lib/CL/devices/vortex/vortex_utils.h b/lib/CL/devices/vortex/vortex_utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..ab3c6f2424abb9cd9eccf3adb3a245d43c49a602
--- /dev/null
+++ b/lib/CL/devices/vortex/vortex_utils.h
@@ -0,0 +1,18 @@
+#ifndef VORTEX_UTILS_H
+#define VORTEX_UTILS_H
+
+#include "config.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void remove_extension(char* filename);
+
+int compile_vortex_program(char** kernel_names, int* num_kernels, char* sz_program_vxbin, void* llvm_module);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
\ No newline at end of file
diff --git a/lib/kernel/CMakeLists.txt b/lib/kernel/CMakeLists.txt
index 81244fb2b4e7a490b652a2c7983b3c62e254f4fd..7932a7066ad96b93856bcfe7a999518860f3743f 100644
--- a/lib/kernel/CMakeLists.txt
+++ b/lib/kernel/CMakeLists.txt
@@ -224,6 +224,12 @@ endif()
 
 #*********************************************************************
 
+if(OCL_TARGETS MATCHES "vortex")
+  add_subdirectory("vortex")
+endif()
+
+#*********************************************************************
+
 # "Escape" a list before passing to an external command
 string(REPLACE ";" "****" KERNEL_BC_LIST_ESCAPED "${KERNEL_BC_LIST}")
 
diff --git a/lib/kernel/vortex/CMakeLists.txt b/lib/kernel/vortex/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..c6dbf55420da29dcc17a6a0f9348fcfdad9edc31
--- /dev/null
+++ b/lib/kernel/vortex/CMakeLists.txt
@@ -0,0 +1,79 @@
+#=============================================================================
+#   CMake build system files
+#
+#   Copyright (c) 2014-2018 pocl developers
+#
+#   Permission is hereby granted, free of charge, to any person obtaining a copy
+#   of this software and associated documentation files (the "Software"), to deal
+#   in the Software without restriction, including without limitation the rights
+#   to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+#   copies of the Software, and to permit persons to whom the Software is
+#   furnished to do so, subject to the following conditions:
+#
+#   The above copyright notice and this permission notice shall be included in
+#   all copies or substantial portions of the Software.
+#
+#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+#   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+#   THE SOFTWARE.
+#
+#=============================================================================
+
+include("bitcode_rules")
+
+if( ${VORTEX_XLEN} EQUAL 64 )
+  set(LLVM_TARGET riscv64)
+  set(TARGET_MARCH rv64imafd)
+else( ${VORTEX_XLEN} EQUAL 64 )
+  set(LLVM_TARGET riscv32)
+  set(TARGET_MARCH rv32imaf)
+endif( ${VORTEX_XLEN} EQUAL 64 )
+
+set(KERNEL_SOURCES ${SOURCES_GENERIC})
+
+foreach(FILE printf.c print_base.c
+             get_work_dim.c get_num_groups.c get_local_size.c get_global_offset.c get_global_size.c
+             get_group_id.c get_local_id.c get_global_id.c get_linear_id.c
+             barrier.ll
+             atomics.cl)
+  list(REMOVE_ITEM KERNEL_SOURCES "${FILE}")
+endforeach()
+
+foreach(FILE workitems.c printf.c barrier.c)
+  list(REMOVE_ITEM KERNEL_SOURCES "${FILE}")
+  list(APPEND KERNEL_SOURCES "vortex/${FILE}")
+endforeach()
+
+set(CLANG_FLAGS "-ffreestanding" "-target" "${LLVM_TARGET}" "-march=${TARGET_MARCH}" "-emit-llvm" "-D_CL_DISABLE_HALF" "-I${VORTEX_PREFIX}/kernel/include")
+
+set(KERNEL_CL_FLAGS "-Xclang" "-cl-std=CL${VORTEX_DEVICE_CL_STD}" "-D__OPENCL_C_VERSION__=${VORTEX_DEVICE_CL_VERSION}" ${KERNEL_CL_FLAGS})
+
+set(LLC_FLAGS "")
+
+set(DEVICE_CL_FLAGS "-D__OPENCL_VERSION__=${VORTEX_DEVICE_CL_VERSION} -DPOCL_DEVICE_ADDRESS_BITS=${VORTEX_XLEN}")
+
+separate_arguments(VORTEX_DEVICE_EXTENSIONS)
+foreach(EXT ${VORTEX_DEVICE_EXTENSIONS})
+  set(DEVICE_CL_FLAGS "${DEVICE_CL_FLAGS} -D${EXT}")
+endforeach()
+separate_arguments(DEVICE_CL_FLAGS)
+
+make_kernel_bc(KERNEL_BC "${LLVM_TARGET}" "BCs" 0 0 0 ${KERNEL_SOURCES})
+
+# just debug
+message(STATUS "${LLVM_TARGET} Kernel BC: ${KERNEL_BC}")
+
+list(APPEND KERNEL_BC_LIST "${KERNEL_BC}")
+set(KERNEL_BC_LIST "${KERNEL_BC_LIST}" PARENT_SCOPE)
+
+# a target is needed...
+add_custom_target("kernel_${LLVM_TARGET}" DEPENDS ${KERNEL_BC})
+
+list(APPEND KERNEL_TARGET_LIST "kernel_${LLVM_TARGET}")
+set(KERNEL_TARGET_LIST "${KERNEL_TARGET_LIST}" PARENT_SCOPE)
+
+install(FILES "${KERNEL_BC}" DESTINATION "${POCL_INSTALL_PRIVATE_DATADIR}" COMPONENT "lib")
diff --git a/lib/kernel/vortex/barrier.c b/lib/kernel/vortex/barrier.c
new file mode 100644
index 0000000000000000000000000000000000000000..c6e0ea21ddf8af77a052ee64b63cabe0bbcc91d1
--- /dev/null
+++ b/lib/kernel/vortex/barrier.c
@@ -0,0 +1,10 @@
+#include <vx_spawn.h>
+
+#define CLK_GLOBAL_MEM_FENCE  0x02
+
+void _Z7barrierj(int flags) {
+  if (flags & CLK_GLOBAL_MEM_FENCE) {
+    vx_fence();
+  }
+  vx_barrier(__local_group_id, __warps_per_group);
+}
diff --git a/lib/kernel/vortex/printf.c b/lib/kernel/vortex/printf.c
new file mode 100644
index 0000000000000000000000000000000000000000..5e2065dfdd5cb7c65ba7116320aeac72027d5b78
--- /dev/null
+++ b/lib/kernel/vortex/printf.c
@@ -0,0 +1,10 @@
+#include <vx_print.h>
+
+int printf (const char *restrict fmt, ...) {
+  int ret;
+	va_list va;
+	va_start(va, fmt);
+	ret = vx_vprintf(fmt, va);
+	va_end(va);
+  return ret;
+}
diff --git a/lib/kernel/vortex/workitems.c b/lib/kernel/vortex/workitems.c
new file mode 100644
index 0000000000000000000000000000000000000000..5ac4add8da76c9c8a4fc643f64ce181d314a7def
--- /dev/null
+++ b/lib/kernel/vortex/workitems.c
@@ -0,0 +1,84 @@
+#include <vx_spawn.h>
+
+extern int g_work_dim;
+extern dim3_t g_global_offset;
+
+uint32_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_work_dim (void) {
+  return g_work_dim;
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_num_groups(uint32_t dimindx) {
+  switch (dimindx) {
+  default: return gridDim.x;
+  case 1: return gridDim.y;
+  case 2: return gridDim.z;
+  }
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_local_size(uint32_t dimindx) {
+  switch (dimindx) {
+  default: return blockDim.x;
+  case 1: return blockDim.y;
+  case 2: return blockDim.z;
+  }
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_global_offset(uint32_t dimindx) {
+  switch (dimindx) {
+  default: return g_global_offset.x;
+  case 1: return g_global_offset.y;
+  case 2: return g_global_offset.z;
+  }
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_group_id(uint32_t dimindx) {
+  switch (dimindx) {
+  default: return blockIdx.x;
+  case 1: return blockIdx.y;
+  case 2: return blockIdx.z;
+  }
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_local_id(uint32_t dimindx) {
+  switch (dimindx) {
+  default: return threadIdx.x;
+  case 1: return threadIdx.y;
+  case 2: return threadIdx.z;
+  }
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_global_size(uint32_t dimindx) {
+  switch (dimindx) {
+  default: return blockDim.x * gridDim.x;
+  case 1: return blockDim.y * gridDim.y;
+  case 2: return blockDim.z * gridDim.z;
+  }
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_global_id(uint32_t dimindx) {
+  switch (dimindx) {
+  default: return blockIdx.x * blockDim.x + threadIdx.x + g_global_offset.x;
+  case 1: return blockIdx.y * blockDim.y + threadIdx.y + g_global_offset.y;
+  case 2: return blockIdx.z * blockDim.z + threadIdx.z + g_global_offset.z;
+  }
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_global_linear_id(void) {
+  return ((blockIdx.z * blockDim.z + threadIdx.z) * blockDim.y * gridDim.y * blockDim.x * gridDim.x)
+       + ((blockIdx.y * blockDim.y + threadIdx.y) * blockDim.x * gridDim.x)
+       + ((blockIdx.x * blockDim.z + threadIdx.x));
+}
+
+size_t _CL_OVERLOADABLE _CL_READNONE _CL_OPTNONE
+get_local_linear_id(void) {
+  return (threadIdx.z * blockDim.y * blockDim.x) + (threadIdx.y * blockDim.x) + threadIdx.x;
+}
\ No newline at end of file
diff --git a/lib/llvmopencl/LLVMUtils.h b/lib/llvmopencl/LLVMUtils.h
index c9cf1ad87c393391a621c53836da0a0f0f97d3ce..91ebf3c8e07197f21dab0246f10f968de30b2c92 100644
--- a/lib/llvmopencl/LLVMUtils.h
+++ b/lib/llvmopencl/LLVMUtils.h
@@ -76,6 +76,7 @@ POCL_EXPORT
 bool isGVarUsedByFunction(llvm::GlobalVariable *GVar, llvm::Function *F);
 
 // Checks if the given argument of Func is a local buffer.
+POCL_EXPORT
 bool isLocalMemFunctionArg(llvm::Function *Func, unsigned ArgIndex);
 
 // determines if GVar is OpenCL program-scope variable