From c0211fb8fcf1bd9dd70e81d6befe0c12e9ac718f Mon Sep 17 00:00:00 2001
From: Topi Leppanen <topi.leppanen@tuni.fi>
Date: Fri, 20 Oct 2023 11:22:10 +0300
Subject: [PATCH] Almaif add DBDevice

It can be used to transparently reconfigure FPGAs from different
vendors using a presynthesized database of FPGA bitstreams.
The database with the bitstreams is generated by github.com/cpc/AFOCL.
---
 CHANGES                                       |   4 +-
 CMakeLists.txt                                |   4 +-
 doc/notes_5_0.rst                             |  14 +
 doc/sphinx/source/almaif.rst                  |  49 +-
 examples/accel/CMakeLists.txt                 |   2 +-
 examples/accel/firmware.c                     |  90 +++-
 lib/CL/devices/almaif/AlmaIFDevice.cc         | 248 ++++++----
 lib/CL/devices/almaif/AlmaIFDevice.hh         |  42 +-
 lib/CL/devices/almaif/AlmaIFRegion.cc         |   6 +-
 lib/CL/devices/almaif/AlmaIFRegion.hh         |   7 +-
 lib/CL/devices/almaif/AlmaifCompile.cc        |  24 +-
 lib/CL/devices/almaif/AlmaifCompile.hh        |   7 +-
 .../AlmaIFBitstreamDatabaseManager.cc         | 298 +++++++++++
 .../AlmaIFBitstreamDatabaseManager.hh         | 101 ++++
 lib/CL/devices/almaif/AlmaifDB/DBDevice.cc    | 167 +++++++
 lib/CL/devices/almaif/AlmaifDB/DBDevice.hh    |  80 +++
 lib/CL/devices/almaif/AlmaifDB/tiny-json.c    | 461 ++++++++++++++++++
 lib/CL/devices/almaif/AlmaifDB/tiny-json.h    | 176 +++++++
 lib/CL/devices/almaif/AlmaifShared.hh         |  13 +-
 lib/CL/devices/almaif/CMakeLists.txt          |  22 +-
 lib/CL/devices/almaif/EmulationDevice.cc      |  30 +-
 lib/CL/devices/almaif/EmulationDevice.hh      |   1 -
 lib/CL/devices/almaif/EmulationRegion.cc      |   4 +-
 lib/CL/devices/almaif/MMAPDevice.cc           |  16 +-
 lib/CL/devices/almaif/MMAPDevice.hh           |   2 +-
 lib/CL/devices/almaif/MMAPRegion.cc           |  74 +--
 lib/CL/devices/almaif/MMAPRegion.hh           |   3 +-
 lib/CL/devices/almaif/XilinxXrtDevice.cc      | 270 ++++++++++
 lib/CL/devices/almaif/XilinxXrtDevice.hh      |  71 +++
 .../devices/almaif/XilinxXrtExternalRegion.cc | 128 +++++
 .../devices/almaif/XilinxXrtExternalRegion.hh |  63 +++
 lib/CL/devices/almaif/XilinxXrtRegion.cc      | 263 ++++++++++
 .../{XrtRegion.hh => XilinxXrtRegion.hh}      |  25 +-
 lib/CL/devices/almaif/XrtDevice.cc            |  75 ---
 lib/CL/devices/almaif/XrtDevice.hh            |  40 --
 lib/CL/devices/almaif/XrtRegion.cc            | 184 -------
 lib/CL/devices/almaif/almaif.cc               | 397 +++++++++------
 ...CompileTCE.cc => AlmaifCompileOpenasip.cc} | 331 ++++++-------
 .../almaif/openasip/AlmaifCompileOpenasip.hh  |  69 +++
 .../almaif/openasip/AlmaifCompileTCE.hh       |  67 ---
 .../almaif/openasip/TTASimControlRegion.cc    |  72 +--
 .../devices/almaif/openasip/TTASimDevice.cc   |  36 +-
 .../devices/almaif/openasip/TTASimDevice.hh   |   4 +-
 .../devices/almaif/openasip/TTASimRegion.cc   |  66 +--
 lib/CL/devices/builtin_kernels.cc             |  38 ++
 lib/CL/devices/builtin_kernels.hh             |   8 +-
 tools/data/tta_test_machines/axim_sep.adf     |   2 +-
 tools/data/tta_test_machines/relative_sep.adf |   6 +-
 48 files changed, 3132 insertions(+), 1028 deletions(-)
 create mode 100644 lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.cc
 create mode 100644 lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.hh
 create mode 100644 lib/CL/devices/almaif/AlmaifDB/DBDevice.cc
 create mode 100644 lib/CL/devices/almaif/AlmaifDB/DBDevice.hh
 create mode 100644 lib/CL/devices/almaif/AlmaifDB/tiny-json.c
 create mode 100644 lib/CL/devices/almaif/AlmaifDB/tiny-json.h
 create mode 100644 lib/CL/devices/almaif/XilinxXrtDevice.cc
 create mode 100644 lib/CL/devices/almaif/XilinxXrtDevice.hh
 create mode 100644 lib/CL/devices/almaif/XilinxXrtExternalRegion.cc
 create mode 100644 lib/CL/devices/almaif/XilinxXrtExternalRegion.hh
 create mode 100644 lib/CL/devices/almaif/XilinxXrtRegion.cc
 rename lib/CL/devices/almaif/{XrtRegion.hh => XilinxXrtRegion.hh} (71%)
 delete mode 100644 lib/CL/devices/almaif/XrtDevice.cc
 delete mode 100644 lib/CL/devices/almaif/XrtDevice.hh
 delete mode 100644 lib/CL/devices/almaif/XrtRegion.cc
 rename lib/CL/devices/almaif/openasip/{AlmaifCompileTCE.cc => AlmaifCompileOpenasip.cc} (69%)
 create mode 100644 lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.hh
 delete mode 100644 lib/CL/devices/almaif/openasip/AlmaifCompileTCE.hh

diff --git a/CHANGES b/CHANGES
index 8062d9faf..95e6786fd 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,10 +1,12 @@
+
+Change logs have been moved to doc/notes*.rst
+
 4.1 Unreleased
 =================
 
 Notable User Facing Changes
 ---------------------------
 - Added support for Coarse-Grained buffer SVM on CUDA devices.
-
 Notable Fixes
 -------------
 
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ad0b95b24..b407f19ef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1424,8 +1424,8 @@ if(ENABLE_ALMAIF_DEVICE)
     set(XRT $ENV{XILINX_XRT})
 
     if(NOT XRT_INCLUDEDIR)
-      if(EXISTS "${XRT}/include/xrt")
-        set(XRT_INCLUDEDIR "${XRT}/include/xrt" CACHE PATH "XRT include dir")
+      if(EXISTS "${XRT}/include")
+        set(XRT_INCLUDEDIR "${XRT}/include" CACHE PATH "XRT include dir")
       else()
         message(FATAL_ERROR "please provide -DXRT_INCLUDEDIR=... to CMake")
       endif()
diff --git a/doc/notes_5_0.rst b/doc/notes_5_0.rst
index 7a5e2d517..7606f17fc 100644
--- a/doc/notes_5_0.rst
+++ b/doc/notes_5_0.rst
@@ -37,6 +37,20 @@ CUDA driver has gained some new features:
   never been tested properly and will be removed in the next release. SPIR-V
   remains the supported option.
 
+
+* AlmaIF: Add DBDevice backend, which can be used to transparently
+  reconfigure FPGAs from different vendors using a database of bitstreams.
+  The database with the bitstreams is generated by the AFOCL project
+  (github.com/cpc/AFOCL). See the following publication for more info:
+  Topi Leppänen, Joonas Multanen, Leevi Leppänen, Pekka Jääskeläinen:
+  "AFOCL: Portable OpenCL Programming of FPGAs via Automated
+   Built-in Kernel Management",
+   2023 IEEE Nordic Circuits and Systems Conference (NorCAS),
+   Aalborg, Denmark, 2023, pp. 1-7,
+   doi: 10.1109/NorCAS58970.2023.10305457
+
+
+
 ================
 Acknowledgements
 ================
diff --git a/doc/sphinx/source/almaif.rst b/doc/sphinx/source/almaif.rst
index 933dd3f16..ea54fb35a 100644
--- a/doc/sphinx/source/almaif.rst
+++ b/doc/sphinx/source/almaif.rst
@@ -179,7 +179,7 @@ First, set CMAKE variable VIVADO_PATH to point to the directory with the
 you can set ENABLE_TCE to 1 to enable
 RTL and firmware generation of various OpenASIP TTA cores with different memory configurations.
 Then, you can simulate them with ttasim instruction set simulator by running
-``LLVM=1 ../tools/scripts/run_almaif_tests`` from the build directory.
+``../tools/scripts/run_almaif_tests`` from the build directory.
 
 2. If you have Vitis HLS installed, set VITIS_HLS_PATH to point to the directory
 with the vitis_hls executable.
@@ -249,8 +249,40 @@ Example usage of the mode can be found in examples/accel/CMakelists.txt, which
 generates standalone tests using both ttasim and RTL simulator (ghdl) to run the
 example0 kernel on various TTA configurations.
 
-Wrapping new hardware component
--------------------------------
+
+Using a bitstream database
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+You can use the AlmaIF-driver with the cross-vendor bitstream databases generated
+with the `AFOCL-project <http://github.com/cpc/AFOCL>`_.
+That project generates a directory-based database with JSON-based metadata.
+The database contains the bitstreams and firmware-files necessary to implement
+the set of built-in kernels defined in the JSON file.
+
+The bitstream database device will report all the built-in kernel implementations it can
+find from the database in clGetDeviceInfo's CL_DEVICE_BUILT_IN_KERNELS-query.
+The bitstream database device ("0xF") will automatically fetch the bitstream from the database
+and reconfigure the FPGA when the user enqueues a built-in kernel for execution.
+Therefore, the user does not need to handle the bitstream binaries themselves,
+since the OpenCL implementation reconfigures the FPGA behind-the-scenes.
+
+To use AFOCL-databases in PoCL, it is enough to point the AlmaIF-driver to the database
+with the env variable::
+
+  POCL_DEVICES=almaif POCL_ALMAIF0_PARAMETERS=0xF,<path/to/afocl-db> ./accel_example
+
+At the moment, the public AlmaIF-driver and AFOCL include support only for the
+Xilinx Alveo U280 device, but adding support for other Alveo devices should be easy.
+In the AFOCL publication the methodology was also demonstrated with Intel Arria 10,
+but the code for that is not yet upstreamed. The driver is built to hide the
+vendor-specific details from the end user, with different AlmaIFDevice backends
+taking care of vendor-specific details.
+For more information about the bitstream database,
+see our :ref:`AFOCL-publication (2023) <publications>`.
+
+
+Wrapping a new hardware component
+---------------------------------
 
 This section will walk through the addition of new implementation for an existing
 built-in kernel.
@@ -310,8 +342,19 @@ in your academic work, please cite the following publication::
       AUTHOR = {Topi Leppänen and Atro Lotvonen and Panagiotis Mousouliotis and Joonas Multanen and Georgios Keramidas and Pekka Jääskeläinen},
     }
 
+.. _publications:
+
 The other relevant publications::
 
+    @INPROCEEDINGS{afocl2023,
+      AUTHOR={Leppänen, Topi and Multanen, Joonas and Leppänen, Leevi and Jääskeläinen, Pekka},
+      TITLE={{AFOCL}: Portable {OpenCL} Programming of {FPGAs} via Automated Built-in Kernel Management},
+      BOOKTITLE={2023 IEEE Nordic Circuits and Systems Conference ({NorCAS})},
+      YEAR={2023},
+      PAGES={1-7},
+      DOI={10.1109/NorCAS58970.2023.10305457}
+    }
+
     @ARTICLE{leppanen2022,
       AUTHOR={Leppänen, Topi and Lotvonen, Atro and Jääskeläinen, Pekka},
       TITLE={Cross-vendor programming abstraction for diverse heterogeneous platforms},
diff --git a/examples/accel/CMakeLists.txt b/examples/accel/CMakeLists.txt
index e138a0b0f..e6c7299f6 100644
--- a/examples/accel/CMakeLists.txt
+++ b/examples/accel/CMakeLists.txt
@@ -138,7 +138,7 @@ endif ()
         bitstreams/${core_name}.bit
       COMMAND cp vivado_${core_name}_1/vivado_${core_name}_1.gen/sources_1/bd/toplevel/hw_handoff/toplevel.hwh
         bitstreams/${core_name}.hwh
-      DEPENDS ${ADF} ${CMAKE_CURRENT_SOURCE_DIR}/generate_project.tcl)
+      DEPENDS ${core_name}_rtl ${CMAKE_CURRENT_SOURCE_DIR}/generate_project.tcl)
     add_custom_target(${core_name}_bs DEPENDS bitstreams/${core_name}.bit)
 
     add_dependencies(bitstreams ${core_name}_bs)
diff --git a/examples/accel/firmware.c b/examples/accel/firmware.c
index 5b6246829..69c5577d8 100644
--- a/examples/accel/firmware.c
+++ b/examples/accel/firmware.c
@@ -54,6 +54,8 @@
 #define __cq__ __attribute__ ((address_space (5)))
 #define __buffer__ __attribute__ ((address_space (1)))
 
+// NOTE: This enum contains highly experimental built-in kernel IDs, that are
+// subject to change in future PoCL releases without any deprecation period.
 enum BuiltinKernelId : uint16_t
 {
   // CD = custom device, BI = built-in
@@ -77,7 +79,14 @@ enum BuiltinKernelId : uint16_t
   POCL_CDBI_MUL_I16 = 15,
   POCL_CDBI_STREAMOUT_I32 = 16,
   POCL_CDBI_STREAMIN_I32 = 17,
-  POCL_CDBI_LAST = 18,
+  POCL_CDBI_VOTE_U32 = 18,
+  POCL_CDBI_VOTE_U8 = 19,
+  POCL_CDBI_DNN_CONV2D_NCHW_F32 = 20,
+  POCL_CDBI_OPENVX_SCALEIMAGE_NN_U8 = 21,
+  POCL_CDBI_OPENVX_SCALEIMAGE_BL_U8 = 22,
+  POCL_CDBI_OPENVX_TENSORCONVERTDEPTH_WRAP_U8_F32 = 23,
+  POCL_CDBI_OPENVX_MINMAXLOC_R1_U8 = 24,
+  POCL_CDBI_LAST = 25,
   POCL_CDBI_JIT_COMPILER = 0xFFFF
 };
 
@@ -224,10 +233,6 @@ main ()
 #endif
 
           uint32_t kernel_id = packet->kernel_object_low;
-          if (kernel_id > POCL_CDBI_MUL_I32)
-            {
-              continue;
-            }
 
           __buffer__ uint32_t *kernarg_ptr
               = (__buffer__ uint32_t *)(packet->kernarg_address_low);
@@ -235,25 +240,70 @@ main ()
           __buffer__ uint32_t *arg0 = (__buffer__ uint32_t *)kernarg_ptr[0];
           __buffer__ uint32_t *arg1 = (__buffer__ uint32_t *)kernarg_ptr[1];
           __buffer__ uint32_t *arg2 = (__buffer__ uint32_t *)kernarg_ptr[2];
-
-          uint32_t dim_x = packet->grid_size_x;
-
-          for (int idx = 0; idx < dim_x; idx++)
+          __buffer__ uint32_t *arg3 = (__buffer__ uint32_t *)kernarg_ptr[3];
+          __buffer__ uint32_t *arg4 = (__buffer__ uint32_t *)kernarg_ptr[4];
+
+          // Check how many dimensions are in use, and set the unused ones
+          // to 1.
+          int dim_x = packet->grid_size_x;
+          int dim_y = (packet->dimensions >= 2) ? (packet->grid_size_y) : 1;
+          int dim_z = (packet->dimensions == 3) ? (packet->grid_size_z) : 1;
+
+          uint8_t min = 255;
+          uint8_t max = 0;
+          uint32_t minlocx, minlocy, maxlocx, maxlocy;
+          for (int z = 0; z < dim_z; z++)
             {
-              // Do the operation based on the kernel_object (integer id)
-              switch (kernel_id)
+              for (int y = 0; y < dim_y; y++)
                 {
-                case (POCL_CDBI_COPY_I8):
-                  arg1[idx] = arg0[idx];
-                  break;
-                case (POCL_CDBI_ADD_I32):
-                  arg2[idx] = arg0[idx] + arg1[idx];
-                  break;
-                case (POCL_CDBI_MUL_I32):
-                  arg2[idx] = arg0[idx] * arg1[idx];
-                  break;
+                  for (int x = 0; x < dim_x; x++)
+                    {
+                      // Linearize grid
+                      int idx = z * dim_y * dim_x + dim_x * y + x;
+                      // Do the operation based on the kernel_object (integer
+                      // id)
+                      switch (kernel_id)
+                        {
+                        case (POCL_CDBI_COPY_I8):
+                          arg1[idx] = arg0[idx];
+                          break;
+                        case (POCL_CDBI_ADD_I32):
+                          arg2[idx] = arg0[idx] + arg1[idx];
+                          break;
+                        case (POCL_CDBI_MUL_I32):
+                          arg2[idx] = arg0[idx] * arg1[idx];
+                          break;
+                        case (POCL_CDBI_OPENVX_MINMAXLOC_R1_U8):
+                          {
+                            uint8_t pixel = ((__buffer__ uint8_t *)arg0)[idx];
+                            if (pixel < min)
+                              {
+                                min = pixel;
+                                minlocx = x;
+                                minlocy = y;
+                              }
+                            if (pixel > max)
+                              {
+                                max = pixel;
+                                maxlocx = x;
+                                maxlocy = y;
+                              }
+                          }
+                          break;
+                        }
+                    }
                 }
             }
+
+          if (kernel_id == POCL_CDBI_OPENVX_MINMAXLOC_R1_U8)
+            {
+              arg1[0] = min;
+              arg2[0] = max;
+              arg3[0] = minlocx;
+              arg3[1] = minlocy;
+              arg4[0] = maxlocx;
+              arg4[1] = maxlocy;
+            }
 #ifdef BASE_ADDRESS
           cc_l = control_region[ALMAIF_STATUS_REG_CC_LOW / 4];
           // cc_h = control_region[ALMAIF_STATUS_REG_CC_HIGH/4];
diff --git a/lib/CL/devices/almaif/AlmaIFDevice.cc b/lib/CL/devices/almaif/AlmaIFDevice.cc
index 3d0f7e268..183bf1520 100644
--- a/lib/CL/devices/almaif/AlmaIFDevice.cc
+++ b/lib/CL/devices/almaif/AlmaIFDevice.cc
@@ -42,6 +42,7 @@ AlmaIFDevice::~AlmaIFDevice() {
   delete InstructionMemory;
   delete CQMemory;
   delete DataMemory;
+  delete ExternalMemory;
   memory_region_t *el, *tmp;
   LL_FOREACH_SAFE(AllocRegions, el, tmp) { free(el); }
 }
@@ -67,25 +68,25 @@ void AlmaIFDevice::discoverDeviceParameters() {
     BaseAddress + 3*segment_size + Dmem_size - PRIVATE_MEM_SIZE        --> Local
     scratchpad memory for stack etc Where segment_size = 0x10000 (size of imem)
     */
-    imem_size = ControlMemory->Read32(ALMAIF_INFO_IMEM_SIZE_LEGACY);
-    // cq_size = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY);
-    cq_size = 4 * 64;
-    // dmem_size = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY);
+    ImemSize = ControlMemory->Read32(ALMAIF_INFO_IMEM_SIZE_LEGACY);
+    // CQSize = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY);
+    CQSize = 4 * 64;
+    // DmemSize = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY);
     int private_mem_size =
         pocl_get_int_option("POCL_ALMAIF_PRIVATE_MEM_SIZE", ALMAIF_DEFAULT_PRIVATE_MEM_SIZE);
 
-    dmem_size = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY) -
-                private_mem_size - cq_size;
+    DmemSize = ControlMemory->Read32(ALMAIF_INFO_PMEM_SIZE_LEGACY) -
+               private_mem_size - CQSize;
     PointerSize = 4;
     RelativeAddressing = false;
 
-    uint32_t segment_size = imem_size;
-    imem_start = segment_size;
-    dmem_start = 3 * segment_size;
-    cq_start = dmem_start + dmem_size;
-    cq_start += ControlMemory->PhysAddress;
-    imem_start += ControlMemory->PhysAddress;
-    dmem_start += ControlMemory->PhysAddress;
+    uint32_t segment_size = ImemSize;
+    ImemStart = segment_size;
+    DmemStart = 3 * segment_size;
+    CQStart = DmemStart + DmemSize;
+    CQStart += ControlMemory->PhysAddress();
+    ImemStart += ControlMemory->PhysAddress();
+    DmemStart += ControlMemory->PhysAddress();
   } else if (interface_version == ALMAIF_VERSION_3) {
     uint64_t feature_flags =
         ControlMemory->Read64(ALMAIF_INFO_FEATURE_FLAGS_LOW);
@@ -95,123 +96,123 @@ void AlmaIFDevice::discoverDeviceParameters() {
     RelativeAddressing =
         (feature_flags & ALMAIF_FF_BIT_AXI_MASTER) ? (false) : (true);
 
-    imem_size = ControlMemory->Read32(ALMAIF_INFO_IMEM_SIZE);
-    cq_size = ControlMemory->Read32(ALMAIF_INFO_CQMEM_SIZE_LOW);
-    dmem_size = ControlMemory->Read32(ALMAIF_INFO_DMEM_SIZE_LOW);
+    ImemSize = ControlMemory->Read32(ALMAIF_INFO_IMEM_SIZE);
+    CQSize = ControlMemory->Read32(ALMAIF_INFO_CQMEM_SIZE_LOW);
+    DmemSize = ControlMemory->Read32(ALMAIF_INFO_DMEM_SIZE_LOW);
 
-    imem_start = ControlMemory->Read64(ALMAIF_INFO_IMEM_START_LOW);
-    cq_start = ControlMemory->Read64(ALMAIF_INFO_CQMEM_START_LOW);
-    dmem_start = ControlMemory->Read64(ALMAIF_INFO_DMEM_START_LOW);
+    ImemStart = ControlMemory->Read64(ALMAIF_INFO_IMEM_START_LOW);
+    CQStart = ControlMemory->Read64(ALMAIF_INFO_CQMEM_START_LOW);
+    DmemStart = ControlMemory->Read64(ALMAIF_INFO_DMEM_START_LOW);
 
     if (RelativeAddressing) {
       POCL_MSG_PRINT_ALMAIF("Almaif: Enabled relative addressing\n");
-      cq_start += ControlMemory->PhysAddress;
-      imem_start += ControlMemory->PhysAddress;
-      dmem_start += ControlMemory->PhysAddress;
+      CQStart += ControlMemory->PhysAddress();
+      ImemStart += ControlMemory->PhysAddress();
+      DmemStart += ControlMemory->PhysAddress();
     }
 
   } else {
     POCL_ABORT_UNIMPLEMENTED("Unsupported AlmaIF version\n");
   }
-  POCL_MSG_PRINT_ALMAIF("cq_start=%p imem_start=%p dmem_start=%p\n",
-                       (void *)cq_start, (void *)imem_start,
-                       (void *)dmem_start);
-  POCL_MSG_PRINT_ALMAIF("cq_size=%u imem_size=%u dmem_size=%u\n", cq_size,
-                       imem_size, dmem_size);
+  POCL_MSG_PRINT_ALMAIF("CQStart=%p ImemStart=%p DmemStart=%p\n",
+                        (void *)CQStart, (void *)ImemStart, (void *)DmemStart);
+  POCL_MSG_PRINT_ALMAIF("CQSize=%u ImemSize=%u DmemSize=%u\n", CQSize, ImemSize,
+                        DmemSize);
   POCL_MSG_PRINT_ALMAIF("ControlMemory->PhysAddress=%zu\n",
-                       ControlMemory->PhysAddress);
+                        ControlMemory->PhysAddress());
   AllocRegions = (memory_region_t *)calloc(1, sizeof(memory_region_t));
   pocl_init_mem_region(AllocRegions,
-                       dmem_start + ALMAIF_DEFAULT_CONSTANT_MEM_SIZE,
-                       dmem_size - ALMAIF_DEFAULT_CONSTANT_MEM_SIZE);
+                       DmemStart + ALMAIF_DEFAULT_CONSTANT_MEM_SIZE,
+                       DmemSize - ALMAIF_DEFAULT_CONSTANT_MEM_SIZE);
   POCL_MSG_PRINT_ALMAIF(
       "Reserved %d bytes at the start of global memory for constant data\n",
       ALMAIF_DEFAULT_CONSTANT_MEM_SIZE);
 }
 
-void AlmaIFDevice::loadProgramToDevice(almaif_kernel_data_s *kd,
-                                       cl_kernel kernel,
-                                       _cl_command_node *cmd) {
-  assert(kd);
+void AlmaIFDevice::loadProgramToDevice(almaif_kernel_data_s *KernelData,
+                                       cl_kernel Kernel,
+                                       _cl_command_node *Command) {
+  assert(KernelData);
 
-  if (kd->imem_img_size == 0) {
+  if (KernelData->imem_img_size == 0) {
     char img_file[POCL_MAX_PATHNAME_LENGTH];
     char cachedir[POCL_MAX_PATHNAME_LENGTH];
     // first try specialized
-    pocl_cache_kernel_cachedir_path(img_file, kernel->program,
-                                    cmd->program_device_i, kernel,
-                                    "/parallel.img", cmd, 1);
+    pocl_cache_kernel_cachedir_path(img_file, Kernel->program,
+                                    Command->program_device_i, Kernel,
+                                    "/parallel.img", Command, 1);
     if (pocl_exists(img_file)) {
-      pocl_cache_kernel_cachedir_path(
-          cachedir, kernel->program, cmd->program_device_i, kernel, "", cmd, 1);
-      preread_images(cachedir, kd);
+      pocl_cache_kernel_cachedir_path(cachedir, Kernel->program,
+                                      Command->program_device_i, Kernel, "",
+                                      Command, 1);
+      prereadImages(cachedir, KernelData);
     } else {
       // if it doesn't exist, try specialized with local sizes 0-0-0
       // should pick either 0-0-0 or 0-0-0-goffs0
       _cl_command_node cmd_copy;
-      memcpy(&cmd_copy, cmd, sizeof(_cl_command_node));
+      memcpy(&cmd_copy, Command, sizeof(_cl_command_node));
       cmd_copy.command.run.pc.local_size[0] = 0;
       cmd_copy.command.run.pc.local_size[1] = 0;
       cmd_copy.command.run.pc.local_size[2] = 0;
 
-      pocl_cache_kernel_cachedir_path(img_file, kernel->program,
-                                      cmd->program_device_i, kernel,
+      pocl_cache_kernel_cachedir_path(img_file, Kernel->program,
+                                      Command->program_device_i, Kernel,
                                       "/parallel.img", &cmd_copy, 1);
       if (pocl_exists(img_file)) {
-        pocl_cache_kernel_cachedir_path(cachedir, kernel->program,
-                                        cmd->program_device_i, kernel, "",
+        pocl_cache_kernel_cachedir_path(cachedir, Kernel->program,
+                                        Command->program_device_i, Kernel, "",
                                         &cmd_copy, 1);
       } else {
-        pocl_cache_kernel_cachedir_path(cachedir, kernel->program,
-                                        cmd->program_device_i, kernel, "",
+        pocl_cache_kernel_cachedir_path(cachedir, Kernel->program,
+                                        Command->program_device_i, Kernel, "",
                                         &cmd_copy, 0);
       }
       POCL_MSG_PRINT_ALMAIF("Specialized kernel not found, using %s\n",
                            cachedir);
-      preread_images(cachedir, kd);
+      prereadImages(cachedir, KernelData);
     }
   }
 
-  assert(kd->imem_img_size > 0);
+  assert(KernelData->imem_img_size > 0);
 
   ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_RESET_CMD);
 
-  InstructionMemory->CopyToMMAP(InstructionMemory->PhysAddress, kd->imem_img,
-                                kd->imem_img_size);
+  InstructionMemory->CopyToMMAP(InstructionMemory->PhysAddress(),
+                                KernelData->imem_img,
+                                KernelData->imem_img_size);
   POCL_MSG_PRINT_ALMAIF("IMEM image written: %zu / %zu B\n",
-                       InstructionMemory->PhysAddress, kd->imem_img_size);
+                        InstructionMemory->PhysAddress(),
+                        KernelData->imem_img_size);
 
   ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_CONTINUE_CMD);
   HwClockStart = pocl_gettimemono_ns();
 }
 
-void AlmaIFDevice::preread_images(const char *kernel_cachedir,
-                                  almaif_kernel_data_s *kd) {
+void AlmaIFDevice::prereadImages(const std::string &KernelCacheDir,
+                                 almaif_kernel_data_s *KernelData) {
   POCL_MSG_PRINT_ALMAIF("Reading image files\n");
   uint64_t temp = 0;
   size_t size = 0;
   char *content = NULL;
 
-  char module_fn[POCL_MAX_PATHNAME_LENGTH];
-  snprintf(module_fn, POCL_MAX_PATHNAME_LENGTH, "%s/parallel.img",
-           kernel_cachedir);
+  std::string module_fn = KernelCacheDir + "/parallel.img";
 
-  if (pocl_exists(module_fn)) {
-    int res = pocl_read_file(module_fn, &content, &temp);
+  if (pocl_exists(module_fn.c_str())) {
+    int res = pocl_read_file(module_fn.c_str(), &content, &temp);
     size = (size_t)temp;
     assert(res == 0);
     assert(size > 0);
-    assert(size < InstructionMemory->Size);
-    kd->imem_img = content;
-    kd->imem_img_size = size;
+    assert(size < InstructionMemory->Size());
+    KernelData->imem_img = content;
+    KernelData->imem_img_size = size;
     content = NULL;
   } else
-    POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n", module_fn);
+    POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n",
+               module_fn.c_str());
 
-  snprintf(module_fn, POCL_MAX_PATHNAME_LENGTH, "%s/kernel_address.txt",
-           kernel_cachedir);
-  if (pocl_exists(module_fn)) {
-    int res = pocl_read_file(module_fn, &content, &temp);
+  module_fn = KernelCacheDir + "/kernel_address.txt";
+  if (pocl_exists(module_fn.c_str())) {
+    int res = pocl_read_file(module_fn.c_str(), &content, &temp);
     assert(res == 0);
     size = (size_t)temp;
     assert(size > 0);
@@ -219,21 +220,22 @@ void AlmaIFDevice::preread_images(const char *kernel_cachedir,
     uint32_t kernel_address = 0;
     sscanf(content, "kernel address = %u", &kernel_address);
     assert(kernel_address != 0);
-    kd->kernel_address = kernel_address;
+    KernelData->kernel_address = kernel_address;
     content = NULL;
   } else
-    POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n", module_fn);
+    POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n",
+               module_fn.c_str());
 
   /*  snprintf(module_fn, POCL_MAX_PATHNAME_LENGTH, "%s/parallel_local.img",
-             kernel_cachedir);
+             KernelCacheDir);
     if (pocl_exists(module_fn)) {
       int res = pocl_read_file(module_fn, &content, &temp);
       assert(res == 0);
       size = (size_t)temp;
       if (size == 0)
         POCL_MEM_FREE(content);
-      kd->dmem_img = content;
-      kd->dmem_img_size = size;
+      KernelData->dmem_img = content;
+      KernelData->dmem_img_size = size;
 
       uint32_t kernel_addr = 0;
       if (size) {
@@ -242,7 +244,7 @@ void AlmaIFDevice::preread_images(const char *kernel_cachedir,
         kernel_addr = *up;
      }
       POCL_MSG_PRINT_ALMAIF("Kernel address (%0x) found\n", kernel_addr);
-      kd->kernel_address = kernel_addr;
+      KernelData->kernel_address = kernel_addr;
       content = NULL;
     } else
       POCL_ABORT("ALMAIF: %s for this kernel does not exist.\n", module_fn);
@@ -250,43 +252,95 @@ void AlmaIFDevice::preread_images(const char *kernel_cachedir,
 }
 
 void AlmaIFDevice::printMemoryDump() {
-  for (unsigned k = 0; k < CQMemory->Size; k += 4) {
+  for (unsigned k = 0; k < InstructionMemory->Size(); k += 4) {
+    uint32_t value = InstructionMemory->Read32(k);
+    std::cerr << "IMEM at " << k << "=" << value << "\n";
+  }
+  for (unsigned k = 0; k < CQMemory->Size(); k += 4) {
     uint32_t value = CQMemory->Read32(k);
     std::cerr << "CQ at " << k << "=" << value << "\n";
   }
 
-  for (unsigned k = 0; k < DataMemory->Size; k += 4) {
+  for (unsigned k = 0; k < DataMemory->Size(); k += 4) {
     uint32_t value = DataMemory->Read32(k);
     std::cerr << "Data at " << k << "=" << value << "\n";
   }
   std::cerr << std::endl;
 }
 
-void AlmaIFDevice::writeDataToDevice(size_t dst,
-                                     const char *__restrict__ const src,
-                                     size_t size) {
-  if (DataMemory->isInRange(dst)) {
-    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to 0x%zx\n", size, dst);
-    DataMemory->CopyToMMAP(dst, src, size);
-  } else if (ExternalMemory && ExternalMemory->isInRange(dst)) {
-    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to external 0x%zx\n", size,
-                         dst);
-    ExternalMemory->CopyToMMAP(dst, src, size);
+void AlmaIFDevice::writeDataToDevice(pocl_mem_identifier *DstMemId,
+                                     const char *__restrict__ const Src,
+                                     size_t Size, size_t Offset) {
+  chunk_info_t *chunk = (chunk_info_t *)DstMemId->mem_ptr;
+  size_t Dst = chunk->start_address + Offset;
+
+  if (DataMemory->isInRange(Dst)) {
+    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to 0x%zx\n", Size, Dst);
+    DataMemory->CopyToMMAP(Dst, Src, Size);
+  } else if (ExternalMemory && ExternalMemory->isInRange(Dst)) {
+    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to external 0x%zx\n", Size,
+                          Dst);
+    ExternalMemory->CopyToMMAP(Dst, Src, Size);
   } else {
-    POCL_ABORT("Attempt to write data to outside the device memories\n");
+    POCL_ABORT(
+        "Attempt to write data to outside the device memories. Address=%zu\n",
+        Dst);
   }
 }
 
-void AlmaIFDevice::readDataFromDevice(char *__restrict__ const dst, size_t src,
-                                      size_t size) {
-  if (DataMemory->isInRange(src)) {
-    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes from 0x%zx\n", size, src);
-    DataMemory->CopyFromMMAP(dst, src, size);
-  } else if (ExternalMemory && ExternalMemory->isInRange(src)) {
-    POCL_MSG_PRINT_ALMAIF("almaif: Copying 0x%zu bytes from external 0x%zx\n",
-                         size, src);
-    ExternalMemory->CopyFromMMAP(dst, src, size);
+void AlmaIFDevice::readDataFromDevice(char *__restrict__ const Dst,
+                                      pocl_mem_identifier *SrcMemId,
+                                      size_t Size, size_t Offset) {
+
+  chunk_info_t *chunk = (chunk_info_t *)SrcMemId->mem_ptr;
+  POCL_MSG_PRINT_ALMAIF("Reading data with chunk start %zu, and offset %zu\n",
+                        chunk->start_address, Offset);
+  size_t Src = chunk->start_address + Offset;
+  if (DataMemory->isInRange(Src)) {
+    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes from 0x%zx\n", Size, Src);
+    DataMemory->CopyFromMMAP(Dst, Src, Size);
+  } else if (ExternalMemory && ExternalMemory->isInRange(Src)) {
+    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes from external 0x%zx\n",
+                          Size, Src);
+    ExternalMemory->CopyFromMMAP(Dst, Src, Size);
   } else {
-    POCL_ABORT("Attempt to write data to outside the device memories\n");
+    POCL_ABORT(
+        "Attempt to read data from outside the device memories. Address=%zu\n",
+        Src);
   }
 }
+
+size_t AlmaIFDevice::pointerDeviceOffset(pocl_mem_identifier *P) {
+  assert(P->extra == 0);
+  chunk_info_t *chunk = (chunk_info_t *)P->mem_ptr;
+  assert(chunk != NULL);
+  return chunk->start_address;
+}
+
+void AlmaIFDevice::freeBuffer(pocl_mem_identifier *P) {
+  chunk_info_t *chunk = (chunk_info_t *)P->mem_ptr;
+
+  POCL_MSG_PRINT_MEMORY("almaif: freed buffer from 0x%zx\n",
+                        chunk->start_address);
+
+  assert(chunk != NULL);
+  pocl_free_chunk(chunk);
+}
+
+cl_int AlmaIFDevice::allocateBuffer(pocl_mem_identifier *P, size_t Size) {
+
+  assert(P->mem_ptr == NULL);
+  chunk_info_t *chunk = NULL;
+
+  chunk = pocl_alloc_buffer(AllocRegions, Size);
+  if (chunk == NULL)
+    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+
+  POCL_MSG_PRINT_MEMORY("almaif: allocated %zu bytes from 0x%zx\n", Size,
+                        chunk->start_address);
+
+  P->mem_ptr = chunk;
+  P->version = 0;
+  P->extra = 0;
+  return CL_SUCCESS;
+}
diff --git a/lib/CL/devices/almaif/AlmaIFDevice.hh b/lib/CL/devices/almaif/AlmaIFDevice.hh
index ccbfc161d..af17a8188 100644
--- a/lib/CL/devices/almaif/AlmaIFDevice.hh
+++ b/lib/CL/devices/almaif/AlmaIFDevice.hh
@@ -32,6 +32,7 @@
 #include "pocl_types.h"
 
 #include <stdlib.h>
+#include <string>
 
 struct almaif_kernel_data_s;
 
@@ -40,8 +41,8 @@ public:
   AlmaIFDevice();
   virtual ~AlmaIFDevice();
 
-  virtual void loadProgramToDevice(almaif_kernel_data_s *kd, cl_kernel kernel,
-                                   _cl_command_node *cmd);
+  virtual void loadProgramToDevice(almaif_kernel_data_s *KernelData,
+                                   cl_kernel Kernel, _cl_command_node *Command);
 
   AlmaIFRegion *ControlMemory;
   AlmaIFRegion *InstructionMemory;
@@ -63,22 +64,35 @@ public:
 
   void printMemoryDump();
 
-  virtual void writeDataToDevice(size_t dst, const char *__restrict__ const src,
-                                 size_t size);
-  virtual void readDataFromDevice(char *__restrict__ const dst, size_t src,
-                                  size_t size);
+  virtual void writeDataToDevice(pocl_mem_identifier *DstMemId,
+                                 const char *__restrict__ const Src,
+                                 size_t Size, size_t Offset);
+  virtual void readDataFromDevice(char *__restrict__ const Dst,
+                                  pocl_mem_identifier *SrcMemId, size_t Size,
+                                  size_t Offset);
 
-protected:
   virtual void discoverDeviceParameters();
-  uintptr_t imem_start;
-  uint32_t imem_size;
-  uintptr_t cq_start;
-  uint32_t cq_size;
-  uintptr_t dmem_start;
-  uint32_t dmem_size;
+
+  virtual bool isDBDevice() { return false; }
+
+  // Allocate buffer from AlmaIFDevice's DataMemory or ExternalMemory
+  virtual cl_int allocateBuffer(pocl_mem_identifier *P, size_t Size);
+  // Free buffer from AlmaIFDevice's DataMemory or ExternalMemory
+  virtual void freeBuffer(pocl_mem_identifier *P);
+  // Returns the offset of the allocated buffer, to be used as a kernel argument
+  virtual size_t pointerDeviceOffset(pocl_mem_identifier *P);
+
+protected:
+  uintptr_t ImemStart;
+  uint32_t ImemSize;
+  uintptr_t CQStart;
+  uint32_t CQSize;
+  uintptr_t DmemStart;
+  uint32_t DmemSize;
 
 private:
-  void preread_images(const char *kernel_cachedir, almaif_kernel_data_s *kd);
+  void prereadImages(const std::string &KernelCacheDir,
+                     almaif_kernel_data_s *KernelData);
 };
 
 #endif
diff --git a/lib/CL/devices/almaif/AlmaIFRegion.cc b/lib/CL/devices/almaif/AlmaIFRegion.cc
index 79832bda4..6e197f6c3 100644
--- a/lib/CL/devices/almaif/AlmaIFRegion.cc
+++ b/lib/CL/devices/almaif/AlmaIFRegion.cc
@@ -27,5 +27,9 @@
 AlmaIFRegion::~AlmaIFRegion() {}
 
 bool AlmaIFRegion::isInRange(size_t dst) {
-  return ((dst >= PhysAddress) && (dst < (PhysAddress + Size)));
+  return ((dst >= PhysAddress_) && (dst < (PhysAddress_ + Size_)));
 }
+
+size_t AlmaIFRegion::PhysAddress() { return PhysAddress_; }
+
+size_t AlmaIFRegion::Size() { return Size_; }
diff --git a/lib/CL/devices/almaif/AlmaIFRegion.hh b/lib/CL/devices/almaif/AlmaIFRegion.hh
index 7c167709e..cc0cea825 100644
--- a/lib/CL/devices/almaif/AlmaIFRegion.hh
+++ b/lib/CL/devices/almaif/AlmaIFRegion.hh
@@ -44,9 +44,12 @@ public:
   virtual void CopyInMem(size_t source, size_t destination, size_t bytes) = 0;
 
   virtual bool isInRange(size_t dst);
+  virtual size_t PhysAddress();
+  virtual size_t Size();
 
-  size_t PhysAddress;
-  size_t Size;
+protected:
+  size_t PhysAddress_;
+  size_t Size_;
 };
 
 #endif
diff --git a/lib/CL/devices/almaif/AlmaifCompile.cc b/lib/CL/devices/almaif/AlmaifCompile.cc
index 9066fc7d9..a8181b93d 100644
--- a/lib/CL/devices/almaif/AlmaifCompile.cc
+++ b/lib/CL/devices/almaif/AlmaifCompile.cc
@@ -37,12 +37,13 @@
 #endif
 
 #ifdef ENABLE_COMPILER
-#include "openasip/AlmaifCompileTCE.hh"
+#include "openasip/AlmaifCompileOpenasip.hh"
 #endif
 
 extern int pocl_offline_compile;
 
-int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameters) {
+int pocl_almaif_compile_init(unsigned j, cl_device_id dev,
+                             const std::string &parameters) {
   AlmaifData *d = (AlmaifData *)dev->data;
 
   d->compilationData = (compilation_data_t *)pocl_aligned_malloc(
@@ -86,8 +87,7 @@ int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameter
   d->compilationData->current_kernel = NULL;
   SETUP_DEVICE_CL_VERSION(1, 2);
 
-  // dev->available = CL_TRUE;
-  dev->available = pocl_offline_compile ? CL_FALSE : CL_TRUE;
+  d->Available = pocl_offline_compile ? CL_FALSE : CL_TRUE;
 
   dev->compiler_available = true;
   dev->linker_available = true;
@@ -97,12 +97,12 @@ int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameter
 
 #ifdef ENABLE_COMPILER
   // TODO tce specific
-  adi->initialize_device = pocl_almaif_tce_initialize;
-  adi->cleanup_device = pocl_almaif_tce_cleanup;
-  adi->compile_kernel = pocl_almaif_tce_compile;
+  adi->initialize_device = pocl_almaif_openasip_initialize;
+  adi->cleanup_device = pocl_almaif_openasip_cleanup;
+  adi->compile_kernel = pocl_almaif_openasip_compile;
   if (pocl_get_bool_option("POCL_ALMAIF_STANDALONE", 0)) {
     adi->produce_standalone_program =
-        pocl_almaif_tce_produce_standalone_program;
+        pocl_almaif_openasip_produce_standalone_program;
   }
   // backend specific init
   POCL_MSG_PRINT_ALMAIF("Starting device specific initializion\n");
@@ -111,8 +111,8 @@ int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameter
   POCL_MSG_PRINT_ALMAIF("Device specific initializion done\n");
 
   SHA1_digest_t digest;
-  pocl_almaif_tce_device_hash(parameters, dev->llvm_target_triplet,
-                              (char *)digest);
+  pocl_almaif_openasip_device_hash(parameters.c_str(), dev->llvm_target_triplet,
+                                   (char *)digest);
   POCL_MSG_PRINT_ALMAIF("ALMAIF TCE DEVICE HASH=%s", (char *)digest);
   adi->build_hash = strdup((char *)digest);
 
@@ -134,8 +134,8 @@ int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameter
   dev->ops->build_poclbinary = pocl_driver_build_poclbinary;
   dev->ops->build_binary = pocl_almaif_build_binary;
 #ifdef ENABLE_COMPILER
-  dev->ops->compile_kernel = pocl_almaif_tce_compile;
-  dev->ops->init_build = pocl_tce_init_build;
+  dev->ops->compile_kernel = pocl_almaif_openasip_compile;
+  dev->ops->init_build = pocl_almaif_openasip_init_build;
 #endif
   return CL_SUCCESS;
 }
diff --git a/lib/CL/devices/almaif/AlmaifCompile.hh b/lib/CL/devices/almaif/AlmaifCompile.hh
index bf8c1b85b..213d2820b 100644
--- a/lib/CL/devices/almaif/AlmaifCompile.hh
+++ b/lib/CL/devices/almaif/AlmaifCompile.hh
@@ -47,7 +47,7 @@ typedef struct compilation_data_s {
   /* device-specific callbacks */
   void (*compile_kernel)(_cl_command_node *cmd, cl_kernel kernel,
                          cl_device_id device, int specialize);
-  int (*initialize_device)(cl_device_id device, const char *parameters);
+  int (*initialize_device)(cl_device_id device, const std::string &parameters);
   int (*cleanup_device)(cl_device_id device);
 
   void (*produce_standalone_program)(AlmaifData *D, _cl_command_node *cmd,
@@ -70,7 +70,8 @@ typedef struct almaif_kernel_data_s {
   uint32_t kernel_md_offset;
 } almaif_kernel_data_t;
 
-int pocl_almaif_compile_init(unsigned j, cl_device_id dev, const char *parameters);
+int pocl_almaif_compile_init(unsigned j, cl_device_id dev,
+                             const std::string &parameters);
 cl_int pocl_almaif_compile_uninit(unsigned j, cl_device_id dev);
 
 extern "C" {
@@ -85,8 +86,6 @@ int pocl_almaif_free_kernel(cl_device_id device, cl_program program,
 int pocl_almaif_build_binary(cl_program program, cl_uint device_i,
                              int link_program, int spir_build);
 
-void preread_images(const char *kernel_cachedir, void *d_void,
-                    almaif_kernel_data_t *kd);
 char *pocl_almaif_compile_build_hash(cl_device_id device);
 
 #endif
diff --git a/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.cc b/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.cc
new file mode 100644
index 000000000..42c3bec9a
--- /dev/null
+++ b/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.cc
@@ -0,0 +1,298 @@
+/* AlmaIFBitstreamDatabaseManager.cc - Parses and responds to queries about
+   AlmaifDB
+
+   Copyright (c) 2023 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#include "AlmaIFBitstreamDatabaseManager.hh"
+
+#include "../AlmaifShared.hh"
+
+#include "pocl_file_util.h"
+
+#include <dirent.h>
+#include <iostream>
+#include <set>
+
+#include "tiny-json.h"
+
+void AlmaIFBitstreamDatabaseManager::parseOverlay(json_t const *Overlay,
+                                                  const std::string &DBPath) {
+  // Reads one overlay description and stores its device-level settings.
+  json_t const *OverlayName = json_getProperty(Overlay, "name");
+  if (!OverlayName || JSON_TEXT != json_getType(OverlayName)) {
+    POCL_ABORT("Error, the overlay name property is not found.");
+  }
+  std::string OverlayNameStr = json_getValue(OverlayName);
+  POCL_MSG_PRINT_ALMAIF("Overlay Name: %s.\n", OverlayNameStr.c_str());
+
+  std::string OverlayPath = DBPath + "/" + OverlayNameStr;
+
+  json_t const *PrDevice = json_getProperty(Overlay, "device");
+  if (!PrDevice || JSON_TEXT != json_getType(PrDevice)) {
+    POCL_ABORT("Overlay doesn't have associated device\n");
+  }
+  std::string PrDeviceName = json_getValue(PrDevice);
+  POCL_MSG_PRINT_ALMAIF("PR device name: %s\n", PrDeviceName.c_str());
+  DEVICE_TYPE PrDeviceEnum = string2DeviceTypeEnum(PrDeviceName);
+  POCL_MSG_PRINT_ALMAIF("PR device enum: %d\n", PrDeviceEnum);
+  // Shared scratch entry; nested parsers fill in names and file paths.
+  struct ProgrammingFiles ProgFilesInfo = {0, PrDeviceEnum, "", "", ""};
+
+  json_t const *OverlayDefaultFilename = json_getProperty(Overlay, "filename");
+  if (!OverlayDefaultFilename ||
+      JSON_TEXT != json_getType(OverlayDefaultFilename)) {
+    POCL_ABORT("Error, the overlay default filename property is not found.");
+  }
+  std::string OverlayDefaultFilenameStr = json_getValue(OverlayDefaultFilename);
+
+  std::string OverlayDefaultFilenamePath =
+      OverlayPath + "/" + OverlayDefaultFilenameStr;
+  // NOTE(review): DefaultFilenamePath_ below is overwritten per overlay.
+  POCL_MSG_PRINT_ALMAIF("Overlay default filename path: %s.\n",
+                        OverlayDefaultFilenamePath.c_str());
+  DefaultFilenamePath_ = OverlayDefaultFilenamePath;
+
+  json_t const *OverlayDefaultKernelName =
+      json_getProperty(Overlay, "default-kernel");
+  if (!OverlayDefaultKernelName ||
+      JSON_TEXT != json_getType(OverlayDefaultKernelName)) {
+    POCL_ABORT("Error, the overlay default kernel name property is not found.");
+  }
+  std::string OverlayDefaultKernelNameStr =
+      json_getValue(OverlayDefaultKernelName);
+  DefaultKernelName_ = OverlayDefaultKernelNameStr;
+
+  json_t const *OverlayExternalMemory =
+      json_getProperty(Overlay, "external-memory");
+  if (!OverlayExternalMemory ||
+      JSON_TEXT != json_getType(OverlayExternalMemory)) {
+    POCL_ABORT("Error, the overlay external-memory property is not found.");
+  }
+  DeviceExternalMemParameters_[PrDeviceEnum] =
+      json_getValue(OverlayExternalMemory);
+  // Every element of "accelerators" is handed to parseAccelerator.
+  json_t const *Accelerators = json_getProperty(Overlay, "accelerators");
+  if (!Accelerators || JSON_ARRAY != json_getType(Accelerators)) {
+    POCL_ABORT("Error, accelerators list not parsed\n");
+  }
+
+  json_t const *Accel;
+  for (Accel = json_getChild(Accelerators); Accel != 0;
+       Accel = json_getSibling(Accel)) {
+    parseAccelerator(Accel, ProgFilesInfo, OverlayPath);
+  }
+}
+
+void AlmaIFBitstreamDatabaseManager::parseAccelerator(
+    json_t const *Accel, struct ProgrammingFiles &ProgFilesInfo,
+    const std::string &OverlayPath) {
+  // Fills in the kernel name and bitstream path, then parses the firmwares.
+  json_t const *AccelNameJs = json_getProperty(Accel, "name");
+  if (!AccelNameJs || JSON_TEXT != json_getType(AccelNameJs)) {
+    POCL_ABORT("Partial bitstream doesn't have a name\n");
+  }
+  ProgFilesInfo.KernelName = json_getValue(AccelNameJs);
+  POCL_MSG_PRINT_ALMAIF("Accelerator name: %s\n",
+                        ProgFilesInfo.KernelName.c_str());
+
+  std::string AcceleratorPath =
+      OverlayPath + "/accelerators/" + ProgFilesInfo.KernelName;
+
+  json_t const *PrBitstream = json_getProperty(Accel, "filename");
+  if (!PrBitstream || JSON_TEXT != json_getType(PrBitstream)) {
+    POCL_ABORT("Partial bitstream filename parsing failed\n");
+  }
+
+  std::string PrBitstreamPath =
+      AcceleratorPath + "/" + json_getValue(PrBitstream);
+  POCL_MSG_PRINT_ALMAIF("Partial bitstream file %s\n", PrBitstreamPath.c_str());
+  ProgFilesInfo.BitstreamPath = PrBitstreamPath;
+
+  json_t const *Firmwares = json_getProperty(Accel, "firmwares");
+  if (!Firmwares || JSON_ARRAY != json_getType(Firmwares)) {
+    POCL_ABORT("Error, firmwares not found\n");
+  }
+  for (json_t const *Firmware = json_getChild(Firmwares); Firmware != 0;
+       Firmware = json_getSibling(Firmware)) {
+    parseFirmware(Firmware, ProgFilesInfo, AcceleratorPath);
+  }
+}
+
+void AlmaIFBitstreamDatabaseManager::parseFirmware(
+    json_t const *Firmware, struct ProgrammingFiles &ProgFilesInfo,
+    const std::string &AcceleratorPath) {
+  // Records the firmware file path, then registers its builtin kernels.
+  json_t const *FilenameJs = json_getProperty(Firmware, "filename");
+  if (!FilenameJs || JSON_TEXT != json_getType(FilenameJs)) {
+    POCL_ABORT("Error, firmware filepath not found from json\n");
+  }
+  ProgFilesInfo.FirmwarePath =
+      AcceleratorPath + "/firmwares/" + json_getValue(FilenameJs);
+
+  json_t const *KernelIds = json_getProperty(Firmware, "builtin-kernels");
+  if (!KernelIds || JSON_ARRAY != json_getType(KernelIds)) {
+    POCL_ABORT("Error, builtin kernels not found\n");
+  }
+  for (json_t const *Id = json_getChild(KernelIds); Id != 0;
+       Id = json_getSibling(Id)) {
+    parseBIKernels(Id, ProgFilesInfo);
+  }
+}
+
+void AlmaIFBitstreamDatabaseManager::parseBIKernels(
+    json_t const *Bik, struct ProgrammingFiles &ProgFilesInfo) {
+  if (JSON_INTEGER != json_getType(Bik)) {
+    POCL_ABORT("Error, Builtin kernel id is wrong type\n");
+  }
+  int64_t BikIDLong = json_getInteger(Bik);
+  // The parsed id is signed, so a negative value must be rejected as well.
+  assert(BikIDLong >= 0 && BikIDLong < 0xFFFF);
+  BuiltinKernelId BikID = (BuiltinKernelId)BikIDLong;
+  SupportedBIKernels_[ProgFilesInfo.FpgaType].push_back(
+      {BikID, ProgFilesInfo.FpgaType, ProgFilesInfo.BitstreamPath,
+       ProgFilesInfo.FirmwarePath, ProgFilesInfo.KernelName});
+  POCL_MSG_PRINT_ALMAIF(
+      "Found support for builtin kernel %d with fw path: %s\n", BikID,
+      ProgFilesInfo.FirmwarePath.c_str());
+}
+
+AlmaIFBitstreamDatabaseManager::AlmaIFBitstreamDatabaseManager(
+    const std::string &DBPath) {
+  // Parses db.json of every DBPath entry whose name contains "overlay".
+  DIR *dp = opendir(DBPath.c_str());
+  if (dp == NULL) {
+    POCL_ABORT("Failed opening the Almaif db directory\n");
+  }
+  struct dirent *dirp;
+  while ((dirp = readdir(dp)) != NULL) {
+    std::string OverlayFolderName = dirp->d_name;
+    if (OverlayFolderName.find("overlay") != std::string::npos) {
+      POCL_MSG_PRINT_ALMAIF("Found overlay dir %s\n",
+                            OverlayFolderName.c_str());
+      std::string BitstreamDatabaseIndexPath =
+          DBPath + "/" + OverlayFolderName + "/db.json";
+      uint64_t Size = 0;
+      char *BitstreamDatabaseIndex = NULL;
+      pocl_read_file(BitstreamDatabaseIndexPath.c_str(),
+                     &BitstreamDatabaseIndex, &Size);
+      if (BitstreamDatabaseIndex == NULL) {
+        POCL_ABORT("Failed reading AlmaifDB index file %s\n",
+                   BitstreamDatabaseIndexPath.c_str());
+      }
+      POCL_MSG_PRINT_ALMAIF("Read file size=%llu\n", (unsigned long long)Size);
+      POCL_MSG_PRINT_ALMAIF("DATABASE FILE %s:\n",
+                            BitstreamDatabaseIndexPath.c_str());
+      POCL_MSG_PRINT_ALMAIF("%s\n", BitstreamDatabaseIndex);
+      POCL_MSG_PRINT_ALMAIF("DATABASE FILE END\n");
+      json_t Mem[256];
+      json_t const *t =
+          json_create(BitstreamDatabaseIndex, Mem, sizeof(Mem) / sizeof(*Mem));
+      if (!t) {
+        POCL_ABORT("Failed opening AlmaifDB as json object\n");
+      }
+      parseOverlay(t, DBPath);
+    }
+  }
+  closedir(dp);
+}
+
+AlmaIFBitstreamDatabaseManager::~AlmaIFBitstreamDatabaseManager() {}
+
+AlmaIFBitstreamDatabaseManager::DEVICE_TYPE
+AlmaIFBitstreamDatabaseManager::string2DeviceTypeEnum(const std::string &Str) {
+  // Case-insensitive lookup of Str in Conversion; POCL_ABORT if unknown.
+  std::string StringToConvert = Str;
+  for (size_t i = 0; i < StringToConvert.length(); i++) {
+    // tolower() is undefined for negative char values, hence the cast.
+    StringToConvert[i] = (char)tolower((unsigned char)Str[i]);
+  }
+  for (size_t j = 0; j < sizeof(Conversion) / sizeof(Conversion[0]); ++j) {
+    if (StringToConvert == Conversion[j].Str) {
+      return Conversion[j].Val;
+    }
+    POCL_MSG_PRINT_ALMAIF(
+        "String-to-enum. String:%s, comparing:%s, lengths:%zu,%zu\n",
+        Conversion[j].Str.c_str(), StringToConvert.c_str(),
+        Conversion[j].Str.length(), StringToConvert.length());
+  }
+  POCL_ABORT("Almaif DB device string to enum conversion failed. String %s\n",
+             Str.c_str());
+}
+
+std::string
+AlmaIFBitstreamDatabaseManager::deviceTypeEnum2String(DEVICE_TYPE DeviceType) {
+  for (int j = 0; j < sizeof(Conversion) / sizeof(Conversion[0]); ++j) {
+    if (DeviceType == Conversion[j].Val) {
+      return Conversion[j].Str;
+    }
+  }
+  POCL_ABORT("Almaif DB device enum to string conversion failed");
+}
+
+const AlmaIFBitstreamDatabaseManager::ProgrammingFiles &
+AlmaIFBitstreamDatabaseManager::getBitstreamFile(BuiltinKernelId BikID,
+                                                 DEVICE_TYPE UsedDeviceType) {
+  // First entry of UsedDeviceType whose id is BikID, otherwise POCL_ABORT.
+  for (const ProgrammingFiles &Iter : SupportedBIKernels_[UsedDeviceType]) {
+    if (Iter.BikID == BikID) {
+      return Iter;
+    }
+  }
+  POCL_ABORT("Built in kernel %d bitstream not found\n", BikID);
+}
+
+const AlmaIFBitstreamDatabaseManager::ProgrammingFiles &
+AlmaIFBitstreamDatabaseManager::getFirmwareFile(BuiltinKernelId BikID,
+                                                DEVICE_TYPE UsedDeviceType) {
+  // Same lookup as getBitstreamFile; only the failure message differs.
+  for (const ProgrammingFiles &Iter : SupportedBIKernels_[UsedDeviceType]) {
+    if (Iter.BikID == BikID) {
+      return Iter;
+    }
+  }
+  POCL_ABORT("Built in kernel %d firmware not found\n", BikID);
+}
+
+std::vector<BuiltinKernelId>
+AlmaIFBitstreamDatabaseManager::supportedBuiltinKernels(
+    DEVICE_TYPE UsedDeviceType) {
+  // Collects the kernel id of every entry stored for UsedDeviceType.
+  std::vector<BuiltinKernelId> Output;
+  for (const ProgrammingFiles &Iter : SupportedBIKernels_[UsedDeviceType]) {
+    Output.push_back((BuiltinKernelId)Iter.BikID);
+  }
+  return Output;
+}
+
+std::string AlmaIFBitstreamDatabaseManager::externalMemoryParameters(
+    DEVICE_TYPE UsedDeviceType) {
+
+  return DeviceExternalMemParameters_[UsedDeviceType];
+}
+
+std::string AlmaIFBitstreamDatabaseManager::defaultBitstream() {
+  return DefaultFilenamePath_;
+}
+
+std::string AlmaIFBitstreamDatabaseManager::defaultKernelName() {
+  return DefaultKernelName_;
+}
diff --git a/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.hh b/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.hh
new file mode 100644
index 000000000..324821b9c
--- /dev/null
+++ b/lib/CL/devices/almaif/AlmaifDB/AlmaIFBitstreamDatabaseManager.hh
@@ -0,0 +1,101 @@
+/* AlmaIFBitstreamDatabaseManager.hh - Parses and responds to queries about
+   AlmaifDB
+
+   Copyright (c) 2023 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#ifndef POCL_ALMAIFBITSTREAMDATABASEMANAGER_H
+#define POCL_ALMAIFBITSTREAMDATABASEMANAGER_H
+
+#include "builtin_kernels.hh"
+
+#include <map>
+#include <string>
+#include <vector>
+
+typedef struct json_s json_t;
+
+// A helper class used by DBDevice to parse the bitstream database.
+// This class can be thought to be the interface from C++ to the JSON-based
+// database. After parsing, the DBDevice will query this class
+// for information about the bitstream database.
+//
+// Since the AFOCL bitstream database format is still very experimental, there
+// is no fixed specification for it. Therefore, this class defines the format
+// since it is responsible for parsing the database.
+// While having a clear, versioned format would obviously be the best solution,
+// maintaining a separate specification at this point would risk it
+// going out-of-date. You can see the separate AFOCL-project for examples of
+// the current database format.
+class AlmaIFBitstreamDatabaseManager {
+public:
+  AlmaIFBitstreamDatabaseManager(const std::string &DBPath);
+  virtual ~AlmaIFBitstreamDatabaseManager();
+
+  enum DEVICE_TYPE { ARRIA10, ALVEOU280 };
+
+  DEVICE_TYPE string2DeviceTypeEnum(const std::string &Str);
+  std::string deviceTypeEnum2String(DEVICE_TYPE DeviceType);
+
+  struct ProgrammingFiles {
+    int BikID;
+    DEVICE_TYPE FpgaType;
+    std::string BitstreamPath;
+    std::string FirmwarePath;
+    std::string KernelName;
+  };
+  const ProgrammingFiles &getBitstreamFile(BuiltinKernelId BikID,
+                                           DEVICE_TYPE UsedDeviceType);
+  const ProgrammingFiles &getFirmwareFile(BuiltinKernelId BikID,
+                                          DEVICE_TYPE UsedDeviceType);
+
+  std::vector<BuiltinKernelId>
+  supportedBuiltinKernels(DEVICE_TYPE UsedDeviceType);
+  std::string externalMemoryParameters(DEVICE_TYPE UsedDeviceType);
+  std::string defaultBitstream();
+  std::string defaultKernelName();
+
+private:
+  void parseOverlay(json_t const *Overlay, const std::string &DBPath);
+  void parseAccelerator(json_t const *Accel,
+                        struct ProgrammingFiles &ProgFilesInfo,
+                        const std::string &OverlayPath);
+  void parseFirmware(json_t const *Firmware,
+                     struct ProgrammingFiles &ProgFilesInfo,
+                     const std::string &AcceleratorPath);
+  void parseBIKernels(json_t const *Bik,
+                      struct ProgrammingFiles &ProgFilesInfo);
+
+  const struct {
+    DEVICE_TYPE Val;
+    const std::string Str;
+  } Conversion[2] = {
+      {ARRIA10, "arria10"},
+      {ALVEOU280, "alveou280"},
+  };
+
+  std::map<DEVICE_TYPE, std::vector<ProgrammingFiles>> SupportedBIKernels_;
+  std::map<DEVICE_TYPE, std::string> DeviceExternalMemParameters_;
+  std::string DefaultFilenamePath_;
+  std::string DefaultKernelName_;
+};
+
+#endif
diff --git a/lib/CL/devices/almaif/AlmaifDB/DBDevice.cc b/lib/CL/devices/almaif/AlmaifDB/DBDevice.cc
new file mode 100644
index 000000000..5097d33ee
--- /dev/null
+++ b/lib/CL/devices/almaif/AlmaifDB/DBDevice.cc
@@ -0,0 +1,167 @@
+/* DBDevice.cc - Device based on parsing Almaif database and instantiating
+ *               other device types based on what it finds from there
+
+   Copyright (c) 2023 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#include "DBDevice.hh"
+
+#include "../AlmaifShared.hh"
+
+#ifdef HAVE_XRT
+#include "../XilinxXrtDevice.hh"
+#include "../XilinxXrtRegion.hh"
+#endif
+
+DBDevice::DBDevice(const std::string &DBPath) : DB_(DBPath) {
+  // Selects the vendor backend via pocl_is_option_set and mirrors its regions.
+  POCL_MSG_PRINT_INFO("Starting bitstream database device initialization");
+
+  bool UseAlveoDevice = false;
+  bool UseIntelDevice = false;
+  if (pocl_is_option_set("XILINX_XRT")) {
+    UseAlveoDevice = true;
+    UsedDeviceType_ = AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ALVEOU280;
+  }
+  if (pocl_is_option_set("INTEL_ACL")) {
+    UseIntelDevice = true;
+    UsedDeviceType_ = AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ARRIA10;
+  }
+  if (UseAlveoDevice && UseIntelDevice) {
+    POCL_ABORT("AlmaIF: DBDevice only supports one vendor FPGA at the time\n");
+  }
+  // Reject the no-vendor case before UsedDeviceType_ (never assigned) is read.
+  if (!UseAlveoDevice && !UseIntelDevice) {
+    POCL_ABORT("AlmaIF: DBDevice didn't find any vendor FPGAs\n");
+  }
+  if (UseIntelDevice) {
+    POCL_ABORT_UNIMPLEMENTED("AlmaIF intel device not implemented\n");
+  }
+
+  std::string ExternalMemParams = DB_.externalMemoryParameters(UsedDeviceType_);
+  Dev_ = new XilinxXrtDevice(DB_.defaultKernelName(), DB_.defaultBitstream(),
+                             ExternalMemParams, 0);
+
+  ControlMemory = Dev_->ControlMemory;
+  InstructionMemory = Dev_->InstructionMemory;
+  CQMemory = Dev_->CQMemory;
+  DataMemory = Dev_->DataMemory;
+  RelativeAddressing = Dev_->RelativeAddressing;
+  HasHardwareClock = Dev_->HasHardwareClock;
+  HwClockFrequency = Dev_->HwClockFrequency;
+  PointerSize = Dev_->PointerSize;
+  ExternalMemory = Dev_->ExternalMemory;
+  AllocRegions = Dev_->AllocRegions;
+}
+
+DBDevice::~DBDevice() { delete Dev_; }
+
+void DBDevice::programBIKernelBitstream(BuiltinKernelId BikID) {
+  // Programs the bitstream for BikID unless that path is already loaded.
+  const AlmaIFBitstreamDatabaseManager::ProgrammingFiles &BitstreamToProgram =
+      DB_.getBitstreamFile(BikID, UsedDeviceType_);
+  std::string BitstreamPath = BitstreamToProgram.BitstreamPath;
+  std::string KernelName = BitstreamToProgram.KernelName;
+
+  if (BitstreamPath == LoadedBitstreamPath_) {
+    return;
+  }
+
+  POCL_MSG_PRINT_ALMAIF("Programming built-in kernel %s bitstream from: %s\n",
+                        KernelName.c_str(), BitstreamPath.c_str());
+  if (UsedDeviceType_ ==
+      AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ALVEOU280) {
+    ((XilinxXrtDevice *)Dev_)
+        ->programBitstream(KernelName.c_str(), BitstreamPath.c_str(), 0);
+  } else if (UsedDeviceType_ ==
+             AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ARRIA10) {
+    POCL_ABORT_UNIMPLEMENTED("AlmaIF intel device not implemented\n");
+  } else {
+    POCL_ABORT("Almaif neither device activated\n");
+  }
+
+  LoadedBitstreamPath_ = BitstreamPath;
+}
+
+void DBDevice::programBIKernelFirmware(BuiltinKernelId BikID) {
+  // Writes BikID's firmware to InstructionMemory unless it is already loaded.
+  const AlmaIFBitstreamDatabaseManager::ProgrammingFiles &BitstreamToProgram =
+      DB_.getFirmwareFile(BikID, UsedDeviceType_);
+  std::string FirmwarePath = BitstreamToProgram.FirmwarePath;
+
+  if (FirmwarePath == LoadedFirmwarePath_) {
+    return;
+  }
+  POCL_MSG_PRINT_ALMAIF("Programming built-in kernel firmware from: %s\n",
+                        FirmwarePath.c_str());
+  // Reset command is issued first; the continue command follows the reload.
+  ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_RESET_CMD);
+
+  if (UsedDeviceType_ ==
+      AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ALVEOU280) {
+    ((XilinxXrtRegion *)InstructionMemory)->initRegion(FirmwarePath.c_str());
+  } else if (UsedDeviceType_ ==
+             AlmaIFBitstreamDatabaseManager::DEVICE_TYPE::ARRIA10) {
+    POCL_ABORT_UNIMPLEMENTED("AlmaIF intel device not implemented\n");
+  } else {
+    POCL_ABORT("Neither device activated\n");
+  }
+  ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_CONTINUE_CMD);
+
+  POCL_MSG_PRINT_ALMAIF("Programming done");
+  LoadedFirmwarePath_ = FirmwarePath;
+}
+
+void DBDevice::loadProgramToDevice(almaif_kernel_data_s *KernelData,
+                                   cl_kernel Kernel,
+                                   _cl_command_node *Command) {
+  Dev_->loadProgramToDevice(KernelData, Kernel, Command);
+}
+
+void DBDevice::printMemoryDump() { Dev_->printMemoryDump(); }
+
+void DBDevice::writeDataToDevice(pocl_mem_identifier *DstMemId,
+                                 const char *__restrict__ const Src,
+                                 size_t Size, size_t Offset) {
+  Dev_->writeDataToDevice(DstMemId, Src, Size, Offset);
+}
+
+void DBDevice::readDataFromDevice(char *__restrict__ const Dst,
+                                  pocl_mem_identifier *SrcMemId, size_t Size,
+                                  size_t Offset) {
+  Dev_->readDataFromDevice(Dst, SrcMemId, Size, Offset);
+}
+
+cl_int DBDevice::allocateBuffer(pocl_mem_identifier *P, size_t Size) {
+  return Dev_->allocateBuffer(P, Size); // forward the backend's status code
+}
+
+void DBDevice::freeBuffer(pocl_mem_identifier *P) { Dev_->freeBuffer(P); }
+
+size_t DBDevice::pointerDeviceOffset(pocl_mem_identifier *P) {
+  return Dev_->pointerDeviceOffset(P); // forward the backend's offset
+}
+
+void DBDevice::discoverDeviceParameters() { Dev_->discoverDeviceParameters(); }
+
+std::vector<BuiltinKernelId> DBDevice::supportedBuiltinKernels() {
+  return DB_.supportedBuiltinKernels(UsedDeviceType_);
+}
diff --git a/lib/CL/devices/almaif/AlmaifDB/DBDevice.hh b/lib/CL/devices/almaif/AlmaifDB/DBDevice.hh
new file mode 100644
index 000000000..0004fb636
--- /dev/null
+++ b/lib/CL/devices/almaif/AlmaifDB/DBDevice.hh
@@ -0,0 +1,80 @@
+/* DBDevice.hh - Device based on parsing Almaif database and instantiating
+ *               other device types based on what it finds from there
+
+   Copyright (c) 2023 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#ifndef POCL_DBDEVICE_H
+#define POCL_DBDEVICE_H
+
+#include "../AlmaIFDevice.hh"
+#include "AlmaIFBitstreamDatabaseManager.hh"
+
+// A class that acts as an interface between the Almaif-driver
+// and the underlying FPGA device.
+// This class is an FPGA vendor-agnostic AlmaIFDevice.
+// It instantiates a separate vendor-specific AlmaIFDevice-class.
+// Many of the class methods are simply forwarded as is to the
+// underlying vendor-specific AlmaIFDevice stored in the private
+// Dev_-variable.
+//
+// This class uses the AlmaIFBitstreamDatabaseManager class to parse
+// the bitstream database, and to fetch the bitstream and firmware
+// filepaths from there.
+class DBDevice : public AlmaIFDevice {
+
+public:
+  DBDevice(const std::string &DBPath);
+  ~DBDevice();
+
+  virtual void loadProgramToDevice(almaif_kernel_data_s *KernelData,
+                                   cl_kernel Kernel, _cl_command_node *Command);
+  void printMemoryDump();
+  void writeDataToDevice(pocl_mem_identifier *DstMemId,
+                         const char *__restrict__ const Src, size_t Size,
+                         size_t Offset) override;
+  void readDataFromDevice(char *__restrict__ const Dst,
+                          pocl_mem_identifier *SrcMemId, size_t Size,
+                          size_t Offset) override;
+  cl_int allocateBuffer(pocl_mem_identifier *P, size_t Size) override;
+  void freeBuffer(pocl_mem_identifier *P) override;
+  size_t pointerDeviceOffset(pocl_mem_identifier *P) override;
+
+  virtual void programBIKernelFirmware(BuiltinKernelId BikID);
+  virtual void programBIKernelBitstream(BuiltinKernelId BikID);
+
+  virtual std::vector<BuiltinKernelId> supportedBuiltinKernels();
+  virtual void discoverDeviceParameters();
+
+  bool isDBDevice() override { return true; }
+
+protected:
+private:
+  AlmaIFBitstreamDatabaseManager DB_; // source of bitstream/firmware lookups
+  AlmaIFDevice *Dev_ = nullptr;       // wrapped device; null until one is set
+
+  AlmaIFBitstreamDatabaseManager::DEVICE_TYPE UsedDeviceType_;
+
+  std::string LoadedBitstreamPath_ = "";
+  std::string LoadedFirmwarePath_ = "";
+};
+
+#endif
diff --git a/lib/CL/devices/almaif/AlmaifDB/tiny-json.c b/lib/CL/devices/almaif/AlmaifDB/tiny-json.c
new file mode 100644
index 000000000..795715c5e
--- /dev/null
+++ b/lib/CL/devices/almaif/AlmaifDB/tiny-json.c
@@ -0,0 +1,461 @@
+
+/*
+
+<https://github.com/rafagafe/tiny-json>
+
+  Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+  SPDX-License-Identifier: MIT
+  Copyright (c) 2016-2018 Rafa Garcia <rafagarcia77@gmail.com>.
+
+  Permission is hereby  granted, free of charge, to any  person obtaining a copy
+  of this software and associated  documentation files (the "Software"), to deal
+  in the Software  without restriction, including without  limitation the rights
+  to  use, copy,  modify, merge,  publish, distribute,  sublicense, and/or  sell
+  copies  of  the Software,  and  to  permit persons  to  whom  the Software  is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in all
+  copies or substantial portions of the Software.
+
+  THE SOFTWARE  IS PROVIDED "AS  IS", WITHOUT WARRANTY  OF ANY KIND,  EXPRESS OR
+  IMPLIED,  INCLUDING BUT  NOT  LIMITED TO  THE  WARRANTIES OF  MERCHANTABILITY,
+  FITNESS FOR  A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT  SHALL THE
+  AUTHORS  OR COPYRIGHT  HOLDERS  BE  LIABLE FOR  ANY  CLAIM,  DAMAGES OR  OTHER
+  LIABILITY, WHETHER IN AN ACTION OF  CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE  OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+
+*/
+
+#include <string.h>
+#include <ctype.h>
+#include "tiny-json.h"
+
+/** Pool that hands out json properties from a caller-supplied array. */
+typedef struct jsonStaticPool_s {
+    json_t* mem;      /**< Pointer to array of json properties.      */
+    unsigned int qty; /**< Length of the array of json properties.   */
+    unsigned int nextFree;  /**< The index of the next free json property. */
+    jsonPool_t pool;  /**< Generic handle; mapped back with json_containerOf. */
+} jsonStaticPool_t;
+
+/* Search a property by its name in a JSON object. */
+json_t const* json_getProperty( json_t const* obj, char const* property ) {
+    json_t const* it = obj->u.c.child;
+    /* Walk the child list; nodes without a name can never match. */
+    while ( it && !( it->name && 0 == strcmp( it->name, property ) ) )
+        it = it->sibling;
+    return it; /* null once the list is exhausted */
+}
+
+/* Search a property by its name in a JSON object and return its value. */
+char const* json_getPropertyValue( json_t const* obj, char const* property ) {
+    json_t const* const field = json_getProperty( obj, property );
+    /* JSON_OBJ and JSON_ARRAY (the two lowest codes) hold no value string. */
+    if ( !field || json_getType( field ) <= JSON_ARRAY )
+        return 0;
+    return json_getValue( field );
+}
+
+/* Internal prototypes: */
+static char* goBlank( char* str );
+static char* goNum( char* str );
+static json_t* poolInit( jsonPool_t* pool );
+static json_t* poolAlloc( jsonPool_t* pool );
+static char* objValue( char* ptr, json_t* obj, jsonPool_t* pool );
+static char* setToNull( char* ch );
+static bool isEndOfPrimitive( char ch );
+
+/* Parse a string to get a json, taking the nodes from a caller-supplied pool. */
+json_t const* json_createWithPool( char *str, jsonPool_t *pool ) {
+    char* ptr = goBlank( str );
+    if ( !ptr || (*ptr != '{' && *ptr != '[') ) return 0;
+    json_t* obj = pool->init( pool );
+    if ( !obj ) return 0; /* the pool could not provide the root node */
+    obj->name    = 0;
+    obj->sibling = 0;
+    obj->u.c.child = 0;
+    ptr = objValue( ptr, obj, pool );
+    return ptr ? obj : 0;
+}
+
+/* Parse a string to get a json, using mem[0..qty-1] as node storage. */
+json_t const* json_create( char* str, json_t mem[], unsigned int qty ) {
+    /* The root node always lives in mem[0], so an empty pool cannot be used. */
+    if ( !mem || qty == 0 ) return 0;
+    jsonStaticPool_t spool = { .mem = mem, .qty = qty };
+    spool.pool.init = poolInit;
+    spool.pool.alloc = poolAlloc;
+    return json_createWithPool( str, &spool.pool );
+}
+
+/** Translate the character that follows a backslash into the one it denotes.
+  * Examples: 'b' -> '\\b', 'n' -> '\\n', 't' -> '\\t'
+  * @param ch The escape character.
+  * @retval  The character code, or '\0' if ch is not a supported escape. */
+static char getEscape( char ch ) {
+    switch ( ch ) {
+        case '\"': return '\"';
+        case '\\': return '\\';
+        case '/':  return '/';
+        case 'b':  return '\b';
+        case 'f':  return '\f';
+        case 'n':  return '\n';
+        case 'r':  return '\r';
+        case 't':  return '\t';
+        default:   return '\0';
+    }
+}
+
+/** Check the 4 characters of a \uXXXX escape (the code point is not decoded).
+  * @param str Pointer to first digit.
+  * @retval '?' If the four characters are hexadecimal digits.
+  * @retval '\0' In other cases. */
+static unsigned char getCharFromUnicode( unsigned char const* str ) {
+    unsigned char const* const end = str + 4;
+    for( ; str != end; ++str )
+        if ( !isxdigit( *str ) )
+            return '\0';
+    return '?';
+}
+
+/** Parse a string and replace the escape sequences by the characters they mean.
+  * This parser stops when it finds the character '\"'. Then replaces '\"' by '\0'.
+  * @param str Pointer to first character.
+  * @retval Pointer to the character right after the closing quote. If success.
+  * @retval Null pointer if any error occurs. */
+static char* parseString( char* str ) {
+    unsigned char* head = (unsigned char*)str; /* read position */
+    unsigned char* tail = (unsigned char*)str; /* write position, never ahead of head */
+    for( ; *head; ++head, ++tail ) {
+        if ( *head == '\"' ) {
+            *tail = '\0';
+            return (char*)++head;
+        }
+        if ( *head == '\\' ) {
+            if ( *++head == 'u' ) {
+                char const ch = getCharFromUnicode( ++head );
+                if ( ch == '\0' ) return 0;
+                *tail = ch;
+                head += 3; /* the loop increment steps over the fourth hex digit */
+            }
+            else {
+                char const esc = getEscape( *head );
+                if ( esc == '\0' ) return 0;
+                *tail = esc;
+            }
+        }
+        else *tail = *head;
+    }
+    return 0; /* input ended before a closing quote was found */
+}
+
+/** Read a quoted property name followed by ':' and store the name.
+  * @param ptr Pointer to the opening '\"' of the name.
+  * @param property The property that receives the name.
+  * @retval Pointer to the first character of the property value on success.
+  * @retval Null pointer if any error occurs. */
+static char* propertyName( char* ptr, json_t* property ) {
+    char* const name = ptr + 1; /* step over the opening quote */
+    property->name = name;
+    char* after = parseString( name );
+    if ( after ) after = goBlank( after );
+    if ( !after || *after != ':' ) return 0;
+    /* Continue behind the colon, past any blanks in front of the value. */
+    return goBlank( after + 1 );
+}
+
+/** Parse the value of a property whose type is JSON_TEXT.
+  * @param ptr Pointer to first character ('\"').
+  * @param property The property whose value pointer and type are updated.
+  * @retval Pointer to the character after the closing quote on success.
+  * @retval Null pointer if any error occurs. */
+static char* textValue( char* ptr, json_t* property ) {
+    /* The stored value has to begin behind the opening quote, not on it. */
+    property->u.value += 1;
+    char* const rest = parseString( ptr + 1 );
+    if ( rest ) property->type = JSON_TEXT;
+    return rest;
+}
+
+/** Check that the text at ptr starts with the complete string str.
+  * @param ptr Text being parsed.
+  * @param str Null-terminated literal that is expected.
+  * @retval Pointer to the character after the matched literal.
+  * @retval Null pointer on the first mismatch. */
+static char* checkStr( char* ptr, char const* str ) {
+    for( ; *str != '\0'; ++ptr, ++str )
+        if ( *ptr != *str )
+            return 0;
+    return ptr;
+}
+
+/** Parse one of the literals true, false or null.
+  * The character after the literal is overwritten with '\0' unless it is '}' or ']'.
+  * @param ptr Pointer to first character.
+  * @param property Property handler whose type is set.
+  * @param value String with the primitive literal.
+  * @param type The code of the type. ( JSON_BOOLEAN or JSON_NULL )
+  * @retval The pointer returned by setToNull() on success.
+  * @retval Null pointer if any error occurs. */
+static char* primitiveValue( char* ptr, json_t* property, char const* value, jsonType_t type ) {
+    char* const end = checkStr( ptr, value );
+    /* The literal must be followed by a separator, blank or closing bracket. */
+    if ( !end || !isEndOfPrimitive( *end ) ) return 0;
+    property->type = type;
+    return setToNull( end );
+}
+
+/** Parse the literal true.
+  * The character after the literal is overwritten with '\0' unless it is '}' or ']'.
+  * @param ptr Pointer to first character.
+  * @param property Property handler whose type is set to JSON_BOOLEAN.
+  * @retval The pointer returned by primitiveValue() on success.
+  * @retval Null pointer if any error occurs. */
+static char* trueValue( char* ptr, json_t* property ) {
+    return primitiveValue( ptr, property, "true", JSON_BOOLEAN );
+}
+
+/** Parse the literal false.
+  * The character after the literal is overwritten with '\0' unless it is '}' or ']'.
+  * @param ptr Pointer to first character.
+  * @param property Property handler whose type is set to JSON_BOOLEAN.
+  * @retval The pointer returned by primitiveValue() on success.
+  * @retval Null pointer if any error occurs. */
+static char* falseValue( char* ptr, json_t* property ) {
+    return primitiveValue( ptr, property, "false", JSON_BOOLEAN );
+}
+
+/** Parse the literal null.
+  * The character after the literal is overwritten with '\0' unless it is '}' or ']'.
+  * @param ptr Pointer to first character.
+  * @param property Property handler whose type is set to JSON_NULL.
+  * @retval The pointer returned by primitiveValue() on success.
+  * @retval Null pointer if any error occurs. */
+static char* nullValue( char* ptr, json_t* property ) {
+    return primitiveValue( ptr, property, "null", JSON_NULL );
+}
+
+/** Analyze the exponential part of a real number (after the 'e' or 'E').
+  * @param ptr Pointer to first character.
+  * @retval Pointer to the first non-digit character after the exponent on success.
+  * @retval Null pointer if any error occurs. */
+static char* expValue( char* ptr ) {
+    if ( *ptr == '-' || *ptr == '+' ) ++ptr;
+    /* Cast first: handing a negative char to isdigit() is undefined behavior. */
+    if ( !isdigit( (unsigned char)(*ptr) ) ) return 0;
+    return goNum( ptr + 1 );
+}
+
+/** Analyze the decimal part of a real number (after the '.').
+  * @param ptr Pointer to first character.
+  * @retval Pointer to the first non-digit character after the fraction on success.
+  * @retval Null pointer if any error occurs. */
+static char* fraqValue( char* ptr ) {
+    /* Cast first: handing a negative char to isdigit() is undefined behavior. */
+    if ( !isdigit( (unsigned char)(*ptr) ) ) return 0;
+    /* goNum() reports running into the terminator as a null pointer; pass it on. */
+    return goNum( ptr + 1 );
+}
+
+/** Parse a numerical value.
+  * The character after the number is overwritten with '\0' unless it is '}' or ']'.
+  * @param ptr Pointer to first character.
+  * @param property Property whose type is set to JSON_REAL or JSON_INTEGER; its value pointer must already be set.
+  * @retval The pointer returned by setToNull() on success.
+  * @retval Null pointer if any error occurs. */
+static char* numValue( char* ptr, json_t* property ) {
+    if ( *ptr == '-' ) ++ptr;
+    if ( !isdigit( (unsigned char)(*ptr) ) ) return 0; /* cast: negative char is UB */
+    if ( *ptr != '0' ) {
+        ptr = goNum( ptr );
+        if ( !ptr ) return 0;
+    }
+    else if ( isdigit( (unsigned char)(*++ptr) ) ) return 0; /* no leading zeros */
+    property->type = JSON_INTEGER;
+    if ( *ptr == '.' ) {
+        ptr = fraqValue( ptr + 1 );
+        if ( !ptr ) return 0;
+        property->type = JSON_REAL;
+    }
+    if ( *ptr == 'e' || *ptr == 'E' ) {
+        ptr = expValue( ptr + 1 );
+        if ( !ptr ) return 0;
+        property->type = JSON_REAL;
+    }
+    if ( !isEndOfPrimitive( *ptr ) ) return 0;
+    if ( JSON_INTEGER == property->type ) { /* reject what does not fit in 64 bits */
+        char const* value = property->u.value;
+        bool const negative = *value == '-';
+        static char const min[] = "-9223372036854775808";
+        static char const max[] = "9223372036854775807";
+        unsigned int const maxdigits = ( negative? sizeof min: sizeof max ) - 1;
+        unsigned int const len = ( unsigned int const ) ( ptr - value );
+        if ( len > maxdigits ) return 0;
+        if ( len == maxdigits ) { /* same length: string order equals numeric order */
+            char const tmp = *ptr;
+            *ptr = '\0'; /* terminate temporarily so strcmp() sees only the number */
+            char const* const threshold = negative ? min: max;
+            if ( 0 > strcmp( threshold, value ) ) return 0;
+            *ptr = tmp;
+        }
+    }
+    ptr = setToNull( ptr );
+    return ptr;
+}
+
+/** Append a property to the child list of a JSON object or array.
+  * @param obj The handler of the JSON object or array.
+  * @param property The handler of the property to be added. */
+static void add( json_t* obj, json_t* property ) {
+    property->sibling = 0;
+    /* An empty container gets its first child; otherwise the new node is
+     * linked behind the current tail. Either way it becomes the new tail. */
+    if ( obj->u.c.child )
+        obj->u.c.last_child->sibling = property;
+    else
+        obj->u.c.child = property;
+    obj->u.c.last_child = property;
+}
+
+/** Parse a JSON object or array, including all nested values, without recursion.
+  * @param ptr Pointer to first character.
+  * @param obj The handler of the JSON root object or array.
+  * @param pool The handler of a json pool for creating json instances.
+  * @retval Pointer to first character after the value. If success.
+  * @retval Null pointer if any error occurs. */
+static char* objValue( char* ptr, json_t* obj, jsonPool_t* pool ) {
+    obj->type    = *ptr == '{' ? JSON_OBJ : JSON_ARRAY;
+    obj->u.c.child = 0;
+    obj->sibling = 0;
+    ptr++;
+    for(;;) {
+        ptr = goBlank( ptr );
+        if ( !ptr ) return 0;
+        if ( *ptr == ',' ) { /* commas are skipped wherever they appear */
+            ++ptr;
+            continue;
+        }
+        char const endchar = ( obj->type == JSON_OBJ )? '}': ']';
+        if ( *ptr == endchar ) {
+            *ptr = '\0';
+            json_t* parentObj = obj->sibling; /* an open container keeps its parent here */
+            if ( !parentObj ) return ++ptr;   /* the root was closed: done */
+            obj->sibling = 0;
+            obj = parentObj;
+            ++ptr;
+            continue;
+        }
+        json_t* property = pool->alloc( pool );
+        if ( !property ) return 0;
+        if( obj->type != JSON_ARRAY ) {
+            if ( *ptr != '\"' ) return 0;
+            ptr = propertyName( ptr, property );
+            if ( !ptr ) return 0;
+        }
+        else property->name = 0;
+        add( obj, property );
+        property->u.value = ptr;
+        switch( *ptr ) {
+            case '{':
+                property->type    = JSON_OBJ;
+                property->u.c.child = 0;
+                property->sibling = obj; /* remember the parent until the container closes */
+                obj = property;
+                ++ptr;
+                break;
+            case '[':
+                property->type    = JSON_ARRAY;
+                property->u.c.child = 0;
+                property->sibling = obj; /* remember the parent until the container closes */
+                obj = property;
+                ++ptr;
+                break;
+            case '\"': ptr = textValue( ptr, property );  break;
+            case 't':  ptr = trueValue( ptr, property );  break;
+            case 'f':  ptr = falseValue( ptr, property ); break;
+            case 'n':  ptr = nullValue( ptr, property );  break;
+            default:   ptr = numValue( ptr, property );   break;
+        }
+        if ( !ptr ) return 0;
+    }
+}
+
+/** Reset a static pool and hand out its first slot.
+  * @param pool The handler of the pool.
+  * @return The first element of the backing array. */
+static json_t* poolInit( jsonPool_t* pool ) {
+    jsonStaticPool_t* const self = json_containerOf( pool, jsonStaticPool_t, pool );
+    self->nextFree = 1; /* index 0 is the slot being returned right now */
+    return self->mem;
+}
+
+/** Take the next unused slot of a static pool.
+  * @param pool The handler of the pool.
+  * @retval The handler of the new instance if success.
+  * @retval Null pointer if every slot of the array is already in use. */
+static json_t* poolAlloc( jsonPool_t* pool ) {
+    jsonStaticPool_t* const self = json_containerOf( pool, jsonStaticPool_t, pool );
+    bool const exhausted = self->nextFree >= self->qty;
+    return exhausted ? 0 : self->mem + self->nextFree++;
+}
+
+/** Check whether a character belongs to a set.
+  * @param ch Character value to be checked.
+  * @param set Set of characters. It is just a null-terminated string.
+  * @return true if ch is a member of the set, false otherwise. */
+static bool isOneOfThem( char ch, char const* set ) {
+    /* strchr() would also match the terminator, so '\0' is never a member. */
+    if ( ch == '\0' )
+        return false;
+    return strchr( set, ch ) != 0;
+}
+
+/** Advance a pointer for as long as it points to a character from a set.
+  * @param str The initial pointer value.
+  * @param set Set of characters. It is just a null-terminated string.
+  * @return The final pointer value or null pointer if the null character was found. */
+static char* goWhile( char* str, char const* set ) {
+    while ( *str != '\0' && isOneOfThem( *str, set ) )
+        ++str;
+    /* Stopping on the terminator means the text ran out: report that as null
+     * rather than returning a pointer to the '\0'. */
+    return ( *str != '\0' ) ? str : 0;
+}
+
+/** Set of characters that defines a blank. */
+static char const* const blank = " \n\r\t\f";
+
+/** Skip the white space characters listed in blank.
+  * @param str The initial pointer value.
+  * @return Pointer to the first non-blank character, or null pointer if the null character was found first. */
+static char* goBlank( char* str ) {
+    return goWhile( str, blank );
+}
+
+/** Advance a pointer for as long as it points to a decimal digit character.
+  * @param str The initial pointer value.
+  * @return The final pointer value or null pointer if the null character was found. */
+static char* goNum( char* str ) {
+    for( ; *str != '\0'; ++str )
+        /* Cast first: a negative char passed to isdigit() is undefined. */
+        if ( !isdigit( (unsigned char)(*str) ) )
+            return str;
+    return 0;
+}
+
+/** Set of characters that defines the end of an array or a JSON object. */
+static char const* const endofblock = "}]";
+
+/** Terminate a value in place: overwrite *ch with '\0' unless it is '}' or ']'.
+  * @return Pointer past the overwritten character, or ch itself if untouched. */
+static char* setToNull( char* ch ) {
+    if ( isOneOfThem( *ch, endofblock ) ) return ch;
+    *ch = '\0';
+    return ch + 1;
+}
+
+/** Tell whether ch may follow a primitive value: ',', a blank, '}' or ']'. */
+static bool isEndOfPrimitive( char ch ) {
+    return ch == ',' || isOneOfThem( ch, blank ) || isOneOfThem( ch, endofblock );
+}
diff --git a/lib/CL/devices/almaif/AlmaifDB/tiny-json.h b/lib/CL/devices/almaif/AlmaifDB/tiny-json.h
new file mode 100644
index 000000000..2b527e7af
--- /dev/null
+++ b/lib/CL/devices/almaif/AlmaifDB/tiny-json.h
@@ -0,0 +1,176 @@
+
+/*
+
+<https://github.com/rafagafe/tiny-json>
+     
+  Licensed under the MIT License <http://opensource.org/licenses/MIT>.
+  SPDX-License-Identifier: MIT
+  Copyright (c) 2016-2018 Rafa Garcia <rafagarcia77@gmail.com>.
+
+  Permission is hereby  granted, free of charge, to any  person obtaining a copy
+  of this software and associated  documentation files (the "Software"), to deal
+  in the Software  without restriction, including without  limitation the rights
+  to  use, copy,  modify, merge,  publish, distribute,  sublicense, and/or  sell
+  copies  of  the Software,  and  to  permit persons  to  whom  the Software  is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in all
+  copies or substantial portions of the Software.
+
+  THE SOFTWARE  IS PROVIDED "AS  IS", WITHOUT WARRANTY  OF ANY KIND,  EXPRESS OR
+  IMPLIED,  INCLUDING BUT  NOT  LIMITED TO  THE  WARRANTIES OF  MERCHANTABILITY,
+  FITNESS FOR  A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT  SHALL THE
+  AUTHORS  OR COPYRIGHT  HOLDERS  BE  LIABLE FOR  ANY  CLAIM,  DAMAGES OR  OTHER
+  LIABILITY, WHETHER IN AN ACTION OF  CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+  OUT OF OR IN CONNECTION WITH THE SOFTWARE  OR THE USE OR OTHER DEALINGS IN THE
+  SOFTWARE.
+    
+*/
+
+#ifndef _TINY_JSON_H_
+#define	_TINY_JSON_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#define json_containerOf( ptr, type, member ) \
+    ((type*)( (char*)(ptr) - offsetof( type, member ) )) /* ptr parenthesised: it may be an expression */
+
+/** @defgroup tinyJson Tiny JSON parser.
+  * @{ */
+
+/** Codes of the supported JSON value types; the order matters, see below. */
+typedef enum {
+    JSON_OBJ, JSON_ARRAY, JSON_TEXT, JSON_BOOLEAN, /* containers must stay first: */
+    JSON_INTEGER, JSON_REAL, JSON_NULL /* json_getPropertyValue() tests '<= JSON_ARRAY' */
+} jsonType_t;
+
+/** Structure to handle JSON properties. */
+typedef struct json_s {
+    struct json_s* sibling; /**< Next element of the same object/array, or null. */
+    char const* name;       /**< Property name; null when the property is unnamed. */
+    union {
+        char const* value;  /**< Text of a scalar value, inside the parsed string. */
+        struct {
+            struct json_s* child;      /**< First element of an object/array. */
+            struct json_s* last_child; /**< Last element, used when appending. */
+        } c;
+    } u;
+    jsonType_t type;        /**< Tells which member of u is in use. */
+} json_t;
+
+/** Parse a string to get a json.
+  * @param str String pointer with a JSON object or array. It will be modified.
+  * @param mem Array of json properties to allocate.
+  * @param qty Number of elements of mem.
+  * @retval Null pointer if anything went wrong in the parse process.
+  * @retval A valid handler of a json if the parse process was successful.
+  *         This property is always unnamed; its type is JSON_OBJ or JSON_ARRAY. */
+json_t const* json_create( char* str, json_t mem[], unsigned int qty );
+
+/** Get the name of a json property.
+  * @param json A valid handler of a json property.
+  * @retval Pointer to a null-terminated string if the property has a name.
+  * @retval Null pointer if the property is unnamed. */
+static inline char const* json_getName( json_t const* json ) {
+    return json->name;
+}
+
+/** Get the value of a json property.
+  * The type of property cannot be JSON_OBJ or JSON_ARRAY.
+  * @param property A valid handler of a json property.
+  * @return Pointer to null-terminated string with the value. */
+static inline char const* json_getValue( json_t const* property ) {
+    return property->u.value;
+}
+
+/** Get the type of a json property.
+  * @param json A valid handler of a json property.
+  * @return The code of type.*/
+static inline jsonType_t json_getType( json_t const* json ) {
+    return json->type;
+}
+
+/** Get the next sibling of a JSON property that is within a JSON object or array.
+  * @param json A valid handler of a json property.
+  * @retval The handler of the next sibling if found.
+  * @retval Null pointer if the json property is the last one. */
+static inline json_t const* json_getSibling( json_t const* json ) {
+    return json->sibling;
+}
+
+/** Search a property by its name in a JSON object.
+  * @param obj A valid handler of a json object. Its type must be JSON_OBJ.
+  * @param property The name of property to get.
+  * @retval The handler of the json property if found.
+  * @retval Null pointer if not found. */
+json_t const* json_getProperty( json_t const* obj, char const* property );
+
+
+/** Search a property by its name in a JSON object and return its value.
+  * @param obj A valid handler of a json object. Its type must be JSON_OBJ.
+  * @param property The name of property to get.
+  * @retval If found a pointer to null-terminated string with the value.
+  * @retval Null pointer if not found or it is an array or an object. */
+char const* json_getPropertyValue( json_t const* obj, char const* property );
+
+/** Get the first property of a JSON object or array.
+  * @param json A valid handler of a json property.
+  *             Its type must be JSON_OBJ or JSON_ARRAY.
+  * @retval The handler of the first property if there is one.
+  * @retval Null pointer if the json object or array has no properties. */
+static inline json_t const* json_getChild( json_t const* json ) {
+    return json->u.c.child;
+}
+
+/** Get the value of a json boolean property.
+  * @param property A valid handler of a json object. Its type must be JSON_BOOLEAN.
+  * @return true if the stored text starts with 't', false otherwise. */
+static inline bool json_getBoolean( json_t const* property ) {
+    return *property->u.value == 't';
+}
+
+/** Get the value of a json integer property.
+  * @param property A valid handler of a json object. Its type must be JSON_INTEGER.
+  * @return The stored text converted with strtoll() in base 10. */
+static inline int64_t json_getInteger( json_t const* property ) {
+  return strtoll( property->u.value,(char**)NULL, 10);
+}
+
+/** Get the value of a json real property.
+  * @param property A valid handler of a json object. Its type must be JSON_REAL.
+  * @return The stored text converted with strtod(). */
+static inline double json_getReal( json_t const* property ) {
+  return strtod( property->u.value,(char**)NULL );
+}
+
+
+
+/** Interface of a pool that supplies json properties to the parser. */
+typedef struct jsonPool_s jsonPool_t;
+struct jsonPool_s {
+    json_t* (*init)( jsonPool_t* pool );  /**< Called by json_createWithPool(); returns the root property. */
+    json_t* (*alloc)( jsonPool_t* pool ); /**< Returns a new property; a null pointer aborts the parse. */
+};
+
+/** Parse a string to get a json.
+  * @param str String pointer with a JSON object or array. It will be modified.
+  * @param pool Custom json pool pointer.
+  * @retval Null pointer if anything went wrong in the parse process.
+  * @retval A valid handler of a json if the parse process was successful.
+  *         This property is always unnamed; its type is JSON_OBJ or JSON_ARRAY. */
+json_t const* json_createWithPool( char* str, jsonPool_t* pool );
+
+/** @} */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* _TINY_JSON_H_ */
diff --git a/lib/CL/devices/almaif/AlmaifShared.hh b/lib/CL/devices/almaif/AlmaifShared.hh
index 735691fd5..cc2ce13dd 100644
--- a/lib/CL/devices/almaif/AlmaifShared.hh
+++ b/lib/CL/devices/almaif/AlmaifShared.hh
@@ -127,6 +127,13 @@ struct AQLQueueInfo {
 
 #define ALMAIF_DRIVER_SLEEP 200
 
+enum ALMAIF_DEVICE_TYPE : size_t {
+  POCL_ALMAIFDEVICE_XRT = 0xA,
+  POCL_ALMAIFDEVICE_TTASIM = 0xB,
+  POCL_ALMAIFDEVICE_EMULATION = 0xE,
+  POCL_ALMAIFDEVICE_BITSTREAMDATABASE = 0xF,
+};
+
 struct CommandMetadata {
   uint32_t completion_signal;
   uint32_t reserved0;
@@ -179,6 +186,8 @@ struct AlmaifData {
 
   AlmaIFDevice *Dev;
 
+  cl_bool Available;
+
   std::set<BIKD *> SupportedKernels;
   // List of commands ready to be executed.
   _cl_command_node *ReadyList;
@@ -190,8 +199,8 @@ struct AlmaifData {
   // Lock for device-side command queue manipulation
   pocl_lock_t AQLQueueLock;
 
-  void *printf_buffer;
-  void *printf_position;
+  void *PrintfBuffer;
+  void *PrintfPosition;
 
   emulation_data_t EmulationData;
 
diff --git a/lib/CL/devices/almaif/CMakeLists.txt b/lib/CL/devices/almaif/CMakeLists.txt
index 2f6d5063b..4d506c357 100644
--- a/lib/CL/devices/almaif/CMakeLists.txt
+++ b/lib/CL/devices/almaif/CMakeLists.txt
@@ -40,18 +40,26 @@ set(ALMAIF_SOURCES "AlmaifShared.hh"
                   "EmulationRegion.hh"
                   "EmulationDevice.cc"
                   "EmulationDevice.hh"
-                  "XrtDevice.hh"
+                  "XilinxXrtDevice.hh"
                   "AlmaifCompile.cc"
                   "AlmaifCompile.hh"
-                  "openasip/AlmaifCompileTCE.hh"
+                  "openasip/AlmaifCompileOpenasip.hh"
                   )
 
 if(HAVE_XRT)
     add_compile_options(-I${XRT_INCLUDEDIR})
     set(ALMAIF_SOURCES ${ALMAIF_SOURCES}
-                      "XrtDevice.cc"
-                      "XrtRegion.cc"
-                      "XrtRegion.hh"
+                      "XilinxXrtDevice.cc"
+                      "XilinxXrtRegion.cc"
+                      "XilinxXrtRegion.hh"
+                      "XilinxXrtExternalRegion.cc"
+                      "XilinxXrtExternalRegion.hh"
+                      "AlmaifDB/DBDevice.hh"
+                      "AlmaifDB/DBDevice.cc"
+                      "AlmaifDB/AlmaIFBitstreamDatabaseManager.cc"
+                      "AlmaifDB/AlmaIFBitstreamDatabaseManager.hh"
+                      "AlmaifDB/tiny-json.c"
+                      "AlmaifDB/tiny-json.h"
                       )
 endif()
 
@@ -63,7 +71,7 @@ if(ENABLE_TCE)
                       "openasip/TTASimRegion.hh"
                       "openasip/TTASimControlRegion.cc"
                       "openasip/TTASimControlRegion.hh"
-                      "openasip/AlmaifCompileTCE.cc"
+                      "openasip/AlmaifCompileOpenasip.cc"
                       )
 endif()
 
@@ -74,7 +82,7 @@ endif(MSVC)
 
 add_pocl_device_library(pocl-devices-almaif ${ALMAIF_SOURCES})
 
-if(HAVE_XRT})
+if(HAVE_XRT)
     target_link_libraries(pocl-devices-almaif PRIVATE "${XRT_LIBDIR}/libxrt_coreutil.so")
 endif()
 
diff --git a/lib/CL/devices/almaif/EmulationDevice.cc b/lib/CL/devices/almaif/EmulationDevice.cc
index c6b50834b..5c0565da5 100644
--- a/lib/CL/devices/almaif/EmulationDevice.cc
+++ b/lib/CL/devices/almaif/EmulationDevice.cc
@@ -54,9 +54,9 @@ EmulationDevice::EmulationDevice() {
 
   discoverDeviceParameters();
 
-  InstructionMemory = new EmulationRegion(imem_start, imem_size);
-  CQMemory = new EmulationRegion(cq_start, cq_size);
-  DataMemory = new EmulationRegion(dmem_start, dmem_size);
+  InstructionMemory = new EmulationRegion(ImemStart, ImemSize);
+  CQMemory = new EmulationRegion(CQStart, CQSize);
+  DataMemory = new EmulationRegion(DmemStart, DmemSize);
 }
 
 EmulationDevice::~EmulationDevice() {
@@ -79,8 +79,8 @@ void *emulate_almaif(void *E_void) {
   void *base_address = E->emulating_address;
 
   uint32_t ctrl_size = 1024;
-  uint32_t imem_size = 0;
-  uint32_t dmem_size = EMULATING_MAX_SIZE * 3 / 4;
+  uint32_t ImemSize = 0;
+  uint32_t DmemSize = EMULATING_MAX_SIZE * 3 / 4;
   // The accelerator can choose the size of the queue (must be a power-of-two)
   // Can be even 1, to make the packet handling easiest with static offsets
   uint32_t queue_length = 3;
@@ -89,14 +89,14 @@ void *emulate_almaif(void *E_void) {
   // The accelerator can set the starting addresses
   // Even the order can be changed if the accelerator wants to
   // Here packing the memory regions tighly as an example.
-  uintptr_t imem_start = (uintptr_t)base_address + ctrl_size;
-  uintptr_t cqmem_start = imem_start + imem_size;
-  uintptr_t dmem_start = cqmem_start + cqmem_size;
+  uintptr_t ImemStart = (uintptr_t)base_address + ctrl_size;
+  uintptr_t cqmem_start = ImemStart + ImemSize;
+  uintptr_t DmemStart = cqmem_start + cqmem_size;
 
   volatile uint32_t *Control = (uint32_t *)base_address;
-  //volatile uint8_t *Instruction = (uint8_t *)imem_start;
+  // volatile uint8_t *Instruction = (uint8_t *)ImemStart;
   volatile uint32_t *CQ = (uint32_t *)cqmem_start;
-  //volatile uint8_t *Data = (uint8_t *)dmem_start;
+  // volatile uint8_t *Data = (uint8_t *)DmemStart;
 
   // Set initial values for info registers:
   Control[ALMAIF_INFO_DEV_CLASS / 4] = 0xE; // Unused
@@ -113,16 +113,16 @@ void *emulate_almaif(void *E_void) {
   // that are written BEFORE hw reset is deasserted.
   // E.g. program binaries of a processor-based accelerator
   Control[ALMAIF_INFO_IMEM_SIZE / 4] = 0;
-  Control[ALMAIF_INFO_IMEM_START_LOW / 4] = (uint32_t)imem_start;
-  Control[ALMAIF_INFO_IMEM_START_HIGH / 4] = (uint32_t)(imem_start >> 32);
+  Control[ALMAIF_INFO_IMEM_START_LOW / 4] = (uint32_t)ImemStart;
+  Control[ALMAIF_INFO_IMEM_START_HIGH / 4] = (uint32_t)(ImemStart >> 32);
 
   Control[ALMAIF_INFO_CQMEM_SIZE_LOW / 4] = cqmem_size;
   Control[ALMAIF_INFO_CQMEM_START_LOW / 4] = (uint32_t)cqmem_start;
   Control[ALMAIF_INFO_CQMEM_START_HIGH / 4] = (uint32_t)(cqmem_start >> 32);
 
-  Control[ALMAIF_INFO_DMEM_SIZE_LOW / 4] = dmem_size;
-  Control[ALMAIF_INFO_DMEM_START_LOW / 4] = (uint32_t)dmem_start;
-  Control[ALMAIF_INFO_DMEM_START_HIGH / 4] = (uint32_t)(dmem_start >> 32);
+  Control[ALMAIF_INFO_DMEM_SIZE_LOW / 4] = DmemSize;
+  Control[ALMAIF_INFO_DMEM_START_LOW / 4] = (uint32_t)DmemStart;
+  Control[ALMAIF_INFO_DMEM_START_HIGH / 4] = (uint32_t)(DmemStart >> 32);
 
   uint32_t feature_flags_low = ALMAIF_FF_BIT_AXI_MASTER;
   Control[ALMAIF_INFO_FEATURE_FLAGS_LOW / 4] = feature_flags_low;
diff --git a/lib/CL/devices/almaif/EmulationDevice.hh b/lib/CL/devices/almaif/EmulationDevice.hh
index fb1e1f2fa..4c30e4512 100644
--- a/lib/CL/devices/almaif/EmulationDevice.hh
+++ b/lib/CL/devices/almaif/EmulationDevice.hh
@@ -29,7 +29,6 @@
 
 #include "AlmaIFDevice.hh"
 
-#define EMULATING_ADDRESS 0xE
 #define EMULATING_MAX_SIZE (256 * 1024 * 1024)
 //#define EMULATING_MAX_SIZE 4 * 4096
 
diff --git a/lib/CL/devices/almaif/EmulationRegion.cc b/lib/CL/devices/almaif/EmulationRegion.cc
index 793c2934c..70ce690a5 100644
--- a/lib/CL/devices/almaif/EmulationRegion.cc
+++ b/lib/CL/devices/almaif/EmulationRegion.cc
@@ -26,7 +26,7 @@
 // Used in emulator to hack the MMAP to work with just virtually contiguous
 // memory
 EmulationRegion::EmulationRegion(size_t Address, size_t RegionSize) {
-  PhysAddress = Address;
+  PhysAddress_ = Address;
   Data = (void *)Address;
-  Size = RegionSize;
+  Size_ = RegionSize;
 }
diff --git a/lib/CL/devices/almaif/MMAPDevice.cc b/lib/CL/devices/almaif/MMAPDevice.cc
index 8b9b40416..7f00d61ac 100644
--- a/lib/CL/devices/almaif/MMAPDevice.cc
+++ b/lib/CL/devices/almaif/MMAPDevice.cc
@@ -32,7 +32,7 @@
 //#include <sys/stat.h>
 #include <fcntl.h>
 
-MMAPDevice::MMAPDevice(size_t base_address, char *kernel_name) {
+MMAPDevice::MMAPDevice(size_t base_address, const std::string &kernel_name) {
   int mem_fd = -1;
   mem_fd = open("/dev/mem", O_RDWR | O_SYNC);
   if (mem_fd == -1) {
@@ -42,23 +42,19 @@ MMAPDevice::MMAPDevice(size_t base_address, char *kernel_name) {
 
   discoverDeviceParameters();
 
-  InstructionMemory = new MMAPRegion(imem_start, imem_size, mem_fd);
-  CQMemory = new MMAPRegion(cq_start, cq_size, mem_fd);
-  DataMemory = new MMAPRegion(dmem_start, dmem_size, mem_fd);
+  InstructionMemory = new MMAPRegion(ImemStart, ImemSize, mem_fd);
+  CQMemory = new MMAPRegion(CQStart, CQSize, mem_fd);
+  DataMemory = new MMAPRegion(DmemStart, DmemSize, mem_fd);
 
-  unsigned img_file_name_length = strlen(kernel_name) + 5;
-  char *file_name = (char *)malloc(img_file_name_length);
-  assert(file_name);
-  snprintf(file_name, img_file_name_length, "%s.img", kernel_name);
+  std::string file_name = kernel_name + ".img";
 
-  if (pocl_exists(file_name)) {
+  if (pocl_exists(file_name.c_str())) {
     POCL_MSG_PRINT_ALMAIF(
         "Almaif: Found built-in kernel firmaware. Loading it in\n");
     ((MMAPRegion *)InstructionMemory)->initRegion(file_name);
   } else {
     POCL_MSG_PRINT_ALMAIF("Almaif: No default firmware found. Skipping\n");
   }
-  free(file_name);
 
   if (pocl_is_option_set("POCL_ALMAIF_EXTERNALREGION")) {
     char *region_params =
diff --git a/lib/CL/devices/almaif/MMAPDevice.hh b/lib/CL/devices/almaif/MMAPDevice.hh
index 1e6936e8f..d569611b2 100644
--- a/lib/CL/devices/almaif/MMAPDevice.hh
+++ b/lib/CL/devices/almaif/MMAPDevice.hh
@@ -32,7 +32,7 @@ private:
   ~MMAPDevice();
 
 public:
-  MMAPDevice(size_t base_address, char *kernel_name);
+  MMAPDevice(size_t base_address, const std::string &kernel_name);
 };
 
 #endif
diff --git a/lib/CL/devices/almaif/MMAPRegion.cc b/lib/CL/devices/almaif/MMAPRegion.cc
index 677d08e2a..44f794270 100644
--- a/lib/CL/devices/almaif/MMAPRegion.cc
+++ b/lib/CL/devices/almaif/MMAPRegion.cc
@@ -24,6 +24,7 @@
 #include <assert.h>
 #include <fstream>
 #include <stdlib.h>
+#include <string>
 #include <sys/mman.h>
 #include <unistd.h>
 
@@ -34,9 +35,9 @@
 MMAPRegion::MMAPRegion() {}
 
 MMAPRegion::MMAPRegion(size_t Address, size_t RegionSize, int mem_fd) {
-  PhysAddress = Address;
-  Size = RegionSize;
-  if (Size == 0) {
+  PhysAddress_ = Address;
+  Size_ = RegionSize;
+  if (Size_ == 0) {
     return;
   }
   POCL_MSG_PRINT_ALMAIF_MMAP(
@@ -46,7 +47,7 @@ MMAPRegion::MMAPRegion(size_t Address, size_t RegionSize, int mem_fd) {
   long page_size = sysconf(_SC_PAGESIZE);
   size_t roundDownAddress = (Address / page_size) * page_size;
   size_t difference = Address - roundDownAddress;
-  Data = mmap(0, Size + difference, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd,
+  Data = mmap(0, Size_ + difference, PROT_READ | PROT_WRITE, MAP_SHARED, mem_fd,
               roundDownAddress);
   assert(Data != MAP_FAILED && "MMAPRegion mapping failed");
   // Increment back to the unaligned address user asked for
@@ -54,9 +55,9 @@ MMAPRegion::MMAPRegion(size_t Address, size_t RegionSize, int mem_fd) {
   POCL_MSG_PRINT_ALMAIF_MMAP("almaif: got address %p\n", Data);
 }
 
-void MMAPRegion::initRegion(char *init_file) {
+void MMAPRegion::initRegion(const std::string &init_file) {
   std::ifstream inFile;
-  inFile.open(init_file, std::ios::binary);
+  inFile.open(init_file.c_str(), std::ios::binary);
   unsigned int current;
   int i = 0;
   while (inFile.good()) {
@@ -70,24 +71,24 @@ void MMAPRegion::initRegion(char *init_file) {
 
 MMAPRegion::~MMAPRegion() {
   POCL_MSG_PRINT_ALMAIF_MMAP("almaif: munmap'ing from address 0x%zx\n",
-                            PhysAddress);
+                             PhysAddress_);
   if (Data) {
     // Align unmap to page_size
     long page_size = sysconf(_SC_PAGESIZE);
     size_t roundDownAddress = ((size_t)Data / page_size) * page_size;
     size_t difference = (size_t)Data - roundDownAddress;
 
-    munmap((void *)roundDownAddress, Size + difference);
+    munmap((void *)roundDownAddress, Size_ + difference);
     Data = NULL;
   }
 }
 
 uint32_t MMAPRegion::Read32(size_t offset) {
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading from physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(Data && "No pointer to MMAP'd region; read before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   auto value =
       static_cast<volatile uint32_t *>(Data)[offset / sizeof(uint32_t)];
   return value;
@@ -95,28 +96,28 @@ uint32_t MMAPRegion::Read32(size_t offset) {
 
 void MMAPRegion::Write32(size_t offset, uint32_t value) {
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(Data && "No pointer to MMAP'd region; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   static_cast<volatile uint32_t *>(Data)[offset / sizeof(uint32_t)] = value;
 }
 
 void MMAPRegion::Write16(size_t offset, uint16_t value) {
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(Data && "No pointer to MMAP'd region; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   static_cast<volatile uint16_t *>(Data)[offset / sizeof(uint16_t)] = value;
 }
 
 uint64_t MMAPRegion::Read64(size_t offset) {
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading from physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(Data && "No pointer to MMAP'd region; read before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   auto value =
       static_cast<volatile uint64_t *>(Data)[offset / sizeof(uint64_t)];
   return value;
@@ -124,32 +125,33 @@ uint64_t MMAPRegion::Read64(size_t offset) {
 
 void MMAPRegion::Write64(size_t offset, uint64_t value) {
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(Data && "No pointer to MMAP'd region; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   static_cast<volatile uint64_t *>(Data)[offset / sizeof(uint64_t)] = value;
 }
 
 void MMAPRegion::CopyToMMAP(size_t destination, const void *source,
                             size_t bytes) {
-  POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing 0x%zx bytes to buffer at 0x%zx with "
-                            "address 0x%zx\n",
-                            bytes, PhysAddress, destination);
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "MMAP: Writing 0x%zx bytes to buffer at 0x%zx with "
+      "address 0x%zx\n",
+      bytes, PhysAddress_, destination);
   auto src = (char *)source;
-  size_t offset = destination - PhysAddress;
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  size_t offset = destination - PhysAddress_;
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   auto dst = offset + static_cast<volatile char *>(Data);
   memcpy((void *)dst, src, bytes);
 }
 
 void MMAPRegion::CopyFromMMAP(void *destination, size_t source, size_t bytes) {
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading 0x%zx bytes from buffer at 0x%zx "
-                            "with address 0x%zx\n",
-                            bytes, PhysAddress, source);
+                             "with address 0x%zx\n",
+                             bytes, PhysAddress_, source);
   auto dst = (char *)destination;
-  size_t offset = source - PhysAddress;
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  size_t offset = source - PhysAddress_;
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   auto src = offset + static_cast<volatile char *>(Data);
   memcpy(dst, (void *)src, bytes);
 }
@@ -158,11 +160,11 @@ void MMAPRegion::CopyInMem(size_t source, size_t destination, size_t bytes) {
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Copying 0x%zx bytes from 0x%zx "
                             "to 0x%zx\n",
                             bytes, source, destination);
-  size_t src_offset = source - PhysAddress;
-  size_t dst_offset = destination - PhysAddress;
-  assert(src_offset < Size && (src_offset + bytes) <= Size &&
+  size_t src_offset = source - PhysAddress_;
+  size_t dst_offset = destination - PhysAddress_;
+  assert(src_offset < Size_ && (src_offset + bytes) <= Size_ &&
          "Attempt to access data outside MMAP'd buffer");
-  assert(dst_offset < Size && (dst_offset + bytes) <= Size &&
+  assert(dst_offset < Size_ && (dst_offset + bytes) <= Size_ &&
          "Attempt to access data outside MMAP'd buffer");
   volatile char *src = src_offset + static_cast<volatile char *>(Data);
   volatile char *dst = dst_offset + static_cast<volatile char *>(Data);
diff --git a/lib/CL/devices/almaif/MMAPRegion.hh b/lib/CL/devices/almaif/MMAPRegion.hh
index aa6f30115..85b5dc42c 100644
--- a/lib/CL/devices/almaif/MMAPRegion.hh
+++ b/lib/CL/devices/almaif/MMAPRegion.hh
@@ -26,6 +26,7 @@
 #define MMAPREGION_H
 
 #include <stdlib.h>
+#include <string>
 
 #include "pocl_types.h"
 
@@ -49,7 +50,7 @@ public:
   virtual void CopyInMem(size_t source, size_t destination,
                          size_t bytes) override;
 
-  virtual void initRegion(char *init_file);
+  virtual void initRegion(const std::string &init_file);
 
 protected:
   MMAPRegion();
diff --git a/lib/CL/devices/almaif/XilinxXrtDevice.cc b/lib/CL/devices/almaif/XilinxXrtDevice.cc
new file mode 100644
index 000000000..0a63488be
--- /dev/null
+++ b/lib/CL/devices/almaif/XilinxXrtDevice.cc
@@ -0,0 +1,270 @@
+/* XilinxXrtDevice.cc - Access AlmaIF device in Xilinx PCIe FPGA.
+
+   Copyright (c) 2022 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#include "XilinxXrtDevice.hh"
+
+#include "AlmaifShared.hh"
+#include "XilinxXrtExternalRegion.hh"
+#include "XilinxXrtRegion.hh"
+
+#include "experimental/xrt_ip.h"
+
+#include "pocl_file_util.h"
+#include "pocl_timing.h"
+
+#include <libgen.h>
+
+void *DeviceHandle;
+
+XilinxXrtDevice::XilinxXrtDevice(const std::string &XrtKernelNamePrefix,
+                                 unsigned j) {
+  // basename() may modify its argument, so operate on a heap copy.
+  char *TmpKernelName = strdup(XrtKernelNamePrefix.c_str());
+  char *KernelName = basename(TmpKernelName);
+  // The bitstream path is derived from the full prefix: "<prefix>.xclbin".
+  std::string xclbin_char = XrtKernelNamePrefix + ".xclbin";
+  std::string ExternalMemoryParameters =
+      pocl_get_string_option("POCL_ALMAIF_EXTERNALREGION", "");
+  init_xrtdevice(KernelName, xclbin_char, ExternalMemoryParameters, j);
+  // KernelName may point into TmpKernelName, so free only after the call.
+  free(TmpKernelName);
+}
+
+// Explicit xclbin path; external region parameters come from the option.
+XilinxXrtDevice::XilinxXrtDevice(const std::string &XrtKernelNamePrefix,
+                                 const std::string &XclbinFile, unsigned j) {
+  std::string ExternalMemoryParameters =
+      pocl_get_string_option("POCL_ALMAIF_EXTERNALREGION", "");
+  init_xrtdevice(XrtKernelNamePrefix, XclbinFile, ExternalMemoryParameters, j);
+}
+
+// All parameters explicit: forwards directly to init_xrtdevice().
+XilinxXrtDevice::XilinxXrtDevice(const std::string &XrtKernelNamePrefix,
+                                 const std::string &XclbinFile,
+                                 const std::string &ExternalMemoryParameters,
+                                 unsigned j) {
+  init_xrtdevice(XrtKernelNamePrefix, XclbinFile, ExternalMemoryParameters, j);
+}
+
+void XilinxXrtDevice::init_xrtdevice(
+    const std::string &XrtKernelNamePrefix, const std::string &XclbinFile,
+    const std::string &ExternalMemoryParameters, unsigned j) {
+  if (j == 0) {
+    // Only j == 0 opens XRT device 0; the handle is published through the
+    // file-scope DeviceHandle pointer and reused for every other j.
+    DeviceHandle = (void *)new xrt::device(0);
+  }
+  programBitstream(XrtKernelNamePrefix, XclbinFile, j);
+  // TODO Remove magic
+  size_t DeviceOffset = 0x40000000 + j * 0x10000;
+  // The control region starts at DeviceOffset and has the default size.
+  ControlMemory = new XilinxXrtRegion(DeviceOffset, ALMAIF_DEFAULT_CTRL_SIZE,
+                                      Kernel, DeviceOffset);
+
+  discoverDeviceParameters();
+
+  // dirname() may modify its argument; give it a NUL-terminated private copy.
+  std::string TmpXclbinFile = XclbinFile;
+  char *DirectoryName = dirname(&TmpXclbinFile[0]);
+  std::string ImgFileName = DirectoryName;
+  ImgFileName += "/" + XrtKernelNamePrefix + ".img";
+  if (pocl_exists(ImgFileName.c_str())) {
+    POCL_MSG_PRINT_ALMAIF(
+        "Almaif: Found built-in kernel firmware. Loading it in\n");
+    InstructionMemory = new XilinxXrtRegion(ImemStart, ImemSize, Kernel,
+                                            ImgFileName, DeviceOffset);
+  } else {
+    POCL_MSG_PRINT_ALMAIF("Almaif: No default firmware found. Skipping\n");
+    InstructionMemory =
+        new XilinxXrtRegion(ImemStart, ImemSize, Kernel, DeviceOffset);
+  }
+
+  CQMemory = new XilinxXrtRegion(CQStart, CQSize, Kernel, DeviceOffset);
+  DataMemory = new XilinxXrtRegion(DmemStart, DmemSize, Kernel, DeviceOffset);
+
+  if (ExternalMemoryParameters != "") {
+    // Format "<address>,<size>"; tokenize a copy, a missing field counts as 0.
+    std::string tmp_params = ExternalMemoryParameters;
+    char *save_ptr = NULL;
+    char *param_token = strtok_r(&tmp_params[0], ",", &save_ptr);
+    size_t region_address = param_token ? strtoul(param_token, NULL, 0) : 0;
+    param_token = strtok_r(NULL, ",", &save_ptr);
+    size_t region_size = param_token ? strtoul(param_token, NULL, 0) : 0;
+    if (region_size > 0) {
+      ExternalXRTMemory = new XilinxXrtExternalRegion(
+          region_address, region_size, DeviceHandle);
+      POCL_MSG_PRINT_ALMAIF("Almaif: initialized external XRT alloc region at "
+                            "%zx with size %zx\n",
+                            region_address, region_size);
+    }
+  }
+  XilinxXrtDeviceInitDone_ = 1;
+}
+
+XilinxXrtDevice::~XilinxXrtDevice() {
+  delete ((xrt::ip *)Kernel);
+  // DeviceHandle is file-scope and shared between instances: clear it after
+  // deleting so another instance's destructor cannot delete it twice.
+  delete ((xrt::device *)DeviceHandle);
+  DeviceHandle = NULL;
+}
+
+void XilinxXrtDevice::programBitstream(const std::string &XrtKernelNamePrefix,
+                                       const std::string &XclbinFile,
+                                       unsigned j) {
+  // DeviceHandle is the file-scope xrt::device created by init_xrtdevice().
+  xrt::device *devicehandle = (xrt::device *)DeviceHandle;
+
+  // TODO: Fix the case when the kernel name contains a path
+  // Needs to tokenize the last part of the path and use that
+  // as the kernel name
+  std::string XrtKernelName =
+      XrtKernelNamePrefix + ":{" + XrtKernelNamePrefix + "_1}";
+  // When re-programming, release the previous xrt::ip before replacing it.
+  if (XilinxXrtDeviceInitDone_) {
+    delete (xrt::ip *)Kernel;
+  }
+  // Only j == 0 loads the xclbin; every j then opens its IP by the loaded uuid.
+  if (j == 0) {
+    uint64_t start_time = pocl_gettimemono_ns();
+    auto uuid = devicehandle->load_xclbin(XclbinFile);
+    uint64_t end_time = pocl_gettimemono_ns();
+    printf("Reprogramming done. Time: %llu ms\n",
+           (unsigned long long)((end_time - start_time) / 1000000));
+
+    std::string MemInfo = devicehandle->get_info<xrt::info::device::memory>();
+    POCL_MSG_PRINT_ALMAIF_MMAP("XRT device's memory info:%s\n",
+                               MemInfo.c_str());
+  }
+  auto uuid = devicehandle->get_xclbin_uuid();
+
+  auto kernel = new xrt::ip(*devicehandle, uuid, XrtKernelName.c_str());
+
+  assert(kernel != XRT_NULL_HANDLE &&
+         "xrtKernelHandle NULL, is the kernel opened properly?");
+
+  Kernel = (void *)kernel;
+
+  // Regions created by an earlier init still hold the old kernel pointer.
+  if (XilinxXrtDeviceInitDone_) {
+    ((XilinxXrtRegion *)ControlMemory)->setKernelPtr(Kernel);
+    ((XilinxXrtRegion *)InstructionMemory)->setKernelPtr(Kernel);
+    ((XilinxXrtRegion *)CQMemory)->setKernelPtr(Kernel);
+    ((XilinxXrtRegion *)DataMemory)->setKernelPtr(Kernel);
+  }
+  POCL_MSG_PRINT_ALMAIF("BITSTREAM PROGRAMMING DONE\n");
+}
+
+void XilinxXrtDevice::freeBuffer(pocl_mem_identifier *P) {
+  if (P->extra == 1) {
+    // extra == 1 marks a buffer allocated from the external XRT region.
+    POCL_MSG_PRINT_MEMORY("almaif: freed buffer from 0x%zx\n",
+                          (size_t)ExternalXRTMemory->pointerDeviceOffset(P));
+    ExternalXRTMemory->freeBuffer(P);
+  } else {
+    chunk_info_t *chunk = (chunk_info_t *)P->mem_ptr;
+    // Validate the chunk before it is dereferenced for the log message.
+    assert(chunk != NULL);
+    POCL_MSG_PRINT_MEMORY("almaif: freed buffer from 0x%zx\n",
+                          chunk->start_address);
+    pocl_free_chunk(chunk);
+  }
+}
+
+size_t XilinxXrtDevice::pointerDeviceOffset(pocl_mem_identifier *P) {
+  // Buffers in the external XRT region (extra == 1) report their own address.
+  if (P->extra == 1)
+    return ExternalXRTMemory->pointerDeviceOffset(P);
+  // Every other buffer is described by a chunk_info_t stored in mem_ptr.
+  chunk_info_t *chunk = (chunk_info_t *)P->mem_ptr;
+  assert(chunk != NULL);
+  return chunk->start_address;
+}
+
+cl_int XilinxXrtDevice::allocateBuffer(pocl_mem_identifier *P, size_t Size) {
+
+  assert(P->mem_ptr == NULL);
+  chunk_info_t *OnChipChunk = NULL;
+
+  // TODO: add bufalloc-based on-chip memory allocation here. The current
+  // version always allocates from external memory, since the current
+  // kernels do not know how to access the on-chip memory.
+  if (OnChipChunk != NULL) {
+    // On-chip path: record the chunk and mark the buffer with extra == 0.
+    POCL_MSG_PRINT_MEMORY("almaif: allocated %zu bytes from 0x%zx\n", Size,
+                          OnChipChunk->start_address);
+    P->mem_ptr = OnChipChunk;
+    P->extra = 0;
+    P->version = 0;
+    return CL_SUCCESS;
+  }
+
+  if (!ExternalXRTMemory) {
+    // No on-chip chunk and no external region: nothing to allocate from.
+    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
+  }
+  // XilinxXrtExternalRegion has its own allocation requirements
+  // (doesn't use bufalloc); extra == 1 marks the buffer as external.
+  cl_int AllocStatus = ExternalXRTMemory->allocateBuffer(P, Size);
+  P->version = 0;
+  P->extra = 1;
+  return AllocStatus;
+}
+
+void XilinxXrtDevice::writeDataToDevice(pocl_mem_identifier *DstMemId,
+                                        const char *__restrict__ const Src,
+                                        size_t Size, size_t Offset) {
+  // extra selects the backing store: 0 = DataMemory chunk, 1 = external XRT.
+  if (DstMemId->extra == 1) {
+    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to external Xrt buffer\n",
+                          Size);
+    ExternalXRTMemory->CopyToMMAP(DstMemId, Src, Size, Offset);
+  } else if (DstMemId->extra == 0) {
+    chunk_info_t *Chunk = (chunk_info_t *)DstMemId->mem_ptr;
+    size_t Dst = Chunk->start_address + Offset;
+    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes to 0x%zx\n", Size, Dst);
+    DataMemory->CopyToMMAP(Dst, Src, Size);
+  } else {
+    POCL_ABORT("Attempt to write data to outside the device memories.\n");
+  }
+}
+
+void XilinxXrtDevice::readDataFromDevice(char *__restrict__ const Dst,
+                                         pocl_mem_identifier *SrcMemId,
+                                         size_t Size, size_t Offset) {
+
+  if (SrcMemId->extra == 0) {
+    // Only on-chip buffers store a chunk_info_t in mem_ptr; for external
+    // buffers mem_ptr is an xrt::bo and must not be read as a chunk.
+    chunk_info_t *chunk = (chunk_info_t *)SrcMemId->mem_ptr;
+    size_t Src = chunk->start_address + Offset;
+    POCL_MSG_PRINT_ALMAIF("almaif: Copying %zu bytes from 0x%zx\n", Size, Src);
+    DataMemory->CopyFromMMAP(Dst, Src, Size);
+  } else if (SrcMemId->extra == 1) {
+    POCL_MSG_PRINT_ALMAIF(
+        "almaif: Copying %zu bytes from external XRT buffer\n", Size);
+    ExternalXRTMemory->CopyFromMMAP(Dst, SrcMemId, Size, Offset);
+  } else {
+    POCL_ABORT("Attempt to read data from outside the device memories.\n");
+  }
+}
diff --git a/lib/CL/devices/almaif/XilinxXrtDevice.hh b/lib/CL/devices/almaif/XilinxXrtDevice.hh
new file mode 100644
index 000000000..805de1ad8
--- /dev/null
+++ b/lib/CL/devices/almaif/XilinxXrtDevice.hh
@@ -0,0 +1,71 @@
+/* XilinxXrtDevice.hh - Access AlmaIF device in Xilinx PCIe FPGA.
+
+   Copyright (c) 2022 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#ifndef XILINXXRTDEVICE_H
+#define XILINXXRTDEVICE_H
+
+#include "AlmaIFDevice.hh"
+
+class XilinxXrtExternalRegion;
+
+// This class abstracts the Almaif device instantiated on a Xilinx (PCIe) FPGA.
+// The FPGA is reconfigured and Almaif device's memory map accessed with
+// the Xilinx Runtime (XRT) API.
+class XilinxXrtDevice : public AlmaIFDevice {
+public:
+  XilinxXrtDevice(const std::string &XrtKernelNamePrefix, unsigned j);
+  XilinxXrtDevice(const std::string &XrtKernelNamePrefix,
+                  const std::string &XclbinFile, unsigned j);
+  XilinxXrtDevice(const std::string &XrtKernelNamePrefix,
+                  const std::string &XclbinFile,
+                  const std::string &ExternalMemoryParameters, unsigned j);
+  void init_xrtdevice(const std::string &XrtKernelNamePrefix,
+                      const std::string &XclbinFile,
+                      const std::string &ExternalMemoryParameters, unsigned j);
+  ~XilinxXrtDevice() override;
+  // Reconfigures the FPGA
+  void programBitstream(const std::string &XrtKernelNamePrefix,
+                        const std::string &XclbinFile, unsigned j);
+
+  // Allocate buffers from either on-chip or external memory regions
+  // (Directs to either XilinxXrtRegion or XilinxXrtExternalRegion)
+  cl_int allocateBuffer(pocl_mem_identifier *P, size_t Size) override;
+  void freeBuffer(pocl_mem_identifier *P) override;
+  // Returns the offset of the allocated buffer, in order to be passed
+  // as a kernel argument. This is relevant for XilinxXrtDevice specifically,
+  // since the allocations in XilinxXrtExternalRegion are managed by XRT API.
+  size_t pointerDeviceOffset(pocl_mem_identifier *P) override;
+  void writeDataToDevice(pocl_mem_identifier *DstMemId,
+                         const char *__restrict__ const Src, size_t Size,
+                         size_t Offset) override;
+  void readDataFromDevice(char *__restrict__ const Dst,
+                          pocl_mem_identifier *SrcMemId, size_t Size,
+                          size_t Offset) override;
+
+private:
+  XilinxXrtExternalRegion *ExternalXRTMemory = nullptr; // unset: no region
+  void *Kernel = nullptr; // type-erased xrt::ip *, set by programBitstream()
+  int XilinxXrtDeviceInitDone_ = 0;
+};
+
+#endif
diff --git a/lib/CL/devices/almaif/XilinxXrtExternalRegion.cc b/lib/CL/devices/almaif/XilinxXrtExternalRegion.cc
new file mode 100644
index 000000000..0ff4be102
--- /dev/null
+++ b/lib/CL/devices/almaif/XilinxXrtExternalRegion.cc
@@ -0,0 +1,128 @@
+/* XilinxXrtExternalRegion.cc - Access external memory (DDR or HBM) of an XRT
+ device
+ *                        as AlmaIFRegion
+
+   Copyright (c) 2023 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#include <assert.h>
+#include <fstream>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "xrt/xrt_bo.h"
+#include "xrt/xrt_device.h"
+
+#include "XilinxXrtExternalRegion.hh"
+#include "pocl_util.h"
+
+XilinxXrtExternalRegion::XilinxXrtExternalRegion(size_t Address,
+                                                 size_t RegionSize,
+                                                 void *Device) {
+  // Only records the region bounds and device handle; allocates nothing.
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "XRTMMAP: Initializing XilinxXrtExternalRegion with Address %zu "
+      "and Size %zu and device %p\n",
+      Address, RegionSize, Device);
+  PhysAddress_ = Address;
+  Size_ = RegionSize;
+  // Type-erased xrt::device *, dereferenced later in allocateBuffer().
+  XilinxXrtDeviceHandle_ = Device;
+}
+
+void XilinxXrtExternalRegion::freeBuffer(pocl_mem_identifier *P) {
+  delete (xrt::bo *)(P->mem_ptr); // xrt::bo created by allocateBuffer()
+  P->mem_ptr = NULL;
+}
+
+uint64_t XilinxXrtExternalRegion::pointerDeviceOffset(pocl_mem_identifier *P) {
+  assert(P->mem_ptr); // needs the xrt::bo stored by allocateBuffer()
+  return ((xrt::bo *)(P->mem_ptr))->address();
+}
+
+// Buffer allocation uses XRT buffer allocation API
+cl_int XilinxXrtExternalRegion::allocateBuffer(pocl_mem_identifier *P,
+                                               size_t Size) {
+  // Size bytes are requested from memory group 0 of the stored XRT device.
+  xrt::device *Dev = (xrt::device *)XilinxXrtDeviceHandle_;
+  xrt::bo *DeviceBuffer = new xrt::bo(*Dev, Size, (xrt::memory_group)0);
+  assert(DeviceBuffer != XRT_NULL_HANDLE && "xrtBufferHandle NULL");
+  P->mem_ptr = DeviceBuffer;
+  uint64_t BufferAddress = pointerDeviceOffset(P);
+  POCL_MSG_PRINT_ALMAIF(
+      "XRTMMAP: Initialized XilinxXrtExternalRegion buffer with "
+      "physical address %" PRIu64 "\n",
+      BufferAddress);
+  return CL_SUCCESS;
+}
+
+void XilinxXrtExternalRegion::CopyToMMAP(pocl_mem_identifier *DstMemId,
+                                         const void *Source, size_t Bytes,
+                                         size_t Offset) {
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "XRTMMAP: Writing 0x%zx bytes to buffer at 0x%zx with "
+      "address 0x%zx\n",
+      Bytes, PhysAddress_, (size_t)pointerDeviceOffset(DstMemId));
+  // Offset is relative to the start of the buffer object in DstMemId.
+  assert(Offset < Size_ && "Attempt to access data outside XRT memory");
+
+  xrt::bo *b = (xrt::bo *)(DstMemId->mem_ptr);
+  assert(b != XRT_NULL_HANDLE && "No buffer handle?");
+  b->write(Source, Bytes, Offset);
+  b->sync(XCL_BO_SYNC_BO_TO_DEVICE, Bytes, Offset);
+}
+
+void XilinxXrtExternalRegion::CopyFromMMAP(void *Destination,
+                                           pocl_mem_identifier *SrcMemId,
+                                           size_t Bytes, size_t Offset) {
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "XRTMMAP: Reading 0x%zx bytes from buffer at 0x%zx "
+      "with address 0x%zx\n",
+      Bytes, PhysAddress_, (size_t)pointerDeviceOffset(SrcMemId));
+  assert(Offset < Size_ && "Attempt to access data outside XRT memory");
+  // Sync the buffer object from the device first, then read the host copy.
+  xrt::bo *b = (xrt::bo *)(SrcMemId->mem_ptr);
+  assert(b != XRT_NULL_HANDLE && "No buffer handle?");
+  b->sync(XCL_BO_SYNC_BO_FROM_DEVICE, Bytes, Offset);
+  b->read(Destination, Bytes, Offset);
+}
+
+void XilinxXrtExternalRegion::CopyInMem(size_t Source, size_t Destination,
+                                        size_t Bytes) {
+  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Copying 0x%zx bytes from 0x%zx "
+                             "to 0x%zx\n",
+                             Bytes, Source, Destination);
+  size_t SrcOffset = Source - PhysAddress_;
+  size_t DstOffset = Destination - PhysAddress_;
+  assert(SrcOffset < Size_ && (SrcOffset + Bytes) <= Size_ &&
+         "Attempt to access data outside XRT memory");
+  assert(DstOffset < Size_ && (DstOffset + Bytes) <= Size_ &&
+         "Attempt to access data outside XRT memory");
+  // NOTE(review): only the bounds checks above are active. The copy itself
+  // is commented out below, so this function currently moves no data.
+/*
+  xrt::bo *b = (xrt::bo *)DeviceBuffer;
+  auto b_mapped = b->map();
+
+  b->sync(XCL_BO_SYNC_BO_FROM_DEVICE, Bytes, SrcOffset);
+  memcpy((char *)b_mapped + DstOffset, (char *)b_mapped + SrcOffset, Bytes);
+  b->sync(XCL_BO_SYNC_BO_TO_DEVICE, Bytes, DstOffset);
+*/}
diff --git a/lib/CL/devices/almaif/XilinxXrtExternalRegion.hh b/lib/CL/devices/almaif/XilinxXrtExternalRegion.hh
new file mode 100644
index 000000000..bf7f71e84
--- /dev/null
+++ b/lib/CL/devices/almaif/XilinxXrtExternalRegion.hh
@@ -0,0 +1,63 @@
+/* XilinxXrtExternalRegion.hh - Access external memory (DDR or HBM) of an XRT
+ device
+ *                        as AlmaIFRegion
+
+   Copyright (c) 2023 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#ifndef POCL_XILINXXRTEXTERNALREGION_H
+#define POCL_XILINXXRTEXTERNALREGION_H
+
+#include <stdlib.h>
+
+#include "pocl_cl.h"
+
+class XilinxXrtExternalRegion {
+public:
+  XilinxXrtExternalRegion(size_t Address, size_t RegionSize, void *Device);
+
+  // CopyToMMAP/CopyFromMMAP move Bytes bytes between host memory and the XRT
+  // buffer object held by the memory identifier, Offset bytes into it.
+  void CopyToMMAP(pocl_mem_identifier *DstMemId, const void *Source,
+                  size_t Bytes, size_t Offset);
+  void CopyFromMMAP(void *Destination, pocl_mem_identifier *SrcMemId,
+                    size_t Bytes, size_t Offset);
+  void CopyInMem(size_t Source, size_t Destination, size_t Bytes);
+
+  // Returns the offset of the allocated pointer in the Xrt address space
+  // used by the kernel
+  uint64_t pointerDeviceOffset(pocl_mem_identifier *P);
+  // Buffers are allocated through the XRT buffer allocation API so that the
+  // multiple distinct external memories of Xilinx PCIe FPGAs (several HBM and
+  // DDR banks) can be supported. Mapping whole banks as bufalloc regions
+  // instead showed significant performance issues when buffers were read or
+  // written via the XRT API (possibly the entire region was read/flushed
+  // even for partial accesses, or something to that effect).
+  cl_int allocateBuffer(pocl_mem_identifier *P, size_t Size);
+  void freeBuffer(pocl_mem_identifier *P);
+
+private:
+  size_t Size_;
+  size_t PhysAddress_;
+  void *XilinxXrtDeviceHandle_;
+};
+
+#endif
diff --git a/lib/CL/devices/almaif/XilinxXrtRegion.cc b/lib/CL/devices/almaif/XilinxXrtRegion.cc
new file mode 100644
index 000000000..6a7cbd9e8
--- /dev/null
+++ b/lib/CL/devices/almaif/XilinxXrtRegion.cc
@@ -0,0 +1,263 @@
+/* XilinxXrtRegion.cc - Access on-chip memory of an XRT device as AlmaIFRegion
+
+   Copyright (c) 2022 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#include <assert.h>
+#include <stdlib.h>
+#include <unistd.h>
+// #include <stdio.h>
+#include <fstream>
+
+#include "experimental/xrt_ip.h"
+
+#include "XilinxXrtRegion.hh"
+#include "pocl_util.h"
+
+// Records the region geometry and the XRT IP handle used for register access.
+XilinxXrtRegion::XilinxXrtRegion(size_t Address, size_t RegionSize,
+                                 void *kernel, size_t DeviceOffset)
+    : Kernel_(kernel), DeviceOffset_(DeviceOffset) {
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "XRTMMAP: Initializing XilinxXrtRegion with Address %zu "
+      "and Size %zu and kernel %p and DeviceOffset 0x%zx\n",
+      Address, RegionSize, kernel, DeviceOffset);
+  // Not declared by this class (presumably inherited), so assigned in the body.
+  PhysAddress_ = Address;
+  Size_ = RegionSize;
+  assert(Kernel_ != XRT_NULL_HANDLE &&
+         "xrtKernelHandle NULL, is the kernel opened properly?");
+}
+
+// Constructs the region and preloads it, one 32-bit word at a time, with the
+// contents of the binary file init_file. An empty region is left untouched.
+XilinxXrtRegion::XilinxXrtRegion(size_t Address, size_t RegionSize,
+                                 void *kernel, const std::string &init_file,
+                                 size_t DeviceOffset)
+    : XilinxXrtRegion(Address, RegionSize, kernel, DeviceOffset) {
+  if (RegionSize == 0) {
+    return; // don't try to write to empty region
+  }
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "XRTMMAP: Initializing XilinxXrtRegion with file %s\n",
+      init_file.c_str());
+  std::ifstream inFile(init_file, std::ios::binary);
+  unsigned int current = 0;
+  int i = 0;
+  // Only write when the read yielded data (short final read is zero-padded).
+  while (inFile.read(reinterpret_cast<char *>(&current), sizeof(current)) ||
+         inFile.gcount() > 0) {
+    ((xrt::ip *)Kernel_)->write_register(Address + i - DeviceOffset_, current);
+    i += 4;
+    current = 0; // keeps the padding of a partial last word deterministic
+  }
+  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Initialized region with %i bytes \n",
+                             i);
+}
+
+// Loads the binary file init_file into the region, one 32-bit word at a time.
+void XilinxXrtRegion::initRegion(const std::string &init_file) {
+  std::ifstream inFile(init_file, std::ios::binary);
+  unsigned int current = 0;
+  int i = 0;
+  // Only write when the read yielded data (short final read is zero-padded).
+  while (inFile.read(reinterpret_cast<char *>(&current), sizeof(current)) ||
+         inFile.gcount() > 0) {
+    Write32(i, current);
+    i += 4;
+    current = 0; // keeps the padding of a partial last word deterministic
+  }
+  POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Initialized region with %i bytes \n", i);
+}
+
+// Reads the 32-bit register located offset bytes into this region.
+uint32_t XilinxXrtRegion::Read32(size_t offset) {
+  const size_t RegAddr = PhysAddress_ + offset - DeviceOffset_;
+  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading from region at 0x%zx with "
+                             "offset 0x%zx\n",
+                             PhysAddress_, RegAddr);
+  assert(Kernel_ != XRT_NULL_HANDLE &&
+         "No kernel handle; read before mapping?");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
+  auto *Ip = static_cast<xrt::ip *>(Kernel_);
+  return Ip->read_register(RegAddr);
+}
+
+// Writes value to the 32-bit register located offset bytes into this region.
+void XilinxXrtRegion::Write32(size_t offset, uint32_t value) {
+  const size_t RegAddr = PhysAddress_ + offset - DeviceOffset_;
+  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Writing to region at 0x%zx with "
+                             "offset 0x%zx\n",
+                             PhysAddress_, RegAddr);
+  assert(Kernel_ != XRT_NULL_HANDLE &&
+         "No kernel handle; write before mapping?");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
+  static_cast<xrt::ip *>(Kernel_)->write_register(RegAddr, value);
+}
+
+// Writes value as two 32-bit registers: low word at offset, high at offset+4.
+void XilinxXrtRegion::Write64(size_t offset, uint64_t value) {
+  const size_t RegAddr = PhysAddress_ + offset - DeviceOffset_;
+  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Writing 64b to region at 0x%zx with "
+                             "offset 0x%zx\n",
+                             PhysAddress_, RegAddr);
+  assert(Kernel_ != XRT_NULL_HANDLE &&
+         "No kernel handle; write before mapping?");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
+  auto *Ip = static_cast<xrt::ip *>(Kernel_);
+  Ip->write_register(RegAddr, static_cast<uint32_t>(value));
+  Ip->write_register(RegAddr + 4, static_cast<uint32_t>(value >> 32));
+}
+
+// Writes a 16-bit value via read-modify-write of the enclosing 32-bit word.
+void XilinxXrtRegion::Write16(size_t offset, uint16_t value) {
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "XRTMMAP: Writing 16b to region at 0x%zx with "
+      "offset 0x%zx, DeviceOffset 0x%zx and total offset 0x%zx\n",
+      PhysAddress_, offset, DeviceOffset_,
+      PhysAddress_ + offset - DeviceOffset_);
+  assert(Kernel_ != XRT_NULL_HANDLE &&
+         "No kernel handle; write before mapping?");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
+
+  auto *Ip = static_cast<xrt::ip *>(Kernel_);
+  // Address of the 4-byte aligned word that contains the halfword.
+  const size_t WordAddr = PhysAddress_ + (offset & 0xFFFFFFFC) - DeviceOffset_;
+  const uint32_t Previous = Ip->read_register(WordAddr);
+
+  // Bit 1 of the offset selects the upper (set) or lower (clear) halfword.
+  const bool UpperHalf = (offset & 0b10) != 0;
+  const uint32_t Merged = UpperHalf
+                              ? (((uint32_t)value << 16) | (Previous & 0xFFFF))
+                              : ((Previous & 0xFFFF0000) | (uint32_t)value);
+
+  Ip->write_register(WordAddr, Merged);
+}
+
+// Reads a 64-bit value as two 32-bit registers: low word at offset, high word
+// at offset + 4.
+uint64_t XilinxXrtRegion::Read64(size_t offset) {
+  const size_t RegAddr = PhysAddress_ + offset - DeviceOffset_;
+  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading 64b from region at 0x%zx with "
+                             "offset 0x%zx\n",
+                             PhysAddress_, RegAddr);
+  assert(Kernel_ != XRT_NULL_HANDLE &&
+         "No kernel handle; write before mapping?");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
+  auto *Ip = static_cast<xrt::ip *>(Kernel_);
+  const uint64_t Low = Ip->read_register(RegAddr);
+  const uint64_t High = Ip->read_register(RegAddr + 4);
+
+  // Combine the halves with the high word in bits 63..32.
+  return (High << 32) | Low;
+}
+
+// Copies bytes bytes from host memory at source into the region, word by word.
+void XilinxXrtRegion::CopyToMMAP(size_t destination, const void *source,
+                                 size_t bytes) {
+  const auto *Words = static_cast<const uint32_t *>(source);
+  size_t offset = destination - PhysAddress_;
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "XRTMMAP: Writing 0x%zx bytes to buffer at region 0x%zx with "
+      "address 0x%zx and offset %zx\n",
+      bytes, PhysAddress_, destination, offset);
+  assert(offset < Size_ && "Attempt to access data outside XRT memory");
+  assert((offset & 0b11) == 0 &&
+         "Xrt copytommap destination must be 4 byte aligned");
+  assert(((size_t)Words & 0b11) == 0 &&
+         "Xrt copytommap source must be 4 byte aligned");
+  assert((bytes % 4) == 0 && "Xrt copytommap size must be 4 byte multiple");
+
+  auto *Ip = static_cast<xrt::ip *>(Kernel_);
+  const size_t WordCount = bytes / 4;
+  for (size_t Idx = 0; Idx < WordCount; ++Idx)
+    Ip->write_register(destination + 4 * Idx - DeviceOffset_, Words[Idx]);
+}
+
+// Reads bytes bytes from region address source into host memory at
+// destination. 1- and 2-byte reads take the low bits of a single register;
+// larger reads go word by word, with any 1-3 trailing bytes copied bytewise.
+void XilinxXrtRegion::CopyFromMMAP(void *destination, size_t source,
+                                   size_t bytes) {
+  auto dst = (uint32_t *)destination;
+  size_t offset = source - PhysAddress_;
+  POCL_MSG_PRINT_ALMAIF_MMAP(
+      "XRTMMAP: Reading 0x%zx bytes from region at 0x%zx "
+      "with address 0x%zx and offset 0x%zx\n",
+      bytes, PhysAddress_, source, offset);
+  assert(offset < Size_ && "Attempt to access data outside XRT memory");
+  assert((offset & 0b11) == 0 &&
+         "Xrt copyfrommmap source must be 4 byte aligned");
+  xrt::ip *ip = (xrt::ip *)Kernel_;
+
+  switch (bytes) {
+  case 1: {
+    uint32_t value = ip->read_register(source - DeviceOffset_);
+    *((uint8_t *)destination) = value;
+    break;
+  }
+  case 2: {
+    uint32_t value = ip->read_register(source - DeviceOffset_);
+    *((uint16_t *)destination) = value;
+    break;
+  }
+  default: {
+    assert(((size_t)dst & 0b11) == 0 &&
+           "Xrt copyfrommmap destination must be 4 byte aligned");
+    size_t i;
+    for (i = 0; i < bytes / 4; ++i) {
+      dst[i] = ip->read_register(source - DeviceOffset_ + 4 * i);
+    }
+    if ((bytes % 4) != 0) {
+      // Copy only the remaining 1-3 bytes. Storing through the uint32_t
+      // pointer here would overwrite 4 bytes and run past the destination.
+      uint32_t last = ip->read_register(source - DeviceOffset_ + 4 * i);
+      const char *from = (const char *)&last;
+      char *to = (char *)&dst[i];
+      for (size_t k = 0; k < (bytes % 4); k++) {
+        to[k] = from[k];
+      }
+    }
+  }
+  }
+}
+
+// Copies bytes bytes within the region by reading and rewriting each word.
+void XilinxXrtRegion::CopyInMem(size_t source, size_t destination,
+                                size_t bytes) {
+  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Copying 0x%zx bytes from 0x%zx "
+                             "to 0x%zx\n",
+                             bytes, source, destination);
+  const size_t SrcOff = source - PhysAddress_;
+  const size_t DstOff = destination - PhysAddress_;
+  assert(SrcOff < Size_ && (SrcOff + bytes) <= Size_ &&
+         "Attempt to access data outside XRT memory");
+  assert(DstOff < Size_ && (DstOff + bytes) <= Size_ &&
+         "Attempt to access data outside XRT memory");
+  assert((bytes % 4) == 0 && "Xrt copyinmem size must be 4 byte multiple");
+  auto *Ip = static_cast<xrt::ip *>(Kernel_);
+  const size_t SrcBase = source - DeviceOffset_;
+  const size_t DstBase = destination - DeviceOffset_;
+  for (size_t W = 0; W < bytes / 4; ++W)
+    Ip->write_register(DstBase + 4 * W, Ip->read_register(SrcBase + 4 * W));
+}
+
+void XilinxXrtRegion::setKernelPtr(void *ptr) { Kernel_ = ptr; }
diff --git a/lib/CL/devices/almaif/XrtRegion.hh b/lib/CL/devices/almaif/XilinxXrtRegion.hh
similarity index 71%
rename from lib/CL/devices/almaif/XrtRegion.hh
rename to lib/CL/devices/almaif/XilinxXrtRegion.hh
index acc585eb3..d619c2e8c 100644
--- a/lib/CL/devices/almaif/XrtRegion.hh
+++ b/lib/CL/devices/almaif/XilinxXrtRegion.hh
@@ -1,5 +1,4 @@
-/* XrtRegion.hh - basic way of accessing accelerator memory.
- *                 as a memory mapped region
+/* XilinxXrtRegion.hh - Access on-chip memory of an XRT device as AlmaIFRegion
 
    Copyright (c) 2022 Topi Leppänen / Tampere University
 
@@ -22,8 +21,8 @@
    IN THE SOFTWARE.
 */
 
-#ifndef XRTREGION_H
-#define XRTREGION_H
+#ifndef XILINXXRTREGION_H
+#define XILINXXRTREGION_H
 
 #include <stdlib.h>
 
@@ -31,24 +30,32 @@
 
 #include "AlmaIFRegion.hh"
 
-class XrtRegion : public AlmaIFRegion {
+// Uses XRT's kernel IP API to abstract the on-chip memory
+// of an Almaif device
+class XilinxXrtRegion : public AlmaIFRegion {
 public:
-  XrtRegion(size_t Address, size_t RegionSize, void *kernel);
-  XrtRegion(size_t Address, size_t RegionSize, void *kernel, char *init_file);
+  XilinxXrtRegion(size_t Address, size_t RegionSize, void *kernel,
+                  size_t DeviceOffset);
+  XilinxXrtRegion(size_t Address, size_t RegionSize, void *kernel,
+                  const std::string &init_file, size_t DeviceOffset);
 
   uint32_t Read32(size_t offset) override;
   void Write32(size_t offset, uint32_t value) override;
   void Write16(size_t offset, uint16_t value) override;
   uint64_t Read64(size_t offset) override;
+  void Write64(size_t offset, uint64_t value) override;
 
   void CopyToMMAP(size_t destination, const void *source,
                   size_t bytes) override;
   void CopyFromMMAP(void *destination, size_t source, size_t bytes) override;
   void CopyInMem(size_t source, size_t destination, size_t bytes) override;
 
+  void initRegion(const std::string &init_file);
+  void setKernelPtr(void *ptr);
+
 private:
-  void *Kernel;
-  void *DeviceHandle;
+  void *Kernel_;
+  size_t DeviceOffset_;
 };
 
 #endif
diff --git a/lib/CL/devices/almaif/XrtDevice.cc b/lib/CL/devices/almaif/XrtDevice.cc
deleted file mode 100644
index 0cde026ea..000000000
--- a/lib/CL/devices/almaif/XrtDevice.cc
+++ /dev/null
@@ -1,75 +0,0 @@
-/* XrtDevice.cc - accessing accelerator memory as memory mapped region.
-
-   Copyright (c) 2022 Topi Leppänen / Tampere University
-
-   Permission is hereby granted, free of charge, to any person obtaining a copy
-   of this software and associated documentation files (the "Software"), to
-   deal in the Software without restriction, including without limitation the
-   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-   sell copies of the Software, and to permit persons to whom the Software is
-   furnished to do so, subject to the following conditions:
-
-   The above copyright notice and this permission notice shall be included in
-   all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-   IN THE SOFTWARE.
-*/
-
-#include "XrtDevice.hh"
-
-#include "XrtRegion.hh"
-#include "AlmaifShared.hh"
-
-#include "experimental/xrt_kernel.h"
-
-XrtDevice::XrtDevice(char *xrt_kernel_name) {
-
-  unsigned xclbin_char_length = strlen(xrt_kernel_name) + 8;
-  char *xclbin_char = (char *)malloc(xclbin_char_length);
-  assert(xclbin_char);
-  snprintf(xclbin_char, xclbin_char_length, "%s.xclbin", xrt_kernel_name);
-
-  // TODO: Fix the case when the kernel name contains a path
-  // Needs to tokenize the last part of the path and use that
-  // as the kernel name
-  unsigned xrt_kernel_name_length = 2 * strlen(xrt_kernel_name) + 6;
-  char *xrt_kernel_name = (char *)malloc(xrt_kernel_name_length);
-  assert(xrt_kernel_name);
-  snprintf(xrt_kernel_name, xrt_kernel_name_length, "%s:{%s_1}",
-           xrt_kernel_name, xrt_kernel_name);
-
-  auto devicehandle = new xrt::device(0);
-  assert(devicehandle != NULL && "devicehandle null\n");
-
-  auto uuid = devicehandle->load_xclbin(xclbin_char);
-  auto kernel = new xrt::kernel(*devicehandle, uuid, xrt_kernel_name,
-                                xrt::kernel::cu_access_mode::exclusive);
-
-  free(xclbin_char);
-  free(xrt_kernel_name);
-
-  assert(kernel != XRT_NULL_HANDLE &&
-         "xrtKernelHandle NULL, is the kernel opened properly?");
-
-  Kernel = (void *)kernel;
-  DeviceHandle = (void *)devicehandle;
-
-  ControlMemory = new XrtRegion(0, ALMAIF_DEFAULT_CTRL_SIZE, Kernel);
-
-  discoverDeviceParameters();
-
-  InstructionMemory = new XrtRegion(imem_start, imem_size, Kernel);
-  CQMemory = new XrtRegion(cq_start, cq_size, Kernel);
-  DataMemory = new XrtRegion(dmem_start, dmem_size, Kernel);
-}
-
-XrtDevice::~XrtDevice() {
-  delete ((xrt::kernel *)Kernel);
-  delete ((xrt::device *)DeviceHandle);
-}
diff --git a/lib/CL/devices/almaif/XrtDevice.hh b/lib/CL/devices/almaif/XrtDevice.hh
deleted file mode 100644
index 110b8ab11..000000000
--- a/lib/CL/devices/almaif/XrtDevice.hh
+++ /dev/null
@@ -1,40 +0,0 @@
-/* XrtDevice.hh - basic way of accessing accelerator memory.
- *                 as a memory mapped region
-
-   Copyright (c) 2022 Topi Leppänen / Tampere University
-
-   Permission is hereby granted, free of charge, to any person obtaining a copy
-   of this software and associated documentation files (the "Software"), to
-   deal in the Software without restriction, including without limitation the
-   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-   sell copies of the Software, and to permit persons to whom the Software is
-   furnished to do so, subject to the following conditions:
-
-   The above copyright notice and this permission notice shall be included in
-   all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-   IN THE SOFTWARE.
-*/
-
-#ifndef XrtDevice_H
-#define XrtDevice_H
-
-#include "AlmaIFDevice.hh"
-
-class XrtDevice : public AlmaIFDevice {
-public:
-  XrtDevice(char *xrt_kernel_name);
-  ~XrtDevice() override;
-
-private:
-  void *Kernel;
-  void *DeviceHandle;
-};
-
-#endif
diff --git a/lib/CL/devices/almaif/XrtRegion.cc b/lib/CL/devices/almaif/XrtRegion.cc
deleted file mode 100644
index e8a59402a..000000000
--- a/lib/CL/devices/almaif/XrtRegion.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/* XrtRegion.cc - accessing accelerator memory as memory mapped region.
-
-   Copyright (c) 2022 Topi Leppänen / Tampere University
-
-   Permission is hereby granted, free of charge, to any person obtaining a copy
-   of this software and associated documentation files (the "Software"), to
-   deal in the Software without restriction, including without limitation the
-   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-   sell copies of the Software, and to permit persons to whom the Software is
-   furnished to do so, subject to the following conditions:
-
-   The above copyright notice and this permission notice shall be included in
-   all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-   IN THE SOFTWARE.
-*/
-
-#include <assert.h>
-#include <stdlib.h>
-#include <unistd.h>
-//#include <stdio.h>
-#include <fstream>
-
-#include "experimental/xrt_kernel.h"
-
-#include "XrtRegion.hh"
-#include "pocl_util.h"
-
-XrtRegion::XrtRegion(size_t Address, size_t RegionSize, void *kernel) {
-
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Initializing XrtRegion with Address %zu "
-                            "and Size %zu and kernel %p\n",
-                            Address, RegionSize, kernel);
-  PhysAddress = Address;
-  Size = RegionSize;
-  Kernel = kernel;
-  assert(Kernel != XRT_NULL_HANDLE &&
-         "xrtKernelHandle NULL, is the kernel opened properly?");
-}
-
-XrtRegion::XrtRegion(size_t Address, size_t RegionSize, void *kernel,
-                     char *init_file)
-    : XrtRegion(Address, RegionSize, kernel) {
-
-  if (RegionSize == 0) {
-    return; // don't try to write to empty region
-  }
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Initializing XrtRegion with file %s\n",
-                            init_file);
-  std::ifstream inFile;
-  inFile.open(init_file, std::ios::binary);
-  unsigned int current;
-  int i = 0;
-  while (inFile.good()) {
-    inFile.read(reinterpret_cast<char *>(&current), sizeof(current));
-
-    ((xrt::kernel *)Kernel)->write_register(Address + i, current);
-    i += 4;
-  }
-
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Initialized region with %i bytes \n",
-                            i - 4);
-}
-
-uint32_t XrtRegion::Read32(size_t offset) {
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading from physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
-  assert(Kernel != XRT_NULL_HANDLE && "No kernel handle; read before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
-  uint32_t value = ((xrt::kernel *)Kernel)->read_register(PhysAddress + offset);
-  return value;
-}
-
-void XrtRegion::Write32(size_t offset, uint32_t value) {
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
-  assert(Kernel != XRT_NULL_HANDLE &&
-         "No kernel handle; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
-  ((xrt::kernel *)Kernel)->write_register(PhysAddress + offset, value);
-}
-
-void XrtRegion::Write16(size_t offset, uint16_t value) {
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
-  assert(Kernel != XRT_NULL_HANDLE &&
-         "No kernel handle; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
-
-  uint32_t old_value = ((xrt::kernel *)Kernel)
-                           ->read_register(PhysAddress + (offset & 0xFFFFFFFC));
-
-  uint32_t new_value = 0;
-  if ((offset & 0b10) == 0) {
-    new_value = (old_value & 0xFFFF0000) | (uint32_t)value;
-  } else {
-    new_value = ((uint32_t)value << 16) | (old_value & 0xFFFF);
-  }
-  ((xrt::kernel *)Kernel)
-      ->write_register(PhysAddress + (offset & 0xFFFFFFFC), new_value);
-}
-
-uint64_t XrtRegion::Read64(size_t offset) {
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading from physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
-  assert(Kernel != XRT_NULL_HANDLE &&
-         "No kernel handle; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
-  uint32_t value_low =
-      ((xrt::kernel *)Kernel)->read_register(PhysAddress + offset);
-  uint32_t value_high =
-      ((xrt::kernel *)Kernel)->read_register(PhysAddress + offset + 4);
-  uint64_t value = ((uint64_t)value_high << 32) | value_low;
-  return value;
-}
-
-void XrtRegion::CopyToMMAP(size_t destination, const void *source,
-                           size_t bytes) {
-  POCL_MSG_PRINT_ALMAIF_MMAP(
-      "XRTMMAP: Writing 0x%zx bytes to buffer at 0x%zx with "
-      "address 0x%zx\n",
-      bytes, PhysAddress, destination);
-  auto src = (uint32_t *)source;
-  size_t offset = destination - PhysAddress;
-  assert(offset < Size && "Attempt to access data outside XRT memory");
-
-  assert((offset & 0b11) == 0 &&
-         "Xrt copytommap destination must be 4 byte aligned");
-  assert(((size_t)src & 0b11) == 0 &&
-         "Xrt copytommap source must be 4 byte aligned");
-  assert((bytes % 4) == 0 && "Xrt copytommap size must be 4 byte multiple");
-
-  for (size_t i = 0; i < bytes / 4; ++i) {
-    ((xrt::kernel *)Kernel)->write_register(destination + 4 * i, src[i]);
-  }
-}
-
-void XrtRegion::CopyFromMMAP(void *destination, size_t source, size_t bytes) {
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Reading 0x%zx bytes from buffer at 0x%zx "
-                            "with address 0x%zx\n",
-                            bytes, PhysAddress, source);
-  auto dst = (uint32_t *)destination;
-  size_t offset = source - PhysAddress;
-  assert(offset < Size && "Attempt to access data outside XRT memory");
-
-  assert((offset & 0b11) == 0 &&
-         "Xrt copyfrommmap source must be 4 byte aligned");
-  assert(((size_t)dst & 0b11) == 0 &&
-         "Xrt copyfrommmap destination must be 4 byte aligned");
-  assert((bytes % 4) == 0 && "Xrt copyfrommmap size must be 4 byte multiple");
-
-  for (size_t i = 0; i < bytes / 4; ++i) {
-    dst[i] = ((xrt::kernel *)Kernel)->read_register(source + 4 * i);
-  }
-}
-
-void XrtRegion::CopyInMem(size_t source, size_t destination, size_t bytes) {
-  POCL_MSG_PRINT_ALMAIF_MMAP("XRTMMAP: Copying 0x%zx bytes from 0x%zx "
-                            "to 0x%zx\n",
-                            bytes, source, destination);
-  size_t src_offset = source - PhysAddress;
-  size_t dst_offset = destination - PhysAddress;
-  assert(src_offset < Size && (src_offset + bytes) <= Size &&
-         "Attempt to access data outside XRT memory");
-  assert(dst_offset < Size && (dst_offset + bytes) <= Size &&
-         "Attempt to access data outside XRT memory");
-  assert((bytes % 4) == 0 && "Xrt copyinmem size must be 4 byte multiple");
-  xrt::kernel *k = (xrt::kernel *)Kernel;
-
-  for (size_t i = 0; i < bytes / 4; ++i) {
-    uint32_t m = k->read_register(source + 4 * i);
-    k->write_register(destination + 4 * i, m);
-  }
-}
diff --git a/lib/CL/devices/almaif/almaif.cc b/lib/CL/devices/almaif/almaif.cc
index dbac35348..147035112 100644
--- a/lib/CL/devices/almaif/almaif.cc
+++ b/lib/CL/devices/almaif/almaif.cc
@@ -27,9 +27,17 @@
 #include "AlmaIFRegion.hh"
 #include "MMAPDevice.hh"
 #include "config.h"
+
 #ifdef HAVE_XRT
-#include "XrtDevice.hh"
+#include "XilinxXrtDevice.hh"
+#define HAVE_DBDEVICE
+#endif
+
+#ifdef HAVE_DBDEVICE
+#include "AlmaifDB/AlmaIFBitstreamDatabaseManager.hh"
+#include "AlmaifDB/DBDevice.hh"
 #endif
+
 #include "EmulationDevice.hh"
 
 #ifdef TCE_AVAILABLE
@@ -42,7 +50,7 @@
 #include "common.h"
 #include "common_driver.h"
 #include "devices.h"
-#include "openasip/AlmaifCompileTCE.hh"
+#include "openasip/AlmaifCompileOpenasip.hh"
 #include "pocl_cl.h"
 #include "pocl_timing.h"
 #include "pocl_util.h"
@@ -131,21 +139,19 @@ void pocl_almaif_init_device_ops(struct pocl_device_ops *ops) {
 void pocl_almaif_write(void *data, const void *__restrict__ src_host_ptr,
                       pocl_mem_identifier *dst_mem_id, cl_mem dst_buf,
                       size_t offset, size_t size) {
-  chunk_info_t *chunk = (chunk_info_t *)dst_mem_id->mem_ptr;
-  size_t dst = chunk->start_address + offset;
   AlmaifData *d = (AlmaifData *)data;
 
-  d->Dev->writeDataToDevice(dst, (const char *__restrict)src_host_ptr, size);
+  d->Dev->writeDataToDevice(dst_mem_id, (const char *__restrict)src_host_ptr,
+                            size, offset);
 }
 
 void pocl_almaif_read(void *data, void *__restrict__ dst_host_ptr,
                      pocl_mem_identifier *src_mem_id, cl_mem src_buf,
                      size_t offset, size_t size) {
-  chunk_info_t *chunk = (chunk_info_t *)src_mem_id->mem_ptr;
-  size_t src = chunk->start_address + offset;
   AlmaifData *d = (AlmaifData *)data;
 
-  d->Dev->readDataFromDevice((char *__restrict__)dst_host_ptr, src, size);
+  d->Dev->readDataFromDevice((char *__restrict__)dst_host_ptr, src_mem_id, size,
+                             offset);
 }
 
 void pocl_almaif_copy(void *data, pocl_mem_identifier *dst_mem_id,
@@ -157,6 +163,9 @@ void pocl_almaif_copy(void *data, pocl_mem_identifier *dst_mem_id,
   chunk_info_t *dst_chunk = (chunk_info_t *)dst_mem_id->mem_ptr;
   size_t src = src_chunk->start_address + src_offset;
   size_t dst = dst_chunk->start_address + dst_offset;
+  if (src == dst) {
+    return;
+  }
   AlmaifData *d = (AlmaifData *)data;
 
   if (d->Dev->DataMemory->isInRange(dst)) {
@@ -195,41 +204,24 @@ cl_int pocl_almaif_alloc_mem_obj(cl_device_id device, cl_mem mem_obj,
                                 void *host_ptr) {
 
   AlmaifData *data = (AlmaifData *)device->data;
-  pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id];
-  assert(p->mem_ptr == NULL);
-  chunk_info_t *chunk = NULL;
 
   /* almaif driver doesn't preallocate */
   if ((mem_obj->flags & CL_MEM_ALLOC_HOST_PTR) && (mem_obj->mem_host_ptr == NULL))
     return CL_MEM_OBJECT_ALLOCATION_FAILURE;
 
-  chunk = pocl_alloc_buffer(data->Dev->AllocRegions, mem_obj->size);
-  if (chunk == NULL)
-    return CL_MEM_OBJECT_ALLOCATION_FAILURE;
-
-  POCL_MSG_PRINT_MEMORY("almaif: allocated %zu bytes from 0x%zx\n",
-                        mem_obj->size, chunk->start_address);
-
-  p->mem_ptr = chunk;
-  p->version = 0;
+  pocl_mem_identifier *p = &mem_obj->device_ptrs[device->global_mem_id];
+  cl_int alloc_success = data->Dev->allocateBuffer(p, mem_obj->size);
 
-  return CL_SUCCESS;
+  return alloc_success;
 }
 
 
 void pocl_almaif_free(cl_device_id device, cl_mem mem) {
 
   pocl_mem_identifier *p = &mem->device_ptrs[device->global_mem_id];
-  //AlmaifData *data = (AlmaifData *)device->data;
-
-  chunk_info_t *chunk =
-      (chunk_info_t *)p->mem_ptr;
-
-  POCL_MSG_PRINT_MEMORY("almaif: freed %zu bytes from 0x%zx\n", mem->size,
-                        chunk->start_address);
+  AlmaifData *data = (AlmaifData *)device->data;
 
-  assert(chunk != NULL);
-  pocl_free_chunk(chunk);
+  data->Dev->freeBuffer(p);
 
   p->mem_ptr = NULL;
   p->version = 0;
@@ -264,9 +256,9 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) {
   SETUP_DEVICE_CL_VERSION(1, 2);
   dev->type = CL_DEVICE_TYPE_CUSTOM;
   dev->long_name = (char *)"memory mapped custom device";
+  dev->short_name = "almaif";
   dev->vendor = "pocl";
   dev->version = "1.2";
-  dev->available = CL_TRUE;
   dev->extensions = "";
   dev->profile = "FULL_PROFILE";
 
@@ -305,6 +297,8 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) {
   dev->preferred_wg_size_multiple = 8;
 
   AlmaifData *D = new AlmaifData;
+  D->Available = CL_TRUE;
+  dev->available = &(D->Available);
   dev->data = (void *)D;
 
   char *scanParams;
@@ -322,74 +316,51 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) {
   D->BaseAddress = strtoull(paramToken, NULL, 0);
 
   std::string supportedList;
-  char *device_init_file = NULL;
-  if (D->BaseAddress != 0xE) {
+  std::string device_init_file = "";
+  if (D->BaseAddress != POCL_ALMAIFDEVICE_EMULATION) {
     paramToken = strtok_r(NULL, ",", &savePtr);
     assert(paramToken);
-    device_init_file = (char *)malloc(strlen(paramToken) + 1);
-    assert(device_init_file);
-    strcpy(device_init_file, paramToken);
-    POCL_MSG_PRINT_ALMAIF("Enabling device with device init file name %s",
-                          device_init_file);
+    device_init_file = paramToken;
+    POCL_MSG_PRINT_ALMAIF("Enabling device with device init file name %s\n",
+                          device_init_file.c_str());
   }
 
   bool enable_compilation = false;
 
-  while ((paramToken = strtok_r(NULL, ",", &savePtr))) {
-    auto token = strtoul(paramToken, NULL, 0);
-    BuiltinKernelId kernelId = static_cast<BuiltinKernelId>(token);
-
-    bool found = false;
-    for (size_t i = 0; i < BIKERNELS; ++i) {
-      if (pocl_BIDescriptors[i].KernelId == kernelId) {
-        if (supportedList.size() > 0)
-          supportedList += ";";
-        supportedList += pocl_BIDescriptors[i].name;
-        D->SupportedKernels.insert(&pocl_BIDescriptors[i]);
-        found = true;
-        break;
-      }
-    }
-    if (kernelId == POCL_CDBI_JIT_COMPILER) {
-      enable_compilation = true;
-    } else if (!found) {
-      POCL_ABORT("almaif: Unknown Kernel ID (%lu) given\n", token);
-    }
-  }
-  free(scanParams);
-
   // almaif devices are little endian by default, but the emulation device is
   // host dependant
-  dev->endian_little = D->BaseAddress == 0xE ? !(WORDS_BIGENDIAN) : CL_TRUE;
-  if (D->BaseAddress == 0xE) {
+  dev->endian_little = D->BaseAddress == POCL_ALMAIFDEVICE_EMULATION
+                           ? !(WORDS_BIGENDIAN)
+                           : CL_TRUE;
+  if (D->BaseAddress == POCL_ALMAIFDEVICE_EMULATION) {
     dev->long_name = (char *)"almaif emulation device";
   }
 
-  dev->builtin_kernel_list = strdup(supportedList.c_str());
-  dev->num_builtin_kernels = D->SupportedKernels.size();
-  pocl_setup_builtin_kernels_with_version(dev);
-
   if (!pocl_offline_compile) {
 
-    POCL_MSG_PRINT_ALMAIF(
-        "almaif: accelerator at 0x%zx with %zu builtin kernels (%s)\n",
-        D->BaseAddress, D->SupportedKernels.size(), dev->builtin_kernel_list);
     // Recognize whether we are emulating or not
-    if (D->BaseAddress == EMULATING_ADDRESS) {
+    if (D->BaseAddress == POCL_ALMAIFDEVICE_EMULATION) {
       D->Dev = new EmulationDevice();
-    }
+    } else if (D->BaseAddress == POCL_ALMAIFDEVICE_XRT) {
 #ifdef HAVE_XRT
-    else if (D->BaseAddress == 0xA) {
-      D->Dev = new XrtDevice(device_init_file);
-    }
+      D->Dev = new XilinxXrtDevice(device_init_file, j);
+#else
+      POCL_ABORT(
+          "Almaif: tried enabling XilinxXrtDevice but it's not available\n");
 #endif
-    else if (D->BaseAddress == 0xB) {
+    } else if (D->BaseAddress == POCL_ALMAIFDEVICE_TTASIM) {
 #ifdef TCE_AVAILABLE
       D->Dev = new TTASimDevice(device_init_file);
       enable_compilation = true;
 #else
       POCL_ABORT("almaif: Tried enabling TTASim device, but it's not available. "
                  "Did you set ENABLE_TCE=1?\n");
+#endif
+    } else if (D->BaseAddress == POCL_ALMAIFDEVICE_BITSTREAMDATABASE) {
+#ifdef HAVE_DBDEVICE
+      D->Dev = new DBDevice(device_init_file);
+#else
+      POCL_ABORT("Almaif: tried enabling DBDevice but it's not available\n");
 #endif
     } else {
       D->Dev = new MMAPDevice(D->BaseAddress, device_init_file);
@@ -405,26 +376,81 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) {
       }
       POCL_UNLOCK(globalMemIDLock);
     }
-    dev->global_mem_size = D->Dev->DataMemory->Size;
+    dev->global_mem_size = D->Dev->DataMemory->Size();
     if (D->Dev->ExternalMemory != nullptr &&
-        D->Dev->ExternalMemory->Size > D->Dev->DataMemory->Size)
-      dev->global_mem_size = D->Dev->ExternalMemory->Size;
+        D->Dev->ExternalMemory->Size() > D->Dev->DataMemory->Size())
+      dev->global_mem_size = D->Dev->ExternalMemory->Size();
 
   } else {
     POCL_MSG_PRINT_ALMAIF(
         "Starting offline compilation device initialization\n");
   }
 
+  if (D->Dev->isDBDevice()) {
+#ifdef HAVE_DBDEVICE
+    std::vector<BuiltinKernelId> bik_list =
+        ((DBDevice *)(D->Dev))->supportedBuiltinKernels();
+
+    for (const BuiltinKernelId &kernelId : bik_list) {
+
+      bool found = false;
+      for (size_t i = 0; i < BIKERNELS; ++i) {
+        if (pocl_BIDescriptors[i].KernelId == kernelId) {
+          if (supportedList.size() > 0)
+            supportedList += ";";
+          supportedList += pocl_BIDescriptors[i].name;
+          D->SupportedKernels.insert(&pocl_BIDescriptors[i]);
+          found = true;
+          break;
+        }
+      }
+      if (kernelId == POCL_CDBI_JIT_COMPILER) {
+        enable_compilation = true;
+      } else if (!found) {
+        POCL_ABORT("almaif: Unknown Kernel ID (%lu) coming from database\n",
+                   (unsigned long)kernelId);
+      }
+    }
+#endif
+  } else {
+    while ((paramToken = strtok_r(NULL, ",", &savePtr))) {
+      auto token = strtoul(paramToken, NULL, 0);
+      BuiltinKernelId kernelId = static_cast<BuiltinKernelId>(token);
+
+      bool found = false;
+      for (size_t i = 0; i < BIKERNELS; ++i) {
+        if (pocl_BIDescriptors[i].KernelId == kernelId) {
+          if (supportedList.size() > 0)
+            supportedList += ";";
+          supportedList += pocl_BIDescriptors[i].name;
+          D->SupportedKernels.insert(&pocl_BIDescriptors[i]);
+          found = true;
+          break;
+        }
+      }
+      if (kernelId == POCL_CDBI_JIT_COMPILER) {
+        enable_compilation = true;
+      } else if (!found) {
+        POCL_ABORT("almaif: Unknown Kernel ID (%lu) given\n", token);
+      }
+    }
+  }
+
+  dev->builtin_kernel_list = strdup(supportedList.c_str());
+  dev->num_builtin_kernels = D->SupportedKernels.size();
+  pocl_setup_builtin_kernels_with_version(dev);
+  POCL_MSG_PRINT_ALMAIF(
+      "almaif: accelerator at 0x%zx with %zu builtin kernels (%s)\n",
+      D->BaseAddress, D->SupportedKernels.size(), dev->builtin_kernel_list);
+
+  free(scanParams);
+
   if (enable_compilation) {
 
     dev->compiler_available = CL_TRUE;
     dev->linker_available = CL_TRUE;
-    unsigned adf_file_length = strlen(device_init_file) + 5;
-    char *adf_file = (char *)malloc(adf_file_length);
-    assert(adf_file);
-    snprintf(adf_file, adf_file_length, "%s.adf", device_init_file);
+    std::string adf_file = device_init_file + ".adf";
     pocl_almaif_compile_init(j, dev, adf_file);
-    free(adf_file);
 
   } else {
     D->compilationData = NULL;
@@ -432,26 +458,45 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) {
     dev->linker_available = CL_FALSE;
   }
 
-  free(device_init_file);
+  dev->device_side_printf = 1;
+  dev->printf_buffer_size = PRINTF_BUFFER_SIZE / 4;
+  chunk_info_t *chunk = NULL;
+  chunk = pocl_alloc_buffer(D->Dev->AllocRegions, dev->printf_buffer_size);
+  if (chunk == NULL) {
+    POCL_MSG_WARN("Almaif: Can't allocate %zu bytes for printf buffer\n",
+                  (size_t)dev->printf_buffer_size);
+    dev->device_side_printf = 0;
+  } else {
+    POCL_MSG_PRINT_ALMAIF("Allocated printf buffer of size %zu from %zu\n",
+                          (size_t)dev->printf_buffer_size, chunk->start_address);
+    D->PrintfBuffer = chunk;
+
+    D->PrintfPosition = pocl_alloc_buffer(D->Dev->AllocRegions, 4);
+    if (D->PrintfPosition == NULL) {
+      POCL_ABORT("Almaif: Can't allocate 4 bytes for printf index\n");
+    }
+  }
 
   POCL_MSG_PRINT_ALMAIF("almaif: mmap done\n");
   if (pocl_offline_compile) {
     std::cout << "Offline compilation device initialized" << std::endl;
     return CL_SUCCESS;
   }
-  for (unsigned i = 0; i < (D->Dev->DataMemory->Size >> 2); i++) {
+  for (unsigned i = 0; i < (D->Dev->DataMemory->Size() >> 2); i++) {
     //    D->Dev->DataMemory->Write32(4 * i, 0);
   }
-  for (unsigned i = 0; i < (D->Dev->CQMemory->Size >> 2); i++) {
+  for (unsigned i = 0; i < (D->Dev->CQMemory->Size() >> 2); i++) {
     //    D->Dev->CQMemory->Write32(4 * i, 0);
   }
   // Initialize AQL queue by setting all headers to invalid
   POCL_MSG_PRINT_ALMAIF("Initializing AQL Packet cqmemory size=%zu\n",
-                       D->Dev->CQMemory->Size);
-  for (uint32_t i = AQL_PACKET_LENGTH; i < D->Dev->CQMemory->Size;
+                        D->Dev->CQMemory->Size());
+  for (uint32_t i = AQL_PACKET_LENGTH; i < D->Dev->CQMemory->Size();
        i += AQL_PACKET_LENGTH) {
     D->Dev->CQMemory->Write16(i, AQL_PACKET_INVALID);
   }
+  D->Dev->CQMemory->Write32(ALMAIF_CQ_WRITE, 0);
+  D->Dev->CQMemory->Write32(ALMAIF_CQ_READ, 0);
 
 #ifdef ALMAIF_DUMP_MEMORY
   POCL_MSG_PRINT_ALMAIF("INIT MEMORY DUMP\n");
@@ -475,7 +520,7 @@ cl_int pocl_almaif_init(unsigned j, cl_device_id dev, const char *parameters) {
   runningDeviceCount++;
   POCL_UNLOCK(runningDeviceLock);
 
-  if (D->BaseAddress == EMULATING_ADDRESS) {
+  if (D->BaseAddress == POCL_ALMAIFDEVICE_EMULATION) {
     POCL_MSG_PRINT_ALMAIF("Custom emulation device %d initialized \n", j);
   } else {
     POCL_MSG_PRINT_ALMAIF("Custom device %d initialized \n", j);
@@ -539,7 +584,7 @@ void pocl_almaif_update_event(cl_device_id device, cl_event event) {
       assert(ed);
       size_t commandMetaAddress = ed->chunk->start_address;
       assert(commandMetaAddress);
-      commandMetaAddress -= D->Dev->DataMemory->PhysAddress;
+      commandMetaAddress -= D->Dev->DataMemory->PhysAddress();
 
       timestamp.u32.a = D->Dev->DataMemory->Read32(
           commandMetaAddress + offsetof(CommandMetadata, start_timestamp));
@@ -570,20 +615,22 @@ void pocl_almaif_update_event(cl_device_id device, cl_event event) {
       }
 
       if (device->device_side_printf) {
-        chunk_info_t *printf_buffer_chunk = (chunk_info_t *)D->printf_buffer;
-        assert(printf_buffer_chunk);
-        chunk_info_t *printf_position_chunk =
-            (chunk_info_t *)D->printf_position;
-        assert(printf_position_chunk);
-        unsigned position = 0;
-        D->Dev->readDataFromDevice((char *)&position,
-                                   printf_position_chunk->start_address, 4);
+        chunk_info_t *PrintfBufferChunk = (chunk_info_t *)D->PrintfBuffer;
+        assert(PrintfBufferChunk);
+        chunk_info_t *PrintfPositionChunk = (chunk_info_t *)D->PrintfPosition;
+        assert(PrintfPositionChunk);
+        unsigned position =
+            D->Dev->DataMemory->Read32(PrintfPositionChunk->start_address -
+                                       D->Dev->DataMemory->PhysAddress());
         POCL_MSG_PRINT_ALMAIF(
             "Device wrote %u bytes to stdout. Printing them now:\n", position);
         if (position > 0) {
           char *tmp_printf_buf = (char *)malloc(position);
-          D->Dev->readDataFromDevice(
-              tmp_printf_buf, printf_buffer_chunk->start_address, position);
+          D->Dev->DataMemory->CopyFromMMAP(
+              tmp_printf_buf, PrintfBufferChunk->start_address, position);
+          D->Dev->DataMemory->Write32(PrintfPositionChunk->start_address -
+                                          D->Dev->DataMemory->PhysAddress(),
+                                      0);
           write(STDOUT_FILENO, tmp_printf_buf, position);
           free(tmp_printf_buf);
         }
@@ -786,6 +833,14 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size,
       break;
     }
   }
+#ifdef HAVE_DBDEVICE
+  if (data->Dev->isDBDevice()) {
+    ((DBDevice *)(data->Dev))
+        ->programBIKernelBitstream((BuiltinKernelId)kernelID);
+    ((DBDevice *)(data->Dev))
+        ->programBIKernelFirmware((BuiltinKernelId)kernelID);
+  }
+#endif
 
   if (kernelID == -1) {
     if (data->compilationData == NULL) {
@@ -824,7 +879,23 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size,
   // clear the timestamps and initial signal value
   for (unsigned offset = 0; offset < sizeof(CommandMetadata); offset += 4)
     data->Dev->DataMemory->Write32(
-        commandMetaAddress - data->Dev->DataMemory->PhysAddress + offset, 0);
+        commandMetaAddress - data->Dev->DataMemory->PhysAddress() + offset, 0);
+  if (cmd->device->device_side_printf) {
+    data->Dev->DataMemory->Write32(
+        commandMetaAddress - data->Dev->DataMemory->PhysAddress() +
+            offsetof(CommandMetadata, reserved0),
+        ((chunk_info_t *)data->PrintfBuffer)->start_address);
+    data->Dev->DataMemory->Write32(commandMetaAddress -
+                                       data->Dev->DataMemory->PhysAddress() +
+                                       offsetof(CommandMetadata, reserved1),
+                                   cmd->device->printf_buffer_size);
+
+    data->Dev->DataMemory->Write32(
+        commandMetaAddress - data->Dev->DataMemory->PhysAddress() +
+            offsetof(CommandMetadata, reserved1) + 4,
+        ((chunk_info_t *)data->PrintfPosition)->start_address);
+  }
+
   // Set arguments
   data->Dev->DataMemory->CopyToMMAP(argsAddress, arguments, arg_size);
 
@@ -862,19 +933,19 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size,
     pc.global_var_buffer = 0;
 
     if (cmd->device->device_side_printf) {
-      pc.printf_buffer = ((chunk_info_t *)data->printf_buffer)->start_address;
+      pc.printf_buffer = ((chunk_info_t *)data->PrintfBuffer)->start_address;
       pc.printf_buffer_capacity = cmd->device->printf_buffer_size;
       assert(pc.printf_buffer_capacity);
 
       pc.printf_buffer_position =
-          ((chunk_info_t *)data->printf_position)->start_address;
+          ((chunk_info_t *)data->PrintfPosition)->start_address;
       POCL_MSG_PRINT_ALMAIF(
           "Device side printf buffer=%d, position: %d and capacity %d \n",
           pc.printf_buffer, pc.printf_buffer_position,
           pc.printf_buffer_capacity);
 
       data->Dev->DataMemory->Write32(
-          pc.printf_buffer_position - data->Dev->DataMemory->PhysAddress, 0);
+          pc.printf_buffer_position - data->Dev->DataMemory->PhysAddress(), 0);
     }
 
     size_t pc_start_addr = data->compilationData->pocl_context->start_address;
@@ -882,7 +953,7 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size,
                                       sizeof(pocl_context32));
 
     if (data->Dev->RelativeAddressing) {
-      pc_start_addr -= data->Dev->DataMemory->PhysAddress;
+      pc_start_addr -= data->Dev->DataMemory->PhysAddress();
     }
 
     packet.reserved = pc_start_addr;
@@ -895,9 +966,9 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size,
   }
 
   if (data->Dev->RelativeAddressing) {
-    packet.kernarg_address = argsAddress - data->Dev->DataMemory->PhysAddress;
+    packet.kernarg_address = argsAddress - data->Dev->DataMemory->PhysAddress();
     packet.command_meta_address =
-        commandMetaAddress - data->Dev->DataMemory->PhysAddress;
+        commandMetaAddress - data->Dev->DataMemory->PhysAddress();
   } else {
     packet.kernarg_address = argsAddress;
     packet.command_meta_address = commandMetaAddress;
@@ -908,20 +979,24 @@ void scheduleNDRange(AlmaifData *data, _cl_command_node *cmd, size_t arg_size,
                        packet.kernarg_address, packet.command_meta_address);
 
   POCL_LOCK(data->AQLQueueLock);
-  uint32_t queue_length = data->Dev->CQMemory->Size / AQL_PACKET_LENGTH - 1;
+  uint32_t queue_length = data->Dev->CQMemory->Size() / AQL_PACKET_LENGTH - 1;
 
   uint32_t write_iter = data->Dev->CQMemory->Read32(ALMAIF_CQ_WRITE);
   uint32_t read_iter = data->Dev->CQMemory->Read32(ALMAIF_CQ_READ);
   while (write_iter >= read_iter + queue_length) {
-    // POCL_MSG_PRINT_ALMAIF("write_iter=%u, read_iter=%u length=%u", write_iter,
-    // read_iter, queue_length);
+    POCL_MSG_PRINT_ALMAIF("write_iter=%u, read_iter=%u length=%u\n",
+                          write_iter, read_iter, queue_length);
     usleep(ALMAIF_DRIVER_SLEEP);
     read_iter = data->Dev->CQMemory->Read32(ALMAIF_CQ_READ);
+#ifdef ALMAIF_DUMP_MEMORY
+    POCL_MSG_PRINT_ALMAIF("WAITING FOR CQMEMORY TO EMPTY DUMP\n");
+    data->Dev->printMemoryDump();
+#endif
   }
   uint32_t packet_loc =
       (write_iter % queue_length) * AQL_PACKET_LENGTH + AQL_PACKET_LENGTH;
-  data->Dev->CQMemory->CopyToMMAP(packet_loc + data->Dev->CQMemory->PhysAddress,
-                                  &packet, 64);
+  data->Dev->CQMemory->CopyToMMAP(
+      packet_loc + data->Dev->CQMemory->PhysAddress(), &packet, 64);
 
 #ifdef ALMAIF_DUMP_MEMORY
   POCL_MSG_PRINT_ALMAIF("PRELAUNCH MEMORY DUMP\n");
@@ -956,7 +1031,7 @@ bool isEventDone(AlmaifData *data, cl_event event) {
   assert(commandMetaAddress);
   size_t signalAddress =
       commandMetaAddress + offsetof(CommandMetadata, completion_signal);
-  signalAddress -= data->Dev->DataMemory->PhysAddress;
+  signalAddress -= data->Dev->DataMemory->PhysAddress();
 
   uint32_t status = data->Dev->DataMemory->Read32(signalAddress);
 
@@ -1049,19 +1124,19 @@ void submit_and_barrier(AlmaifData *D, _cl_command_node *cmd) {
     packet.signal_count = i + 1;
 
     POCL_LOCK(D->AQLQueueLock);
-    uint32_t queue_length = D->Dev->CQMemory->Size / AQL_PACKET_LENGTH - 1;
+    uint32_t queue_length = D->Dev->CQMemory->Size() / AQL_PACKET_LENGTH - 1;
 
     uint32_t write_iter = D->Dev->CQMemory->Read32(ALMAIF_CQ_WRITE);
     uint32_t read_iter = D->Dev->CQMemory->Read32(ALMAIF_CQ_READ);
     while (write_iter >= read_iter + queue_length) {
-      // POCL_MSG_PRINT_ALMAIF("write_iter=%u, read_iter=%u length=%u",
-      // write_iter, read_iter, queue_length);
+      POCL_MSG_PRINT_ALMAIF("write_iter=%u, read_iter=%u length=%u\n",
+                            write_iter, read_iter, queue_length);
       read_iter = D->Dev->CQMemory->Read32(ALMAIF_CQ_READ);
       usleep(ALMAIF_DRIVER_SLEEP);
     }
     uint32_t packet_loc =
         (write_iter % queue_length) * AQL_PACKET_LENGTH + AQL_PACKET_LENGTH;
-    D->Dev->CQMemory->CopyToMMAP(packet_loc + D->Dev->CQMemory->PhysAddress,
+    D->Dev->CQMemory->CopyToMMAP(packet_loc + D->Dev->CQMemory->PhysAddress(),
                                  &packet, 64);
 
     D->Dev->CQMemory->Write16(packet_loc, (1 << AQL_PACKET_BARRIER_AND) |
@@ -1120,15 +1195,14 @@ void submit_kernel_packet(AlmaifData *D, _cl_command_node *cmd) {
         // almaif doesn't support SVM pointers
         assert(al->is_svm == 0);
         cl_mem m = (*(cl_mem *)(al->value));
-        auto chunk =
-            (chunk_info_t *)m->device_ptrs[cmd->device->global_mem_id].mem_ptr;
-        size_t buffer = (size_t)chunk->start_address;
+        size_t buffer = D->Dev->pointerDeviceOffset(
+            &(m->device_ptrs[cmd->device->global_mem_id]));
         buffer += al->offset;
         if (D->Dev->RelativeAddressing) {
           if (D->Dev->DataMemory->isInRange(buffer)) {
-            buffer -= D->Dev->DataMemory->PhysAddress;
+            buffer -= D->Dev->DataMemory->PhysAddress();
           } else if (D->Dev->ExternalMemory->isInRange(buffer)) {
-            buffer -= D->Dev->ExternalMemory->PhysAddress;
+            buffer -= D->Dev->ExternalMemory->PhysAddress();
           } else {
             POCL_ABORT("almaif: buffer outside of memory");
           }
@@ -1165,6 +1239,7 @@ void pocl_almaif_free_event_data(cl_event event) {
 }
 
 void *runningThreadFunc(void *) {
+  int counter = 0;
   while (!runningJoinRequested) {
     POCL_LOCK(runningLock);
     if (runningList) {
@@ -1183,6 +1258,43 @@ void *runningThreadFunc(void *) {
           POCL_UPDATE_EVENT_COMPLETE_MSG(E, "Almaif, asynchronous NDRange    ");
           POCL_LOCK(runningLock);
         }
+
+#ifdef ALMAIF_DUMP_MEMORY
+        if ((counter % 3) == 0) {
+          if (Node->device->device_side_printf) {
+            chunk_info_t *PrintfBufferChunk = (chunk_info_t *)AD->PrintfBuffer;
+            assert(PrintfBufferChunk);
+            chunk_info_t *PrintfPositionChunk =
+                (chunk_info_t *)AD->PrintfPosition;
+            assert(PrintfPositionChunk);
+            unsigned position =
+                AD->Dev->DataMemory->Read32(PrintfPositionChunk->start_address -
+                                            AD->Dev->DataMemory->PhysAddress());
+            POCL_MSG_PRINT_ALMAIF(
+                "Device wrote %u bytes to stdout. Printing them now:\n",
+                position);
+            if (position > 0) {
+              char *tmp_printf_buf = (char *)malloc(position);
+              AD->Dev->DataMemory->CopyFromMMAP(
+                  tmp_printf_buf, PrintfBufferChunk->start_address, position);
+              write(STDOUT_FILENO, tmp_printf_buf, position);
+              free(tmp_printf_buf);
+            }
+          }
+        } else {
+          uint32_t pc = AD->Dev->ControlMemory->Read32(ALMAIF_STATUS_REG_PC);
+          uint64_t cc =
+              AD->Dev->ControlMemory->Read64(ALMAIF_STATUS_REG_CC_LOW);
+          uint64_t sc =
+              AD->Dev->ControlMemory->Read64(ALMAIF_STATUS_REG_SC_LOW);
+          POCL_MSG_PRINT_ALMAIF(
+              "PC:%" PRIu32 " CC:%" PRIu64 " SC:%" PRIu64 "\n", pc, cc, sc);
+
+          POCL_MSG_PRINT_ALMAIF("RUNNING MEMORY DUMP\n");
+          AD->Dev->printMemoryDump();
+        }
+#endif
+        counter++;
       }
     }
     POCL_UNLOCK(runningLock);
@@ -1202,8 +1314,6 @@ void pocl_almaif_copy_rect(void *data, pocl_mem_identifier *dst_mem_id,
                           size_t const src_row_pitch,
                           size_t const src_slice_pitch) {
   AlmaifData *d = (AlmaifData *)data;
-  chunk_info_t *src_chunk = (chunk_info_t *)src_mem_id->mem_ptr;
-  chunk_info_t *dst_chunk = (chunk_info_t *)dst_mem_id->mem_ptr;
 
   size_t src_offset = src_origin[0] + src_row_pitch * src_origin[1] +
                       src_slice_pitch * src_origin[2];
@@ -1218,15 +1328,12 @@ void pocl_almaif_copy_rect(void *data, pocl_mem_identifier *dst_mem_id,
     for (j = 0; j < region[1]; ++j)
       for (i = 0; i < region[0]; i++) {
         char val;
-        d->Dev->readDataFromDevice(&val,
-                                   src_chunk->start_address + src_offset +
-                                       src_row_pitch * j + src_slice_pitch * k +
-                                       i,
-                                   1);
-        d->Dev->writeDataToDevice(dst_chunk->start_address + dst_offset +
-                                      dst_row_pitch * j + dst_slice_pitch * k +
-                                      i,
-                                  &val, 1);
+        d->Dev->readDataFromDevice(&val, src_mem_id, 1,
+                                   src_offset + src_row_pitch * j +
+                                       src_slice_pitch * k + i);
+        d->Dev->writeDataToDevice(dst_mem_id, &val, 1,
+                                  dst_offset + dst_row_pitch * j +
+                                      dst_slice_pitch * k + i);
       }
 }
 
@@ -1240,10 +1347,9 @@ void pocl_almaif_write_rect(void *data, const void *__restrict__ src_host_ptr,
                            size_t const host_row_pitch,
                            size_t const host_slice_pitch) {
   AlmaifData *d = (AlmaifData *)data;
-  chunk_info_t *dst_chunk = (chunk_info_t *)dst_mem_id->mem_ptr;
-  size_t adjusted_dst_ptr = dst_chunk->start_address + buffer_origin[0] +
-                            buffer_row_pitch * buffer_origin[1] +
-                            buffer_slice_pitch * buffer_origin[2];
+  size_t adjusted_dst_offset = buffer_origin[0] +
+                               buffer_row_pitch * buffer_origin[1] +
+                               buffer_slice_pitch * buffer_origin[2];
 
   char const *__restrict__ const adjusted_host_ptr =
       (char const *)src_host_ptr + host_origin[0] +
@@ -1259,8 +1365,8 @@ void pocl_almaif_write_rect(void *data, const void *__restrict__ src_host_ptr,
 
       size_t d_offset = buffer_row_pitch * j + buffer_slice_pitch * k;
 
-      d->Dev->writeDataToDevice(adjusted_dst_ptr + d_offset,
-                                adjusted_host_ptr + s_offset, region[0]);
+      d->Dev->writeDataToDevice(dst_mem_id, adjusted_host_ptr + s_offset,
+                                region[0], adjusted_dst_offset + d_offset);
     }
 }
 
@@ -1274,10 +1380,9 @@ void pocl_almaif_read_rect(void *data, void *__restrict__ dst_host_ptr,
                           size_t const host_row_pitch,
                           size_t const host_slice_pitch) {
   AlmaifData *d = (AlmaifData *)data;
-  chunk_info_t *src_chunk = (chunk_info_t *)src_mem_id->mem_ptr;
-  size_t adjusted_src_ptr = src_chunk->start_address + buffer_origin[0] +
-                            buffer_row_pitch * buffer_origin[1] +
-                            buffer_slice_pitch * buffer_origin[2];
+  size_t adjusted_src_offset = buffer_origin[0] +
+                               buffer_row_pitch * buffer_origin[1] +
+                               buffer_slice_pitch * buffer_origin[2];
 
   char *__restrict__ const adjusted_host_ptr =
       (char *)dst_host_ptr + host_origin[0] + host_row_pitch * host_origin[1] +
@@ -1291,7 +1396,7 @@ void pocl_almaif_read_rect(void *data, void *__restrict__ dst_host_ptr,
     for (j = 0; j < region[1]; ++j) {
       size_t d_offset = host_row_pitch * j + host_slice_pitch * k;
       size_t s_offset = buffer_row_pitch * j + buffer_slice_pitch * k;
-      d->Dev->readDataFromDevice(adjusted_host_ptr + d_offset,
-                                 adjusted_src_ptr + s_offset, region[0]);
+      d->Dev->readDataFromDevice(adjusted_host_ptr + d_offset, src_mem_id,
+                                 region[0], adjusted_src_offset + s_offset);
     }
 }
diff --git a/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.cc b/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.cc
similarity index 69%
rename from lib/CL/devices/almaif/openasip/AlmaifCompileTCE.cc
rename to lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.cc
index d49aad3a0..ec2d9fc07 100644
--- a/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.cc
+++ b/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.cc
@@ -1,4 +1,4 @@
-/* AlmaifCompileTCE.cc - compiler support for custom devices
+/* AlmaifCompileOpenasip.cc - compiler support for custom devices
 
    Copyright (c) 2022 Topi Leppänen / Tampere University
 
@@ -21,7 +21,6 @@
    IN THE SOFTWARE.
 */
 
-
 #include "stdint.h"
 #include "unistd.h"
 
@@ -50,41 +49,40 @@
 
 #include "../AlmaifCompile.hh"
 #include "../AlmaifShared.hh"
-#include "AlmaifCompileTCE.hh"
+#include "AlmaifCompileOpenasip.hh"
 
 #include "TTASimDevice.hh"
 
-int pocl_almaif_tce_initialize(cl_device_id device, const char *parameters) {
+int pocl_almaif_openasip_initialize(cl_device_id device,
+                                    const std::string &parameters) {
   AlmaifData *d = (AlmaifData *)(device->data);
 
-  tce_backend_data_t *bd = (tce_backend_data_t *)pocl_aligned_malloc(
-      HOST_CPU_CACHELINE_SIZE, sizeof(tce_backend_data_t));
+  openasip_backend_data_t *bd = new openasip_backend_data_t();
   if (bd == NULL) {
-    POCL_MSG_WARN("couldn't allocate tce_backend_data\n");
+    POCL_MSG_WARN("couldn't allocate openasip_backend_data\n");
     return CL_OUT_OF_HOST_MEMORY;
   }
 
-  POCL_INIT_LOCK(bd->tce_compile_lock);
+  POCL_INIT_LOCK(bd->openasip_compile_lock);
 
   if (1) // pocl_offline_compile
   {
-    assert(parameters);
+    assert(parameters != "");
     /* Convert the filename from env variable to absolute filename.
      * This is required, since generatebits must be run in
      * destination (output) directory with ADF argument */
-    bd->machine_file = realpath(parameters, NULL);
-    if ((bd->machine_file == NULL) || (!pocl_exists(bd->machine_file)))
-      POCL_ABORT("Can't find ADF file: %s\n", bd->machine_file);
+    char *tmp_path = realpath(parameters.c_str(), NULL);
+    if ((tmp_path == NULL) || (!pocl_exists(tmp_path)))
+      POCL_ABORT("Can't find ADF file: %s\n", parameters.c_str());
+    bd->machine_file.assign(tmp_path);
+    free(tmp_path);
 
-    size_t len = strlen(bd->machine_file);
-    assert(len > 0);
-    // char* dev_name = malloc (len+20);
-    // snprintf (dev_name, 1024, "ALMAIF TCE: %s", bd->machine_file);
+    // snprintf (dev_name, 1024, "ALMAIF openasip: %s", bd->machine_file);
 
     /* grep the ADF file for endiannes flag */
     char *content = NULL;
     uint64_t size = 0;
-    pocl_read_file(bd->machine_file, &content, &size);
+    pocl_read_file(bd->machine_file.c_str(), &content, &size);
     if ((size == 0) || (content == NULL))
       POCL_ABORT("Can't read ADF file: %s\n", bd->machine_file);
 
@@ -100,12 +98,12 @@ int pocl_almaif_tce_initialize(cl_device_id device, const char *parameters) {
                          bd->core_count);
     POCL_MEM_FREE(content);
   } else {
-    bd->machine_file = NULL;
+    bd->machine_file = "";
     device->max_compute_units =
         d->Dev->ControlMemory->Read32(ALMAIF_INFO_CORE_COUNT);
   }
 
-  device->long_name = device->short_name = "ALMAIF TCE";
+  device->long_name = device->short_name = "ALMAIF OPENASIP";
   device->vendor = "pocl";
   device->extensions = TCE_DEVICE_EXTENSIONS;
   if (device->endian_little) {
@@ -121,42 +119,24 @@ int pocl_almaif_tce_initialize(cl_device_id device, const char *parameters) {
   d->compilationData->backend_data = (void *)bd;
   device->builtins_sources_path = "tce_builtins.cl";
 
-  device->device_side_printf = 1;
-  device->printf_buffer_size = PRINTF_BUFFER_SIZE;
-  chunk_info_t *chunk = NULL;
-  chunk = pocl_alloc_buffer(d->Dev->AllocRegions, device->printf_buffer_size);
-  if (chunk == NULL) {
-    POCL_ABORT("Almaif: Can't allocate %z bytes for printf buffer\n",
-               device->printf_buffer_size);
-  } else {
-    POCL_MSG_PRINT_ALMAIF("Allocated printf buffer of size %d from %d\n",
-                         device->printf_buffer_size, chunk->start_address);
-    d->printf_buffer = chunk;
-  }
-
-  d->printf_position = pocl_alloc_buffer(d->Dev->AllocRegions, 4);
-  if (d->printf_position == NULL) {
-    POCL_ABORT("Almaif: Can't allocate 4 bytes for printf index\n");
-  }
-
   return 0;
 }
 
-int pocl_almaif_tce_cleanup(cl_device_id device) {
+int pocl_almaif_openasip_cleanup(cl_device_id device) {
   void *data = device->data;
   AlmaifData *d = (AlmaifData *)data;
 
-  pocl_free_chunk((chunk_info_t *)d->printf_buffer);
-  pocl_free_chunk((chunk_info_t *)d->printf_position);
-
-  tce_backend_data_t *bd =
-      (tce_backend_data_t *)d->compilationData->backend_data;
+  if (device->device_side_printf) {
+    pocl_free_chunk((chunk_info_t *)d->PrintfBuffer);
+    pocl_free_chunk((chunk_info_t *)d->PrintfPosition);
+  }
 
-  POCL_DESTROY_LOCK(bd->tce_compile_lock);
+  openasip_backend_data_t *bd =
+      (openasip_backend_data_t *)d->compilationData->backend_data;
 
-  POCL_MEM_FREE(bd->machine_file);
+  POCL_DESTROY_LOCK(bd->openasip_compile_lock);
 
-  pocl_aligned_free(bd);
+  delete bd;
 
   return 0;
 }
@@ -165,77 +145,74 @@ int pocl_almaif_tce_cleanup(cl_device_id device) {
 #define OFFSET_ARG(c) SUBST(c)
 #define MAX_CMDLINE_LEN (32 * POCL_MAX_PATHNAME_LENGTH)
 
-void tceccCommandLine(char *commandline, size_t max_cmdline_len,
-                      _cl_command_run *run_cmd, AlmaifData *D,
-                      const char *tempDir, const char *inputSrc,
-                      const char *outputTpef, const char *machine_file,
-                      int is_multicore, int little_endian,
-                      const char *extraParams, bool standalone_mode) {
-
-  const char *mainC;
+std::string oaccCommandLine(_cl_command_run *run_cmd, AlmaifData *D,
+                            const std::string &tempDir,
+                            const std::string &inputSrc,
+                            const std::string &outputTpef,
+                            const std::string &machine_file, int is_multicore,
+                            int little_endian, const std::string &extraParams,
+                            bool standalone_mode) {
+  std::string mainC;
   if (is_multicore)
     mainC = "tta_device_main_dthread.c";
   else
     mainC = "tta_device_main.c";
 
-  char deviceMainSrc[POCL_MAX_PATHNAME_LENGTH];
-  const char *poclIncludePathSwitch;
+  std::string deviceMainSrc;
+  std::string poclIncludePathSwitch;
   if (pocl_get_bool_option("POCL_BUILDING", 0)) {
-    snprintf(deviceMainSrc, POCL_MAX_PATHNAME_LENGTH, "%s%s%s", SRCDIR,
-             "/lib/CL/devices/almaif/openasip/", mainC);
-    assert(access(deviceMainSrc, R_OK) == 0);
-    poclIncludePathSwitch = " -I " SRCDIR "/include"
-                            " -I " SRCDIR "/lib/CL/devices/almaif/openasip";
+    deviceMainSrc =
+        std::string(SRCDIR) + "/lib/CL/devices/almaif/openasip/" + mainC;
+    assert(access(deviceMainSrc.c_str(), R_OK) == 0);
+    poclIncludePathSwitch = " -I " + std::string(SRCDIR) + "/include" + " -I " +
+                            std::string(SRCDIR) +
+                            "/lib/CL/devices/almaif/openasip";
   } else {
-    snprintf(deviceMainSrc, POCL_MAX_PATHNAME_LENGTH, "%s%s%s",
-             POCL_INSTALL_PRIVATE_DATADIR, "/", mainC);
-    assert(access(deviceMainSrc, R_OK) == 0);
-    poclIncludePathSwitch = " -I " POCL_INSTALL_PRIVATE_DATADIR "/include";
+    deviceMainSrc = std::string(POCL_INSTALL_PRIVATE_DATADIR) + "/" + mainC;
+    assert(access(deviceMainSrc.c_str(), R_OK) == 0);
+    poclIncludePathSwitch =
+        " -I " + std::string(POCL_INSTALL_PRIVATE_DATADIR) + "/include";
   }
 
-  char extraFlags[MAX_CMDLINE_LEN];
-  const char *multicoreFlags = "";
+  std::string extraFlags;
+  std::string multicoreFlags = "";
   if (is_multicore)
     multicoreFlags = " -ldthread -lsync-lu -llockunit";
 
-  char preprocessor_directives[MAX_CMDLINE_LEN];
-  set_preprocessor_directives(preprocessor_directives, D, machine_file,
-                              standalone_mode);
+  std::string preprocessor_directives =
+      set_preprocessor_directives(D, machine_file, standalone_mode);
 
-  const char *userFlags = pocl_get_string_option("POCL_TCECC_EXTRA_FLAGS", "");
-  const char *endianFlags = little_endian ? "--little-endian" : "";
-  snprintf(extraFlags, MAX_CMDLINE_LEN, "%s %s %s %s %s -k dummy_argbuffer",
-           extraParams, multicoreFlags, userFlags, endianFlags,
-           preprocessor_directives);
+  const std::string userFlags =
+      pocl_get_string_option("POCL_TCECC_EXTRA_FLAGS", "");
+  const std::string endianFlags = little_endian ? "--little-endian" : "";
+  extraFlags = extraParams + " " + multicoreFlags + " " + userFlags + " " +
+               endianFlags + " " + preprocessor_directives +
+               " -k dummy_argbuffer";
 
-  char kernelObjSrc[POCL_MAX_PATHNAME_LENGTH];
-  snprintf(kernelObjSrc, POCL_MAX_PATHNAME_LENGTH, "%s%s", tempDir,
-           "/../descriptor.so.kernel_obj.c");
+  std::string kernelObjSrc = tempDir + "/../descriptor.so.kernel_obj.c";
 
-  char kernelMdSymbolName[POCL_MAX_PATHNAME_LENGTH];
-  snprintf(kernelMdSymbolName, POCL_MAX_PATHNAME_LENGTH, "_%s_md",
-           run_cmd->kernel->name);
+  std::string kernelMdSymbolName =
+      "_" + std::string(run_cmd->kernel->name) + "_md";
 
-  char programBcFile[POCL_MAX_PATHNAME_LENGTH];
-  snprintf(programBcFile, POCL_MAX_PATHNAME_LENGTH, "%s%s", tempDir,
-           "/program.bc");
+  std::string programBcFile = tempDir + "/program.bc";
 
   /* Compile in steps to save the program.bc for automated exploration
      use case when producing the kernel capture scripts. */
 
-  snprintf(commandline, max_cmdline_len,
-           "tcecc -llwpr %s %s %s %s -k %s -g -O3 --emit-llvm -o %s %s;"
-           "tcecc -a %s %s -O3 -o %s %s\n",
-           poclIncludePathSwitch, deviceMainSrc, kernelObjSrc, inputSrc,
-           kernelMdSymbolName, programBcFile, extraFlags,
-
-           machine_file, programBcFile, outputTpef, extraFlags);
+  std::string commandline =
+      "oacc -llwpr " + poclIncludePathSwitch + " " + deviceMainSrc + " " +
+      kernelObjSrc + " " + inputSrc + " -k " + kernelMdSymbolName +
+      " -g -O3 --emit-llvm" + " -o " + programBcFile + " " + extraFlags + ";" +
+      "oacc -a " + machine_file + " " + programBcFile + " -O3 -o " +
+      outputTpef + " " + extraFlags + "\n";
+  return commandline;
 }
 
-void pocl_tce_write_kernel_descriptor(char *content, size_t content_size,
-                                      _cl_command_node *command,
-                                      cl_kernel kernel, cl_device_id device,
-                                      int specialize) {
+void pocl_openasip_write_kernel_descriptor(char *content, size_t content_size,
+                                           _cl_command_node *command,
+                                           cl_kernel kernel,
+                                           cl_device_id device,
+                                           int specialize) {
   // Generate the kernel_obj.c file. This should be optional
   // and generated only for the heterogeneous standalone devices which
   // need the definitions to accompany the kernels, for the launcher
@@ -280,8 +257,8 @@ void pocl_tce_write_kernel_descriptor(char *content, size_t content_size,
                               content_len);
 }
 
-void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
-                             cl_device_id device, int specialize) {
+void pocl_almaif_openasip_compile(_cl_command_node *cmd, cl_kernel kernel,
+                                  cl_device_id device, int specialize) {
 
   if (cmd->type != CL_COMMAND_NDRANGE_KERNEL) {
     POCL_ABORT("Almaif: trying to compile non-ndrange command\n");
@@ -289,8 +266,8 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
 
   void *data = cmd->device->data;
   AlmaifData *d = (AlmaifData *)data;
-  tce_backend_data_t *bd =
-      (tce_backend_data_t *)d->compilationData->backend_data;
+  openasip_backend_data_t *bd =
+      (openasip_backend_data_t *)d->compilationData->backend_data;
 
   if (!kernel)
     kernel = cmd->command.run.kernel;
@@ -299,13 +276,13 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
   assert(kernel);
   assert(device);
   POCL_MSG_PRINT_ALMAIF("COMPILATION BEFORE WG FUNC\n");
-  POCL_LOCK(bd->tce_compile_lock);
+  POCL_LOCK(bd->openasip_compile_lock);
   int error = pocl_llvm_generate_workgroup_function(
       cmd->program_device_i, device, kernel, cmd, specialize);
 
   POCL_MSG_PRINT_ALMAIF("COMPILATION AFTER WG FUNC\n");
   if (error) {
-    POCL_UNLOCK(bd->tce_compile_lock);
+    POCL_UNLOCK(bd->openasip_compile_lock);
     POCL_ABORT("TCE: pocl_llvm_generate_workgroup_function()"
                " failed for kernel %s\n",
                kernel->name);
@@ -333,29 +310,29 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
 
   if (!pocl_exists(assemblyFileName)) {
     char descriptor_content[64 * 1024];
-    pocl_tce_write_kernel_descriptor(descriptor_content, (64 * 1024), cmd,
-                                     kernel, device, specialize);
+    pocl_openasip_write_kernel_descriptor(descriptor_content, (64 * 1024), cmd,
+                                          kernel, device, specialize);
 
     error = snprintf(inputBytecode, POCL_MAX_PATHNAME_LENGTH, "%s%s", cachedir,
                      POCL_PARALLEL_BC_FILENAME);
 
-    char commandLine[MAX_CMDLINE_LEN];
-    tceccCommandLine(commandLine, MAX_CMDLINE_LEN, &cmd->command.run, d,
-                     tempDir,
-                     inputBytecode, // inputSrc
-                     assemblyFileName, bd->machine_file, bd->core_count > 1,
-                     device->endian_little, "", false);
+    std::string commandLine =
+        oaccCommandLine(&cmd->command.run, d, tempDir,
+                        inputBytecode, // inputSrc
+                        assemblyFileName, bd->machine_file, bd->core_count > 1,
+                        device->endian_little, "", false);
 
-    POCL_MSG_PRINT_ALMAIF("build command: \n%s", commandLine);
+    POCL_MSG_PRINT_ALMAIF("build command: \n%s", commandLine.c_str());
 
-    error = system(commandLine);
+    error = system(commandLine.c_str());
     if (error != 0)
-      POCL_ABORT("Error while running tcecc.\n");
+      POCL_ABORT("Error while running oacc.\n");
 
     // Dump disassembled tpef for debugging
-    char tcedisasmCmd[MAX_CMDLINE_LEN];
-    snprintf(tcedisasmCmd, MAX_CMDLINE_LEN, "tcedisasm -n %s %s", bd->machine_file, assemblyFileName);
-    error = system(tcedisasmCmd);
+    char OpenasipDisAsmCmd[MAX_CMDLINE_LEN];
+    snprintf(OpenasipDisAsmCmd, MAX_CMDLINE_LEN, "tcedisasm -n %s %s",
+             bd->machine_file.c_str(), assemblyFileName);
+    error = system(OpenasipDisAsmCmd);
     if (error != 0)
         POCL_MSG_WARN("Error while running tcedisasm.\n");
   }
@@ -383,16 +360,17 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
                    assemblyFileName);
     }
 
-    char wg_func_name[4 * POCL_MAX_PATHNAME_LENGTH];
-    snprintf(wg_func_name, sizeof(wg_func_name), "%s_workgroup_argbuffer",
-             cmd->command.run.kernel->name);
+    std::string WgFuncName =
+        std::string(cmd->command.run.kernel->name) + "_workgroup_argbuffer";
+    const char *wg_func_name = WgFuncName.c_str(); // C string for %s below
     if (prog->hasProcedure(wg_func_name)) {
         const TTAProgram::Procedure &proc = prog->procedure(wg_func_name);
         int kernel_address = proc.startAddress().location();
 
-        char content[64];
-        snprintf(content, 64, "kernel address = %d", kernel_address);
-        pocl_write_file(md_path, content, strlen(content), 0, 0);
+        std::string md_path = std::string(cachedir) + "/kernel_address.txt";
+        std::string content =
+            "kernel address = " + std::to_string(kernel_address);
+        pocl_write_file(md_path.c_str(), content.c_str(), content.size(), 0, 0);
     } else {
         POCL_ABORT("Couldn't find wg_function procedure %s from the program\n",
                    wg_func_name);
@@ -406,15 +384,13 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
            "/parallel.img");
 
   if (!pocl_exists(imem_file)) {
-    char genbits_command[POCL_MAX_PATHNAME_LENGTH * 8];
-    // --dmemwidthinmaus 4
-    snprintf(genbits_command, (POCL_MAX_PATHNAME_LENGTH * 8),
-             "SAVEDIR=$PWD; cd %s; generatebits --dmemwidthinmaus 4 "
-             "--piformat=bin2n --diformat=bin2n --program "
-             "parallel.tpef %s ; cd $SAVEDIR",
-             cachedir, bd->machine_file);
-    POCL_MSG_PRINT_ALMAIF("running genbits: \n %s \n", genbits_command);
-    error = system(genbits_command);
+    std::string genbits_command =
+        "SAVEDIR=$PWD; cd " + std::string(cachedir) +
+        "; generatebits --dmemwidthinmaus 4 " +
+        "--piformat=bin2n --diformat=bin2n --program " + "parallel.tpef " +
+        bd->machine_file + "; cd $SAVEDIR";
+    POCL_MSG_PRINT_ALMAIF("running genbits: \n %s \n", genbits_command.c_str());
+    error = system(genbits_command.c_str());
     if (error != 0)
       POCL_ABORT("Error while running generatebits.\n");
   }
@@ -422,7 +398,7 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
   error = pocl_exists(imem_file);
   assert(error != 0 && "parallel.img does not exist!");
 
-  POCL_UNLOCK(bd->tce_compile_lock);
+  POCL_UNLOCK(bd->openasip_compile_lock);
 }
 
 /* This is a version number that is supposed to increase when there is
@@ -430,8 +406,8 @@ void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
  * incompatible (e.g. a change in generated device image file names, etc) */
 #define POCL_TCE_ALMAIF_BINARY_VERSION "2"
 
-int pocl_almaif_tce_device_hash(const char *adf_file, const char *llvm_triplet,
-                                char *output) {
+int pocl_almaif_openasip_device_hash(const char *adf_file,
+                                     const char *llvm_triplet, char *output) {
 
   SHA1_CTX ctx;
   uint8_t bin_dig[SHA1_DIGEST_SIZE];
@@ -464,10 +440,10 @@ int pocl_almaif_tce_device_hash(const char *adf_file, const char *llvm_triplet,
   return 0;
 }
 
-char *pocl_tce_init_build(void *data) {
+char *pocl_almaif_openasip_init_build(void *data) {
   AlmaifData *D = (AlmaifData *)data;
-  tce_backend_data_t *bd =
-      (tce_backend_data_t *)D->compilationData->backend_data;
+  openasip_backend_data_t *bd =
+      (openasip_backend_data_t *)D->compilationData->backend_data;
   assert(bd);
 
   TCEString mach_tmpdir = Environment::llvmtceCachePath();
@@ -491,11 +467,11 @@ char *pocl_tce_init_build(void *data) {
     char tempfile[POCL_MAX_PATHNAME_LENGTH];
     pocl_mk_tempname(tempfile, mach_tmpdir.c_str(), ".devext", NULL);
 
-    std::string tceopgenCmd = std::string("tceopgen > ") + tempfile;
+    std::string OpenasipOpgenCmd = std::string("tceopgen > ") + tempfile;
 
-    POCL_MSG_PRINT_TCE("Running: %s \n", tceopgenCmd.c_str());
+    POCL_MSG_PRINT_TCE("Running: %s \n", OpenasipOpgenCmd.c_str());
 
-    error = system(tceopgenCmd.c_str());
+    error = system(OpenasipOpgenCmd.c_str());
     if (error == -1)
       return NULL;
 
@@ -521,11 +497,11 @@ char *pocl_tce_init_build(void *data) {
   return include_switch;
 }
 
-void pocl_almaif_tce_produce_standalone_program(AlmaifData *D,
-                                                _cl_command_node *cmd,
-                                                pocl_context32 *pc,
-                                                size_t arg_size,
-                                                void *arguments) {
+void pocl_almaif_openasip_produce_standalone_program(AlmaifData *D,
+                                                     _cl_command_node *cmd,
+                                                     pocl_context32 *pc,
+                                                     size_t arg_size,
+                                                     void *arguments) {
   _cl_command_run *run_cmd = &cmd->command.run;
 
   static int runCounter = 0;
@@ -538,8 +514,8 @@ void pocl_almaif_tce_produce_standalone_program(AlmaifData *D,
   TCEString fname = baseFname + ".c";
   TCEString parallel_bc = tempDir + "/parallel.bc";
 
-  tce_backend_data_t *bd =
-      (tce_backend_data_t *)D->compilationData->backend_data;
+  openasip_backend_data_t *bd =
+      (openasip_backend_data_t *)D->compilationData->backend_data;
 
   std::ofstream out(fname.c_str());
 
@@ -701,10 +677,9 @@ void pocl_almaif_tce_produce_standalone_program(AlmaifData *D,
   TCEString inputFiles = fname + " " + parallel_bc;
   std::ofstream scriptout(buildScriptFname.c_str());
 
-  char commandLine[MAX_CMDLINE_LEN];
-  tceccCommandLine(commandLine, MAX_CMDLINE_LEN, run_cmd, D, tempDir.c_str(),
-                   inputFiles.c_str(), "standalone.tpef", bd->machine_file,
-                   bd->core_count > 1, 1, " -D_STANDALONE_MODE=1", true);
+  std::string commandLine = oaccCommandLine(
+      run_cmd, D, tempDir.c_str(), inputFiles.c_str(), "standalone.tpef",
+      bd->machine_file, bd->core_count > 1, 1, " -D_STANDALONE_MODE=1", true);
   scriptout << commandLine;
   scriptout.close();
 
@@ -724,9 +699,10 @@ void pocl_almaif_tce_produce_standalone_program(AlmaifData *D,
   ++runCounter;
 }
 
-void set_preprocessor_directives(char *output, AlmaifData *d, const char *adf,
-                                 bool standalone_mode) {
+std::string set_preprocessor_directives(AlmaifData *d, const std::string &adf,
+                                        bool standalone_mode) {
   TTAMachine::Machine *mach = NULL;
+  std::string output = "";
   try {
     mach = TTAMachine::Machine::loadFromADF(adf);
   } catch (Exception &e) {
@@ -762,15 +738,15 @@ void set_preprocessor_directives(char *output, AlmaifData *d, const char *adf,
     POCL_ABORT("Couldn't find the global address space from machine\n");
   }
 
-  int AQL_queue_length = d->Dev->CQMemory->Size / AQL_PACKET_LENGTH - 1;
-  unsigned dmem_size = d->Dev->DataMemory->Size;
-  unsigned cq_size = d->Dev->CQMemory->Size;
+  int AQL_queue_length = d->Dev->CQMemory->Size() / AQL_PACKET_LENGTH - 1;
+  unsigned DmemSize = d->Dev->DataMemory->Size();
+  unsigned CQSize = d->Dev->CQMemory->Size();
 
   bool relativeAddressing = d->Dev->RelativeAddressing;
   int i = 0;
-  i = snprintf(output, MAX_CMDLINE_LEN, "-DQUEUE_LENGTH=%i ", AQL_queue_length);
+  output += "-DQUEUE_LENGTH=" + std::to_string(AQL_queue_length) + " ";
   if (!separatePrivateMem) {
-    unsigned initsp = dmem_size;
+    unsigned initsp = DmemSize;
     unsigned private_mem_start = 0;
     if (!standalone_mode) {
       // The standalone mode, cannot separate the automatic allocation of
@@ -783,42 +759,47 @@ void set_preprocessor_directives(char *output, AlmaifData *d, const char *adf,
       int private_mem_size = pocl_get_int_option(
           "POCL_ALMAIF_PRIVATE_MEM_SIZE", ALMAIF_DEFAULT_PRIVATE_MEM_SIZE);
       initsp += private_mem_size;
-      private_mem_start += dmem_size;
+      private_mem_start += DmemSize;
       if (!separateCQMem) {
-        initsp += cq_size;
-        private_mem_start += cq_size;
+        initsp += CQSize;
+        private_mem_start += CQSize;
       }
     }
 
     if (!relativeAddressing) {
-      initsp += d->Dev->DataMemory->PhysAddress;
-      private_mem_start += d->Dev->DataMemory->PhysAddress;
+      initsp += d->Dev->DataMemory->PhysAddress();
+      private_mem_start += d->Dev->DataMemory->PhysAddress();
     }
-    i +=
-        snprintf(output + i, MAX_CMDLINE_LEN, "--init-sp=%u --data-start=%s,%u",
-                 initsp, private_as_name, private_mem_start);
+    output += "--init-sp=";
+    output += std::to_string(initsp);
+    output += " --data-start=";
+    output += private_as_name;
+    output += ",";
+    output += std::to_string(private_mem_start);
   }
   if (!relativeAddressing && standalone_mode) {
     // Appends to the data-start option
-    char data_start_option_string[MAX_CMDLINE_LEN];
+    std::string data_start_option_string;
     if (!separatePrivateMem) {
-      strcpy(data_start_option_string, ",");
+      data_start_option_string = ",";
     } else {
-      strcpy(data_start_option_string, " --data-start=");
+      data_start_option_string = " --data-start=";
     }
-    i += snprintf(output + i, MAX_CMDLINE_LEN,
-                  "%s%s,${STANDALONE_GLOBAL_AS_OFFSET}",
-                  data_start_option_string, global_as_name);
+    output += data_start_option_string + global_as_name +
+              ",${STANDALONE_GLOBAL_AS_OFFSET}";
   }
 
   if (!separateCQMem) {
-    unsigned queue_start = d->Dev->CQMemory->PhysAddress;
+    unsigned queue_start = d->Dev->CQMemory->PhysAddress();
     if (relativeAddressing) {
-      queue_start -= d->Dev->DataMemory->PhysAddress;
+      queue_start -= d->Dev->DataMemory->PhysAddress();
     }
-    i += snprintf(output + i, MAX_CMDLINE_LEN, " -DQUEUE_START=%u ",
-                  queue_start);
+    output += " -DQUEUE_START=";
+    output += std::to_string(queue_start);
+    output += " ";
   }
 
   delete mach;
+
+  return output;
 }
diff --git a/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.hh b/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.hh
new file mode 100644
index 000000000..2fd350343
--- /dev/null
+++ b/lib/CL/devices/almaif/openasip/AlmaifCompileOpenasip.hh
@@ -0,0 +1,86 @@
+/* AlmaifCompileOpenasip.hh - compiler support for custom devices
+
+   Copyright (c) 2022 Topi Leppänen / Tampere University
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal in the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be included in
+   all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   IN THE SOFTWARE.
+*/
+
+#ifndef POCL_ALMAIFCOMPILEOPENASIP_H
+#define POCL_ALMAIFCOMPILEOPENASIP_H
+
+#include <string>
+
+#include "pocl_util.h"
+// #include "AlmaifShared.hh"
+// #include "AlmaifCompile.hh"
+// NOTE: AlmaifData is used below but not declared by this header; the
+// including file must make it visible before including this header.
+
+int pocl_almaif_openasip_initialize(cl_device_id device,
+                                    const std::string &parameters);
+/* Frees the device-side printf chunks (when enabled), destroys the compile
+   lock and deletes the backend data. Always returns 0. */
+int pocl_almaif_openasip_cleanup(cl_device_id device);
+/* Builds the kernel of an NDRange command; any other command type aborts. */
+void pocl_almaif_openasip_compile(_cl_command_node *cmd, cl_kernel kernel,
+                                  cl_device_id device, int specialize);
+void pocl_almaif_openasip_produce_standalone_program(AlmaifData *D,
+                                                     _cl_command_node *cmd,
+                                                     pocl_context32 *pc,
+                                                     size_t arg_size,
+                                                     void *arguments);
+
+char *pocl_almaif_openasip_init_build(void *data);
+
+/* State of the OpenASIP compilation backend, stored in
+   compilationData->backend_data. */
+typedef struct openasip_backend_data_s {
+  /* Held by pocl_almaif_openasip_compile() while it builds a kernel. */
+  pocl_lock_t openasip_compile_lock
+      __attribute__((aligned(HOST_CPU_CACHELINE_SIZE)));
+  /* Machine description file handed to oacc, tcedisasm and generatebits. */
+  std::string machine_file;
+  /* More than one core selects the multicore oacc command line. */
+  int core_count;
+} openasip_backend_data_t;
+
+/* Returns a shell command of two oacc invocations separated by ';': the
+   first emits LLVM bitcode to <tempDir>/program.bc, the second compiles it
+   for machine_file into outputTpef. */
+std::string oaccCommandLine(_cl_command_run *run_cmd, AlmaifData *D,
+                            const std::string &tempDir,
+                            const std::string &inputSrc,
+                            const std::string &outputTpef,
+                            const std::string &machine_file, int is_multicore,
+                            int little_endian, const std::string &extraParams,
+                            bool standalone_mode);
+void pocl_openasip_write_kernel_descriptor(char *content, size_t content_size,
+                                           _cl_command_node *command,
+                                           cl_kernel kernel,
+                                           cl_device_id device, int specialize);
+
+int pocl_almaif_openasip_device_hash(const char *adf_file,
+                                     const char *llvm_triplet, char *output);
+
+/* Returns the -D definitions and data layout options that oaccCommandLine()
+   appends to the oacc flags for the machine described by adf. */
+std::string set_preprocessor_directives(AlmaifData *d, const std::string &adf,
+                                        bool standalone_mode);
+
+#endif
diff --git a/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.hh b/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.hh
deleted file mode 100644
index d26728ebf..000000000
--- a/lib/CL/devices/almaif/openasip/AlmaifCompileTCE.hh
+++ /dev/null
@@ -1,67 +0,0 @@
-/* AlmaifCompileTCE.hh - compiler support for custom devices
-
-   Copyright (c) 2022 Topi Leppänen / Tampere University
-
-   Permission is hereby granted, free of charge, to any person obtaining a copy
-   of this software and associated documentation files (the "Software"), to
-   deal in the Software without restriction, including without limitation the
-   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-   sell copies of the Software, and to permit persons to whom the Software is
-   furnished to do so, subject to the following conditions:
-
-   The above copyright notice and this permission notice shall be included in
-   all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-   IN THE SOFTWARE.
-*/
-
-#ifndef POCL_ALMAIFCOMPILETCE_H
-#define POCL_ALMAIFCOMPILETCE_H
-
-#include "pocl_util.h"
-//#include "AlmaifShared.hh"
-//#include "AlmaifCompile.hh"
-
-int pocl_almaif_tce_initialize(cl_device_id device, const char *parameters);
-int pocl_almaif_tce_cleanup(cl_device_id device);
-void pocl_almaif_tce_compile(_cl_command_node *cmd, cl_kernel kernel,
-                             cl_device_id device, int specialize);
-void pocl_almaif_tce_produce_standalone_program(AlmaifData *D,
-                                                _cl_command_node *cmd,
-                                                pocl_context32 *pc,
-                                                size_t arg_size,
-                                                void *arguments);
-
-char *pocl_tce_init_build(void *data);
-
-typedef struct tce_backend_data_s {
-  pocl_lock_t tce_compile_lock
-      __attribute__((aligned(HOST_CPU_CACHELINE_SIZE)));
-  char *machine_file;
-  int core_count;
-} tce_backend_data_t;
-
-void tceccCommandLine(char *commandline, size_t max_cmdline_len,
-                      _cl_command_run *run_cmd, AlmaifData *D,
-                      const char *tempDir, const char *inputSrc,
-                      const char *outputTpef, const char *machine_file,
-                      int is_multicore, int little_endian,
-                      const char *extraParams, bool standalone_mode);
-void pocl_tce_write_kernel_descriptor(char *content, size_t content_size,
-                                      _cl_command_node *command,
-                                      cl_kernel kernel, cl_device_id device,
-                                      int specialize);
-
-int pocl_almaif_tce_device_hash(const char *adf_file, const char *llvm_triplet,
-                                char *output);
-
-void set_preprocessor_directives(char *output, AlmaifData *d, const char *adf,
-                                 bool standalone_mode);
-
-#endif
diff --git a/lib/CL/devices/almaif/openasip/TTASimControlRegion.cc b/lib/CL/devices/almaif/openasip/TTASimControlRegion.cc
index aaad80984..1687e1502 100644
--- a/lib/CL/devices/almaif/openasip/TTASimControlRegion.cc
+++ b/lib/CL/devices/almaif/openasip/TTASimControlRegion.cc
@@ -37,8 +37,8 @@ TTASimControlRegion::TTASimControlRegion(const TTAMachine::Machine &mach,
                                          TTASimDevice *parent) {
 
   POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Initializing TTASimControlRegion\n");
-  PhysAddress = 0;
-  Size = ALMAIF_DEFAULT_CTRL_SIZE;
+  PhysAddress_ = 0;
+  Size_ = ALMAIF_DEFAULT_CTRL_SIZE;
   parent_ = parent;
   assert(parent_ != nullptr &&
          "simulator parent handle NULL, is the sim opened properly?");
@@ -49,17 +49,17 @@ TTASimControlRegion::TTASimControlRegion(const TTAMachine::Machine &mach,
 uint32_t TTASimControlRegion::Read32(size_t offset) {
 
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading from physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   auto value = ControlRegisters_[offset / sizeof(uint32_t)];
   return value;
 }
 
 void TTASimControlRegion::Write32(size_t offset, uint32_t value) {
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
 
   if (offset == ALMAIF_CONTROL_REG_COMMAND) {
     switch (value) {
@@ -84,9 +84,9 @@ void TTASimControlRegion::Write16(size_t offset, uint16_t value) {
 uint64_t TTASimControlRegion::Read64(size_t offset) {
 
   POCL_MSG_PRINT_ALMAIF_MMAP("MMAP: Reading from physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
   auto value = reinterpret_cast<uint64_t *>(
       ControlRegisters_)[offset / sizeof(uint64_t)];
   return value;
@@ -114,66 +114,66 @@ void TTASimControlRegion::setupControlRegisters(
   bool hasPrivateMem = false;
   bool sharedDataAndCq = false;
   bool relativeAddressing = true;
-  int dmem_size = 0;
-  int cq_size = 0;
-  int imem_size = 0;
+  int DmemSize = 0;
+  int CQSize = 0;
+  int ImemSize = 0;
   const TTAMachine::Machine::AddressSpaceNavigator &nav =
       mach.addressSpaceNavigator();
   for (int i = 0; i < nav.count(); i++) {
     TTAMachine::AddressSpace *as = nav.item(i);
     if (as->hasNumericalId(TTA_ASID_GLOBAL)) {
       if (as->end() == UINT32_MAX) {
-        dmem_size = pow(2, 15); // TODO magic number from almaifintegrator.cc
+        DmemSize = pow(2, 15); // TODO magic number from almaifintegrator.cc
         relativeAddressing = false;
       } else {
-        dmem_size = as->end() + 1;
+        DmemSize = as->end() + 1;
       }
       if (as->hasNumericalId(TTA_ASID_CQ)) {
         sharedDataAndCq = true;
       }
     } else if (as->hasNumericalId(TTA_ASID_CQ)) {
-      cq_size = as->end() + 1;
+      CQSize = as->end() + 1;
     } else if (as->hasNumericalId(TTA_ASID_PRIVATE)) {
       hasPrivateMem = true;
     } else if (as->name() == "instructions") {
 
-      imem_size = (as->end() + 1) * as->width();
+      ImemSize = (as->end() + 1) * as->width();
     }
   }
 
-  int segment_size = dmem_size > imem_size ? dmem_size : imem_size;
+  int segment_size = DmemSize > ImemSize ? DmemSize : ImemSize;
 
-  int dmem_start, cq_start;
+  int DmemStart, CQStart;
   if (relativeAddressing) {
-    dmem_start = 0;
-    cq_start = 0;
+    DmemStart = 0;
+    CQStart = 0;
   } else {
-    cq_start = 2 * segment_size;
-    dmem_start = 3 * segment_size;
+    CQStart = 2 * segment_size;
+    DmemStart = 3 * segment_size;
   }
 
   if (!hasPrivateMem) {
     // No private mem, so the latter half of the dmem is reserved for it
     int fallback_mem_size = pocl_get_int_option("POCL_ALMAIF_PRIVATE_MEM_SIZE",
                                                 ALMAIF_DEFAULT_PRIVATE_MEM_SIZE);
-    dmem_size -= fallback_mem_size;
+    DmemSize -= fallback_mem_size;
     POCL_MSG_PRINT_ALMAIF(
         "Almaif: No separate private mem found. Setting it to %d\n",
         fallback_mem_size);
   }
   if (sharedDataAndCq) {
     // No separate Cq so reserve small slice of dmem for it
-    cq_size = 4 * AQL_PACKET_LENGTH;
-    dmem_size -= cq_size;
-    cq_start = dmem_start + dmem_size;
+    CQSize = 4 * AQL_PACKET_LENGTH;
+    DmemSize -= CQSize;
+    CQStart = DmemStart + DmemSize;
   }
 
-  int imem_start = 0;
+  int ImemStart = 0;
 
   if (!relativeAddressing) {
     unsigned default_baseaddress = 0x40000000; // TODO get from env variable
-    cq_start += default_baseaddress;
-    dmem_start += default_baseaddress;
+    CQStart += default_baseaddress;
+    DmemStart += default_baseaddress;
   }
 
   memset(ControlRegisters_, 0, ALMAIF_DEFAULT_CTRL_SIZE);
@@ -183,12 +183,12 @@ void TTASimControlRegion::setupControlRegisters(
   ControlRegisters_[ALMAIF_INFO_IF_TYPE / 4] = 3;
   ControlRegisters_[ALMAIF_INFO_CORE_COUNT / 4] = 1;
   ControlRegisters_[ALMAIF_INFO_CTRL_SIZE / 4] = 1024;
-  ControlRegisters_[ALMAIF_INFO_IMEM_SIZE / 4] = imem_size;
-  ControlRegisters_[ALMAIF_INFO_IMEM_START_LOW / 4] = imem_start;
-  ControlRegisters_[ALMAIF_INFO_CQMEM_SIZE_LOW / 4] = cq_size;
-  ControlRegisters_[ALMAIF_INFO_CQMEM_START_LOW / 4] = cq_start;
-  ControlRegisters_[ALMAIF_INFO_DMEM_SIZE_LOW / 4] = dmem_size;
-  ControlRegisters_[ALMAIF_INFO_DMEM_START_LOW / 4] = dmem_start;
+  ControlRegisters_[ALMAIF_INFO_IMEM_SIZE / 4] = ImemSize;
+  ControlRegisters_[ALMAIF_INFO_IMEM_START_LOW / 4] = ImemStart;
+  ControlRegisters_[ALMAIF_INFO_CQMEM_SIZE_LOW / 4] = CQSize;
+  ControlRegisters_[ALMAIF_INFO_CQMEM_START_LOW / 4] = CQStart;
+  ControlRegisters_[ALMAIF_INFO_DMEM_SIZE_LOW / 4] = DmemSize;
+  ControlRegisters_[ALMAIF_INFO_DMEM_START_LOW / 4] = DmemStart;
   ControlRegisters_[ALMAIF_INFO_FEATURE_FLAGS_LOW / 4] =
       (relativeAddressing) ? 0 : 1;
   ControlRegisters_[ALMAIF_INFO_PTR_SIZE / 4] = 4;
diff --git a/lib/CL/devices/almaif/openasip/TTASimDevice.cc b/lib/CL/devices/almaif/openasip/TTASimDevice.cc
index c38f72883..194ebf6a5 100644
--- a/lib/CL/devices/almaif/openasip/TTASimDevice.cc
+++ b/lib/CL/devices/almaif/openasip/TTASimDevice.cc
@@ -70,23 +70,18 @@ private:
   TTASimDevice *d_;
 };
 
-TTASimDevice::TTASimDevice(char *adf_name) {
+TTASimDevice::TTASimDevice(const std::string &adf_name) {
 
 #ifdef ALMAIF_TTASimMMAP_DEBUG
   POCL_MSG_PRINT_ALMAIF("TTASimMMAP: Initializing TTASimMMAPregion with Address "
                        "%zu and Size %zu\n",
                        Address, RegionSize);
 #endif
-  unsigned adf_name_length = strlen(adf_name) + 5;
-  char *adf_char = (char *)malloc(adf_name_length);
-  assert(adf_char);
-  snprintf(adf_char, adf_name_length, "%s.adf", adf_name);
+  std::string adf_char = adf_name + ".adf";
 
   simulator_ = new SimpleSimulatorFrontend(adf_char, false);
   assert(simulator_ != NULL && "simulator null\n");
 
-  free(adf_char);
-
   simulatorCLI_ = new SimulatorCLI(simulator_->frontend());
 
   SigINTHandler *ctrlcHandler = new SigINTHandler(this);
@@ -124,27 +119,24 @@ TTASimDevice::TTASimDevice(char *adf_name) {
   // Doesn't exist and should not ever be accessed
   InstructionMemory = nullptr;
   if ((global_as != cq_as) && !RelativeAddressing) {
-    CQMemory = new TTASimRegion(0, cq_size, cq_mem);
+    CQMemory = new TTASimRegion(0, CQSize, cq_mem);
   } else {
-    CQMemory = new TTASimRegion(cq_start, cq_size, cq_mem);
+    CQMemory = new TTASimRegion(CQStart, CQSize, cq_mem);
   }
-  DataMemory = new TTASimRegion(dmem_start, dmem_size, mem);
+  DataMemory = new TTASimRegion(DmemStart, DmemSize, mem);
 
   // For built-in kernel use-case. If the firmware.tpef exists, load it in
 
-  int tpef_file_length = strlen(adf_name) + 6;
-  char *tpef_file = (char *)malloc(tpef_file_length);
-  assert(tpef_file);
-  snprintf(tpef_file, tpef_file_length, "%s.tpef", adf_name);
-  if (pocl_exists(tpef_file)) {
+  std::string tpef_file = adf_name + ".tpef";
+  if (pocl_exists(tpef_file.c_str())) {
     POCL_MSG_PRINT_ALMAIF(
         "Almaif: Found built-in kernel firmware for ttasim. Loading it in.\n");
     loadProgram(tpef_file);
   } else {
-    POCL_MSG_PRINT_ALMAIF("File %s not found. Skipping program initialization\n",
-                         tpef_file);
+    POCL_MSG_PRINT_ALMAIF(
+        "File %s not found. Skipping program initialization\n",
+        tpef_file.c_str());
   }
-  free(tpef_file);
 
   if (!RelativeAddressing) {
     if (pocl_is_option_set("POCL_ALMAIF_EXTERNALREGION")) {
@@ -191,7 +183,7 @@ TTASimDevice::~TTASimDevice() {
   delete simulatorCLI_;
 }
 
-void TTASimDevice::loadProgram(char *tpef_file) {
+void TTASimDevice::loadProgram(const std::string &tpef_file) {
   if (simulator_->isRunning())
     ControlMemory->Write32(ALMAIF_CONTROL_REG_COMMAND, ALMAIF_RESET_CMD);
   while (simulator_->isRunning())
@@ -200,7 +192,7 @@ void TTASimDevice::loadProgram(char *tpef_file) {
      over all the simulations. */
   // if (currentProgram != NULL)
   //  globalCycleCount += simulator_.cycleCount();
-  simulator_->loadProgram(tpef_file);
+  simulator_->loadProgram(tpef_file.c_str());
 }
 
 void TTASimDevice::loadProgramToDevice(almaif_kernel_data_s *kd,
@@ -234,9 +226,7 @@ void TTASimDevice::loadProgramToDevice(almaif_kernel_data_s *kd,
 
   loadProgram(tpef_file);
 
-  char wg_func_name[120];
-  snprintf(wg_func_name, sizeof(wg_func_name), "%s_workgroup_argbuffer",
-           kernel->name);
+  std::string wg_func_name = std::string(kernel->name) + "_workgroup_argbuffer";
   const TTAProgram::Program *prog = &simulator_->program();
   if (prog->hasProcedure(wg_func_name)) {
     const TTAProgram::Procedure &proc = prog->procedure(wg_func_name);
diff --git a/lib/CL/devices/almaif/openasip/TTASimDevice.hh b/lib/CL/devices/almaif/openasip/TTASimDevice.hh
index f6f792942..8d92ea7e8 100644
--- a/lib/CL/devices/almaif/openasip/TTASimDevice.hh
+++ b/lib/CL/devices/almaif/openasip/TTASimDevice.hh
@@ -32,7 +32,7 @@ class SimulatorCLI;
 
 class TTASimDevice : public AlmaIFDevice {
 public:
-  TTASimDevice(char *adf_name);
+  TTASimDevice(const std::string &adf_name);
   ~TTASimDevice() override;
 
   virtual void loadProgramToDevice(almaif_kernel_data_s *kd, cl_kernel kernel,
@@ -51,7 +51,7 @@ public:
   void stopProgram();
 
 private:
-  void loadProgram(char *loadProgram);
+  void loadProgram(const std::string &tpef_file);
 };
 
 #endif
diff --git a/lib/CL/devices/almaif/openasip/TTASimRegion.cc b/lib/CL/devices/almaif/openasip/TTASimRegion.cc
index 1de638734..b2c8ba437 100644
--- a/lib/CL/devices/almaif/openasip/TTASimRegion.cc
+++ b/lib/CL/devices/almaif/openasip/TTASimRegion.cc
@@ -37,8 +37,8 @@ TTASimRegion::TTASimRegion(size_t Address, size_t RegionSize,
       "TTASim: Initializing TTASimRegion with Address %zu "
       "and Size %zu and memptr %p\n",
       Address, RegionSize, (void*)mem.get());
-  PhysAddress = Address;
-  Size = RegionSize;
+  PhysAddress_ = Address;
+  Size_ = RegionSize;
   mem_ = mem;
   assert(mem != nullptr && "memory handle NULL, is the sim opened properly?");
 }
@@ -46,57 +46,57 @@ TTASimRegion::TTASimRegion(size_t Address, size_t RegionSize,
 uint32_t TTASimRegion::Read32(size_t offset) {
 
   POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Reading from physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(mem_ != nullptr && "No memory handle; read before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
 
   uint64_t result = 0;
-  mem_->read(PhysAddress + offset, 4, result);
+  mem_->read(PhysAddress_ + offset, 4, result);
   return result;
 }
 
 void TTASimRegion::Write32(size_t offset, uint32_t value) {
 
   POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(mem_ != nullptr && "No memory handle; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
-  mem_->writeDirectlyLE(PhysAddress + offset, 4, value);
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
+  mem_->writeDirectlyLE(PhysAddress_ + offset, 4, value);
 }
 
 void TTASimRegion::Write16(size_t offset, uint16_t value) {
   POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(mem_ != nullptr && "No memory handle; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
 
-  mem_->writeDirectlyLE(PhysAddress + offset, 2, value);
+  mem_->writeDirectlyLE(PhysAddress_ + offset, 2, value);
 }
 
 uint64_t TTASimRegion::Read64(size_t offset) {
   POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Reading from physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
 
   assert(mem_ != nullptr && "No memory handle; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
 
   uint64_t result = 0;
-  mem_->read(PhysAddress + offset, 8, result);
+  mem_->read(PhysAddress_ + offset, 8, result);
   return result;
 }
 
 void TTASimRegion::Write64(size_t offset, uint64_t value) {
 
   POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Writing to physical address 0x%zx with "
-                            "offset 0x%zx\n",
-                            PhysAddress, offset);
+                             "offset 0x%zx\n",
+                             PhysAddress_, offset);
   assert(mem_ != nullptr && "No memory handle; write before mapping?");
-  assert(offset < Size && "Attempt to access data outside MMAP'd buffer");
-  mem_->writeDirectlyLE(PhysAddress + offset, 8, value);
+  assert(offset < Size_ && "Attempt to access data outside MMAP'd buffer");
+  mem_->writeDirectlyLE(PhysAddress_ + offset, 8, value);
 }
 
 void TTASimRegion::CopyToMMAP(size_t destination, const void *source,
@@ -104,10 +104,10 @@ void TTASimRegion::CopyToMMAP(size_t destination, const void *source,
   POCL_MSG_PRINT_ALMAIF_MMAP(
       "TTASim: Writing 0x%zx bytes to buffer at 0x%zx with "
       "address 0x%zx\n",
-      bytes, PhysAddress, destination);
+      bytes, PhysAddress_, destination);
   auto src = (uint8_t *)source;
-  size_t offset = destination - PhysAddress;
-  assert(offset < Size && "Attempt to access data outside TTASim Region");
+  size_t offset = destination - PhysAddress_;
+  assert(offset < Size_ && "Attempt to access data outside TTASim Region");
 
   for (size_t i = 0; i < bytes; ++i) {
     mem_->writeDirectlyLE(destination + i, 1, (Memory::MAU)src[i]);
@@ -117,11 +117,11 @@ void TTASimRegion::CopyToMMAP(size_t destination, const void *source,
 void TTASimRegion::CopyFromMMAP(void *destination, size_t source,
                                 size_t bytes) {
   POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Reading 0x%zx bytes from buffer at 0x%zx "
-                            "with address 0x%zx\n",
-                            bytes, PhysAddress, source);
+                             "with address 0x%zx\n",
+                             bytes, PhysAddress_, source);
   auto dst = (uint8_t *)destination;
-  size_t offset = source - PhysAddress;
-  assert(offset < Size && "Attempt to access data outside TTASim Region");
+  size_t offset = source - PhysAddress_;
+  assert(offset < Size_ && "Attempt to access data outside TTASim Region");
 
   for (size_t i = 0; i < bytes; ++i) {
     dst[i] = mem_->read(source + i);
@@ -132,11 +132,11 @@ void TTASimRegion::CopyInMem(size_t source, size_t destination, size_t bytes) {
   POCL_MSG_PRINT_ALMAIF_MMAP("TTASim: Copying 0x%zx bytes from 0x%zx "
                             "to 0x%zx\n",
                             bytes, source, destination);
-  size_t src_offset = source - PhysAddress;
-  size_t dst_offset = destination - PhysAddress;
-  assert(src_offset < Size && (src_offset + bytes) <= Size &&
+  size_t src_offset = source - PhysAddress_;
+  size_t dst_offset = destination - PhysAddress_;
+  assert(src_offset < Size_ && (src_offset + bytes) <= Size_ &&
          "Attempt to access data outside TTASim Region");
-  assert(dst_offset < Size && (dst_offset + bytes) <= Size &&
+  assert(dst_offset < Size_ && (dst_offset + bytes) <= Size_ &&
          "Attempt to access data outside TTASim Region");
   for (size_t i = 0; i < bytes; ++i) {
     Memory::MAU m = mem_->read(source + i);
diff --git a/lib/CL/devices/builtin_kernels.cc b/lib/CL/devices/builtin_kernels.cc
index 06dabfc86..f8f764223 100644
--- a/lib/CL/devices/builtin_kernels.cc
+++ b/lib/CL/devices/builtin_kernels.cc
@@ -214,6 +214,44 @@ BIKD pocl_BIDescriptors[BIKERNELS] = {
              BIArg("unsigned int*", "minloc", WRITE_BUF),
              BIArg("unsigned int*", "maxloc", WRITE_BUF),
          }),
+    BIKD(POCL_CDBI_SOBEL3X3_U8,
+         "pocl.sobel3x3.u8",
+         {
+             BIArg("unsigned char*", "input", READ_BUF),
+             BIArg("unsigned short*", "sobel_x", WRITE_BUF),
+             BIArg("unsigned short*", "sobel_y", WRITE_BUF),
+         }),
+    BIKD(POCL_CDBI_PHASE_U8,
+         "pocl.phase.u8",
+         {
+             BIArg("unsigned short*", "in_x", READ_BUF),
+             BIArg("unsigned short*", "in_y", READ_BUF),
+             BIArg("unsigned char*", "output", WRITE_BUF),
+         }),
+    BIKD(POCL_CDBI_MAGNITUDE_U16,
+         "pocl.magnitude.u16",
+         {
+             BIArg("unsigned short*", "in_x", READ_BUF),
+             BIArg("unsigned short*", "in_y", READ_BUF),
+             BIArg("unsigned short*", "output", WRITE_BUF),
+         }),
+    BIKD(POCL_CDBI_ORIENTED_NONMAX_U16,
+         "pocl.oriented.nonmaxsuppression.u16",
+         {
+             BIArg("unsigned short*", "magnitude", READ_BUF),
+             BIArg("unsigned char*", "phase", READ_BUF),
+             BIArg("unsigned char*", "output", WRITE_BUF),
+             BIArg("unsigned short", "threshold_lower", POD_ARG_32b),
+             BIArg("unsigned short", "threshold_upper", POD_ARG_32b),
+         }),
+    BIKD(POCL_CDBI_CANNY_U8,
+         "pocl.canny.u8",
+         {
+             BIArg("unsigned char*", "input", READ_BUF),
+             BIArg("unsigned char*", "output", WRITE_BUF),
+             BIArg("unsigned short", "threshold_lower", POD_ARG_32b),
+             BIArg("unsigned short", "threshold_upper", POD_ARG_32b),
+         }),
 };
 
 BIKD::BIKD(BuiltinKernelId KernelIdentifier, const char *KernelName,
diff --git a/lib/CL/devices/builtin_kernels.hh b/lib/CL/devices/builtin_kernels.hh
index a8603c141..2f47b789e 100644
--- a/lib/CL/devices/builtin_kernels.hh
+++ b/lib/CL/devices/builtin_kernels.hh
@@ -38,8 +38,7 @@
 
 #include <vector>
 
-enum BuiltinKernelId : uint16_t
-{
+enum BuiltinKernelId : uint16_t {
   // CD = custom device, BI = built-in
   // 1D array byte copy, get_global_size(0) defines the size of data to copy
   // kernel prototype: pocl.copy(char *input, char *output)
@@ -68,6 +67,11 @@ enum BuiltinKernelId : uint16_t
   POCL_CDBI_OPENVX_SCALEIMAGE_BL_U8 = 22,
   POCL_CDBI_OPENVX_TENSORCONVERTDEPTH_WRAP_U8_F32 = 23,
   POCL_CDBI_OPENVX_MINMAXLOC_R1_U8 = 24,
+  POCL_CDBI_SOBEL3X3_U8 = 25,
+  POCL_CDBI_PHASE_U8 = 26,
+  POCL_CDBI_MAGNITUDE_U16 = 27,
+  POCL_CDBI_ORIENTED_NONMAX_U16 = 28,
+  POCL_CDBI_CANNY_U8 = 29,
   POCL_CDBI_LAST,
   POCL_CDBI_JIT_COMPILER = 0xFFFF
 };
diff --git a/tools/data/tta_test_machines/axim_sep.adf b/tools/data/tta_test_machines/axim_sep.adf
index ee1eb7f37..996e52354 100644
--- a/tools/data/tta_test_machines/axim_sep.adf
+++ b/tools/data/tta_test_machines/axim_sep.adf
@@ -1000,7 +1000,7 @@
   <address-space name="instructions">
     <width>8</width>
     <min-address>0</min-address>
-    <max-address>4095</max-address>
+    <max-address>2047</max-address>
   </address-space>
 
   <address-space name="data">
diff --git a/tools/data/tta_test_machines/relative_sep.adf b/tools/data/tta_test_machines/relative_sep.adf
index 604ee92c3..4d2755996 100644
--- a/tools/data/tta_test_machines/relative_sep.adf
+++ b/tools/data/tta_test_machines/relative_sep.adf
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no" ?>
-<adf version="1.10">
+<adf version="1.20">
 
   <little-endian/>
 
@@ -835,7 +835,7 @@
   <address-space name="param">
     <width>8</width>
     <min-address>0</min-address>
-    <max-address>32767</max-address>
+    <max-address>16383</max-address>
     <numerical-id>1</numerical-id>
     <numerical-id>2</numerical-id>
   </address-space>
@@ -843,7 +843,7 @@
   <address-space name="instructions">
     <width>8</width>
     <min-address>0</min-address>
-    <max-address>4095</max-address>
+    <max-address>2047</max-address>
   </address-space>
 
   <address-space name="data">
-- 
GitLab