diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index cce67309b595eb3ed8879ba2c82504b80ba659b1..863d99773954cddedcda60e7b81e94630e060f37 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -63,6 +63,13 @@ amo-quest:
   dependencies:
     - build
 
+fp-quest:
+  stage: standard
+  script:
+    - make -j${NUM_JOBS} run-fp-tests batch-mode=1
+  dependencies:
+    - build
+
 bench-quest:
   stage: standard
   script:
@@ -102,6 +109,14 @@ amo-ver:
   dependencies:
     - build
 
+# floating point
+fp-ver:
+  stage: standard
+  script:
+    - make -j${NUM_JOBS} run-fp-tests-verilator
+  dependencies:
+    - build
+
 bench-ver:
   stage: standard
   script:
@@ -136,7 +151,6 @@ serdiv-quest:
 ###################################
 # tests with serpent cache system
 
-# rv64ui-p-* and rv64ui-v-* tests
 s-asm-quest:
   stage: serpent
   script:
@@ -144,6 +158,22 @@ s-asm-quest:
   dependencies:
     - build
 
+# atomics
+s-amo-quest:
+  stage: serpent
+  script:
+    - make -j${NUM_JOBS} run-amo-tests defines=PITON_ARIANE+AXI64_CACHE_PORTS batch-mode=1
+  dependencies:
+    - build
+
+# floating point
+s-fp-quest:
+  stage: serpent
+  script:
+    - make -j${NUM_JOBS} run-fp-tests defines=PITON_ARIANE+AXI64_CACHE_PORTS batch-mode=1
+  dependencies:
+    - build
+
 s-bench-quest:
   stage: serpent
   script:
@@ -169,12 +199,28 @@ s-asm2-ver:
 
 # rv64um-*-* tests
 mul-ver:
-  stage: standard
+  stage: serpent
   script:
     - make -j${NUM_JOBS} run-mul-verilator defines=PITON_ARIANE+AXI64_CACHE_PORTS
   dependencies:
     - build
 
+# atomics with the serpent cache system ("s-" prefix keeps this distinct from the standard-stage amo-ver job)
+s-amo-ver:
+  stage: serpent
+  script:
+    - make -j${NUM_JOBS} run-amo-verilator defines=PITON_ARIANE+AXI64_CACHE_PORTS
+  dependencies:
+    - build
+
+# floating point
+s-fp-ver:
+  stage: serpent
+  script:
+    - make -j${NUM_JOBS} run-fp-tests-verilator defines=PITON_ARIANE+AXI64_CACHE_PORTS
+  dependencies:
+    - build
+
 s-bench-ver:
   stage: serpent
   script:
diff --git a/.gitmodules b/.gitmodules
index 477fa5145b864e69b6d3160d4295a338b101cc30..7552cd5068f3335a91eed29dc92a97c2a190a023 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -28,9 +28,6 @@
 [submodule "fpga/src/axi_slice"]
     path = fpga/src/axi_slice
     url = https://github.com/pulp-platform/axi_slice.git
-[submodule "src/fpu_div_sqrt_mvp"]
-	path = src/fpu_div_sqrt_mvp
-	url = https://github.com/pulp-platform/fpu_div_sqrt_mvp.git
 [submodule "src/tech_cells_generic"]
 	path = src/tech_cells_generic
 	url = https://github.com/pulp-platform/tech_cells_generic.git
@@ -43,3 +40,6 @@
 [submodule "src/axi_riscv_atomics"]
 	path = src/axi_riscv_atomics
 	url = https://github.com/pulp-platform/axi_riscv_atomics.git
+[submodule "src/riscv-dbg"]
+	path = src/riscv-dbg
+	url = https://github.com/pulp-platform/riscv-dbg.git
diff --git a/.travis.yml b/.travis.yml
index 18d44591af5f3a3bbf5aabca11bdce256ed7b379..5c6fdea07dfbcbef307c3f5a6498f22c884e8daa 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -134,7 +134,12 @@ jobs:
       script:
         - ci/build-riscv-tests.sh
         - make -j${NUM_JOBS} run-asm-tests2-verilator  defines=PITON_ARIANE+AXI64_CACHE_PORTS
-
+    # amo tests
+    - stage: test
+      name: run amo tests
+      script:
+        - ci/build-riscv-tests.sh
+        - make -j${NUM_JOBS} run-amo-verilator defines=PITON_ARIANE+AXI64_CACHE_PORTS
     - stage: test
       name: run torture (serpent)
       script:
diff --git a/Bender.yml b/Bender.yml
index a6b8f1592600ebb9168067f51a29ef8a2b8f0a59..5e2a02bf8ec5de0d849da3085ee55c5a4227e879 100644
--- a/Bender.yml
+++ b/Bender.yml
@@ -11,15 +11,15 @@ dependencies:
   fpga-support:       { git: "https://github.com/pulp-platform/fpga-support.git",       version: 0.3.2 }
 
 sources:
-  - src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv
-  - src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-  - src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-  - src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
-  - src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-  - src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-  - src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-  - src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-  - src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+  - src/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
   - src/fpu/src/pkg/fpnew_pkg.vhd
   - src/fpu/src/pkg/fpnew_fmts_pkg.vhd
   - src/fpu/src/pkg/fpnew_comps_pkg.vhd
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 59bc9143567d5b234f36e812cabc03054eda1826..4cf06263863358a718d4b374c9b756e72db27ad1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,13 +8,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ### Added
 
+- Official support for floating point unit
+- Added AXI-64bit adapter for write-through cache system
+- Added AXI atomic ops and exclusive access support to write-through cache system
+
 ### Changed
 
 - Rerouted the JTAG from PMOD to second channel of FTDI 2232 chip on Genesys 2 board
 - Increase available RAM size on Genesys II board to 1 GiB
 - Fixed problem which decoded compressed hints as illegal instructions
 - Reduce clock frequency of FPGA to 30 MHz to accomodate FPU
-- Official support for floating point unit
+- Bugfixes in write-through cache system
 
 ### 4.0.0
 
diff --git a/Flist.ariane b/Flist.ariane
index 61656d6407a37fd588f99aa8ff28cccb7d0210d9..dc9021775bd751b1ee09b41f5b87987df2ac1eec 100644
--- a/Flist.ariane
+++ b/Flist.ariane
@@ -15,40 +15,45 @@
 // Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
 // Date: 15.08.2018
 // Description: File list for OpenPiton flow
-// src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv
-// src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
-// src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
-// src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
-// src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
-// src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
-// src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
-// src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
-// src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
-// src/fpu/src/pkg/fpnew_pkg.vhd
-// src/fpu/src/pkg/fpnew_fmts_pkg.vhd
-// src/fpu/src/pkg/fpnew_comps_pkg.vhd
-// src/fpu/src/pkg/fpnew_pkg_constants.vhd
-// src/fpu/src/utils/fp_pipe.vhd
-// src/fpu/src/utils/fp_rounding.vhd
-// src/fpu/src/utils/fp_arbiter.vhd
-// src/fpu/src/ops/fma_core.vhd
-// src/fpu/src/ops/fp_fma.vhd
-// src/fpu/src/ops/fp_divsqrt_multi.vhd
-// src/fpu/src/ops/fp_noncomp.vhd
-// src/fpu/src/ops/fp_f2fcasts_fmt.vhd
-// src/fpu/src/ops/fp_f2icasts_fmt.vhd
-// src/fpu/src/ops/fp_i2fcasts_fmt.vhd
-// src/fpu/src/subunits/addmul_fmt_slice.vhd
-// src/fpu/src/subunits/addmul_block.vhd
-// src/fpu/src/subunits/divsqrt_multifmt_slice.vhd
-// src/fpu/src/subunits/divsqrt_block.vhd
-// src/fpu/src/subunits/noncomp_fmt_slice.vhd
-// src/fpu/src/subunits/noncomp_block.vhd
-// src/fpu/src/subunits/conv_fmt_slice.vhd
-// src/fpu/src/subunits/conv_ifmt_slice.vhd
-// src/fpu/src/subunits/conv_block.vhd
-// src/fpu/src/fpnew.vhd
-// src/fpu/src/fpnew_top.vhd
+src/common_cells/include/common_cells/registers.svh
+src/common_cells/src/fifo_v1.sv
+src/common_cells/src/fifo_v2.sv
+src/common_cells/src/fifo_v3.sv
+src/common_cells/src/lfsr_8bit.sv
+src/common_cells/src/lzc.sv
+src/common_cells/src/rrarbiter.sv
+src/common_cells/src/rstgen_bypass.sv
+src/common_cells/src/sync_wedge.sv
+src/common_cells/src/cdc_2phase.sv
+src/common_cells/src/pipe_reg_simple.sv
+src/common_cells/src/stream_arbiter_flushable.sv
+src/common_cells/src/shift_reg.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/fpu_ff.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/control_mvp.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_mvp_wrapper.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/div_sqrt_top_mvp.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/iteration_div_sqrt_mvp.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt_mvp.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv
+src/fpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv
+src/fpu/src/fpnew_cast_multi.sv
+src/fpu/src/fpnew_classifier.sv
+src/fpu/src/fpnew_divsqrt_multi.sv
+src/fpu/src/fpnew_f2fcast.sv
+src/fpu/src/fpnew_f2icast.sv
+src/fpu/src/fpnew_fma_multi.sv
+src/fpu/src/fpnew_fma.sv
+src/fpu/src/fpnew_i2fcast.sv
+src/fpu/src/fpnew_noncomp.sv
+src/fpu/src/fpnew_opgroup_block.sv
+src/fpu/src/fpnew_opgroup_fmt_slice.sv
+src/fpu/src/fpnew_opgroup_multifmt_slice.sv
+src/fpu/src/fpnew_pipe_in.sv
+src/fpu/src/fpnew_pipe_out.sv
+src/fpu/src/fpnew_pkg.sv
+src/fpu/src/fpnew_rounding.sv
+src/fpu/src/fpnew_top.sv
 src/axi/src/axi_pkg.sv
 src/debug/dm_pkg.sv
 include/riscv_pkg.sv
@@ -64,16 +69,6 @@ src/util/axi_master_connect.sv
 src/util/axi_master_connect_rev.sv
 src/util/axi_slave_connect.sv
 src/util/axi_slave_connect_rev.sv
-src/common_cells/src/fifo_v1.sv
-src/common_cells/src/fifo_v2.sv
-src/common_cells/src/fifo_v3.sv
-src/common_cells/src/lfsr_8bit.sv
-src/common_cells/src/lzc.sv
-src/common_cells/src/rrarbiter.sv
-src/common_cells/src/rstgen_bypass.sv
-src/common_cells/src/sync_wedge.sv
-src/common_cells/src/cdc_2phase.sv
-src/common_cells/src/pipe_reg_simple.sv
 src/fpga-support/rtl/SyncSpRamBeNx64.sv
 src/axi_mem_if/src/axi2mem.sv
 src/tech_cells_generic/src/cluster_clock_inverter.sv
@@ -122,19 +117,19 @@ src/cache_subsystem/serpent_dcache.sv
 src/cache_subsystem/serpent_icache.sv
 src/cache_subsystem/serpent_l15_adapter.sv
 src/cache_subsystem/serpent_cache_subsystem.sv
-src/debug/dm_csrs.sv
 src/clint/clint.sv
 src/clint/axi_lite_interface.sv
-src/debug/dm_mem.sv
-src/debug/dm_top.sv
-src/debug/dmi_cdc.sv
-src/debug/dmi_jtag.sv
-src/debug/dm_sba.sv
-src/debug/dmi_jtag_tap.sv
-src/debug/debug_rom/debug_rom.sv
+src/riscv-dbg/src/dm_csrs.sv
+src/riscv-dbg/src/dm_mem.sv
+src/riscv-dbg/src/dm_top.sv
+src/riscv-dbg/src/dmi_cdc.sv
+src/riscv-dbg/src/dmi_jtag.sv
+src/riscv-dbg/src/dm_sba.sv
+src/riscv-dbg/src/dmi_jtag_tap.sv
+src/riscv-dbg/debug_rom/debug_rom.sv
 openpiton/ariane_verilog_wrap.sv
 openpiton/serpent_peripherals.sv
-bootrom/bootrom.sv
+openpiton/bootrom/bootrom.sv
 src/plic/plic.sv
 src/plic/plic_claim_complete_tracker.sv
 src/plic/plic_comparator.sv
diff --git a/Makefile b/Makefile
index 26ca3eae276c7b28b05c04a80b6f2543622ea52d..2ac83f0c74be662fd9b53baea5073a348985594a 100644
--- a/Makefile
+++ b/Makefile
@@ -44,26 +44,26 @@ endif
 # Sources
 # Package files -> compile first
 ariane_pkg := include/riscv_pkg.sv                          \
-			  src/debug/dm_pkg.sv                           \
-			  include/ariane_pkg.sv                         \
-			  include/std_cache_pkg.sv                      \
-			  include/serpent_cache_pkg.sv                  \
-			  src/axi/src/axi_pkg.sv                        \
-			  src/register_interface/src/reg_intf.sv        \
-			  include/axi_intf.sv                           \
-			  tb/ariane_soc_pkg.sv                          \
-			  include/ariane_axi_pkg.sv                     \
-			  src/fpu/src/fpnew_pkg.sv                      \
-			  src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
+			        src/riscv-dbg/src/dm_pkg.sv                   \
+			        include/ariane_pkg.sv                         \
+			        include/std_cache_pkg.sv                      \
+			        include/serpent_cache_pkg.sv                  \
+			        src/axi/src/axi_pkg.sv                        \
+			        src/register_interface/src/reg_intf.sv        \
+			        include/axi_intf.sv                           \
+			        tb/ariane_soc_pkg.sv                          \
+			        include/ariane_axi_pkg.sv                     \
+			        src/fpu/src/fpnew_pkg.sv                      \
+			        src/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv
 ariane_pkg := $(addprefix $(root-dir), $(ariane_pkg))
 
 # utility modules
-util := $(wildcard src/util/*.svh)                            \
-		src/util/instruction_tracer_pkg.sv                    \
-		src/util/instruction_tracer_if.sv                     \
-		src/tech_cells_generic/src/cluster_clock_gating.sv    \
-		tb/common/mock_uart.sv                                \
-		src/util/sram.sv
+util := $(wildcard src/util/*.svh)                          \
+        src/util/instruction_tracer_pkg.sv                  \
+        src/util/instruction_tracer_if.sv                   \
+        src/tech_cells_generic/src/cluster_clock_gating.sv  \
+        tb/common/mock_uart.sv                              \
+        src/util/sram.sv
 util := $(addprefix $(root-dir), $(util))
 # Test packages
 test_pkg := $(wildcard tb/test/*/*sequence_pkg.sv*) \
@@ -73,69 +73,75 @@ dpi := $(patsubst tb/dpi/%.cc,${dpi-library}/%.o,$(wildcard tb/dpi/*.cc))
 dpi_hdr := $(wildcard tb/dpi/*.h)
 dpi_hdr := $(addprefix $(root-dir), $(dpi_hdr))
 CFLAGS := -I$(QUESTASIM_HOME)/include         \
-          -I$(RISCV)/include  \
+          -I$(RISCV)/include                  \
           -std=c++11 -I../tb/dpi
 
 # this list contains the standalone components
-src :=  $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv))      \
-		$(filter-out src/fpu/src/fpnew_pkg.sv, $(wildcard src/fpu/src/*.sv)) \
-		$(filter-out src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv,    \
-		$(wildcard src/fpu_div_sqrt_mvp/hdl/*.sv))                     \
-		$(wildcard src/frontend/*.sv)                                  \
-		$(filter-out src/cache_subsystem/std_no_dcache.sv,             \
-		$(wildcard src/cache_subsystem/*.sv))                          \
-		$(wildcard bootrom/*.sv)                                       \
-		$(wildcard src/clint/*.sv)                                     \
-		$(wildcard fpga/src/axi2apb/src/*.sv)                          \
-		$(wildcard fpga/src/axi_slice/src/*.sv)                        \
-		$(wildcard src/plic/*.sv)                                      \
-		$(wildcard src/axi_node/src/*.sv)                              \
-		$(wildcard src/axi_riscv_atomics/src/*.sv)                     \
-		$(wildcard src/axi_mem_if/src/*.sv)                            \
-		$(filter-out src/debug/dm_pkg.sv, $(wildcard src/debug/*.sv))  \
-		$(wildcard src/debug/debug_rom/*.sv)                           \
-		src/register_interface/src/apb_to_reg.sv                       \
-		src/axi/src/axi_multicut.sv                                    \
-		src/common_cells/src/deprecated/generic_fifo.sv                \
-		src/common_cells/src/deprecated/pulp_sync.sv                   \
-		src/common_cells/src/deprecated/find_first_one.sv              \
-		src/common_cells/src/rstgen_bypass.sv                          \
-		src/common_cells/src/rstgen.sv                                 \
-		src/common_cells/src/stream_mux.sv                             \
-		src/common_cells/src/stream_demux.sv                           \
-		src/common_cells/src/stream_arbiter.sv                         \
-		src/common_cells/src/stream_arbiter_flushable.sv               \
-		src/util/axi_master_connect.sv                                 \
-		src/util/axi_slave_connect.sv                                  \
-		src/util/axi_master_connect_rev.sv                             \
-		src/util/axi_slave_connect_rev.sv                              \
-		src/axi/src/axi_cut.sv                                         \
-		src/axi/src/axi_join.sv                                        \
-		src/axi/src/axi_delayer.sv                                     \
-		src/axi/src/axi_to_axi_lite.sv                                 \
-		src/fpga-support/rtl/SyncSpRamBeNx64.sv                        \
-		src/common_cells/src/sync.sv                                   \
-		src/common_cells/src/cdc_2phase.sv                             \
-		src/common_cells/src/spill_register.sv                         \
-		src/common_cells/src/sync_wedge.sv                             \
-		src/common_cells/src/edge_detect.sv                            \
-		src/common_cells/src/fifo_v3.sv                                \
-		src/common_cells/src/fifo_v2.sv                                \
-		src/common_cells/src/fifo_v1.sv                                \
-		src/common_cells/src/lzc.sv                                    \
-		src/common_cells/src/rrarbiter.sv                              \
-		src/common_cells/src/stream_delay.sv                           \
-		src/common_cells/src/lfsr_8bit.sv                              \
-		src/common_cells/src/lfsr_16bit.sv                             \
-		src/common_cells/src/counter.sv                                \
-		src/common_cells/src/shift_reg.sv                              \
-		src/tech_cells_generic/src/cluster_clock_inverter.sv           \
-		src/tech_cells_generic/src/pulp_clock_mux2.sv                  \
-		tb/ariane_testharness.sv                                       \
-		tb/ariane_peripherals.sv                                       \
-		tb/common/uart.sv                                              \
-		tb/common/SimDTM.sv                                            \
-		tb/common/SimJTAG.sv
+src :=  $(filter-out src/ariane_regfile.sv, $(wildcard src/*.sv))              \
+        $(filter-out src/fpu/src/fpnew_pkg.sv, $(wildcard src/fpu/src/*.sv))   \
+        $(filter-out src/fpu/src/fpu_div_sqrt_mvp/hdl/defs_div_sqrt_mvp.sv,    \
+        $(wildcard src/fpu/src/fpu_div_sqrt_mvp/hdl/*.sv))                     \
+        $(wildcard src/frontend/*.sv)                                          \
+        $(filter-out src/cache_subsystem/std_no_dcache.sv,                     \
+        $(wildcard src/cache_subsystem/*.sv))                                  \
+        $(wildcard bootrom/*.sv)                                               \
+        $(wildcard src/clint/*.sv)                                             \
+        $(wildcard fpga/src/axi2apb/src/*.sv)                                  \
+        $(wildcard fpga/src/axi_slice/src/*.sv)                                \
+        $(wildcard src/plic/*.sv)                                              \
+        $(wildcard src/axi_node/src/*.sv)                                      \
+        $(wildcard src/axi_riscv_atomics/src/*.sv)                             \
+        $(wildcard src/axi_mem_if/src/*.sv)                                    \
+        src/riscv-dbg/src/dmi_cdc.sv                                           \
+        src/riscv-dbg/src/dmi_jtag.sv                                          \
+        src/riscv-dbg/src/dmi_jtag_tap.sv                                      \
+        src/riscv-dbg/src/dm_csrs.sv                                           \
+        src/riscv-dbg/src/dm_mem.sv                                            \
+        src/riscv-dbg/src/dm_sba.sv                                            \
+        src/riscv-dbg/src/dm_top.sv                                            \
+        src/riscv-dbg/debug_rom/debug_rom.sv                                   \
+        src/register_interface/src/apb_to_reg.sv                               \
+        src/axi/src/axi_multicut.sv                                            \
+        src/common_cells/src/deprecated/generic_fifo.sv                        \
+        src/common_cells/src/deprecated/pulp_sync.sv                           \
+        src/common_cells/src/deprecated/find_first_one.sv                      \
+        src/common_cells/src/rstgen_bypass.sv                                  \
+        src/common_cells/src/rstgen.sv                                         \
+        src/common_cells/src/stream_mux.sv                                     \
+        src/common_cells/src/stream_demux.sv                                   \
+        src/common_cells/src/stream_arbiter.sv                                 \
+        src/common_cells/src/stream_arbiter_flushable.sv                       \
+        src/util/axi_master_connect.sv                                         \
+        src/util/axi_slave_connect.sv                                          \
+        src/util/axi_master_connect_rev.sv                                     \
+        src/util/axi_slave_connect_rev.sv                                      \
+        src/axi/src/axi_cut.sv                                                 \
+        src/axi/src/axi_join.sv                                                \
+        src/axi/src/axi_delayer.sv                                             \
+        src/axi/src/axi_to_axi_lite.sv                                         \
+        src/fpga-support/rtl/SyncSpRamBeNx64.sv                                \
+        src/common_cells/src/sync.sv                                           \
+        src/common_cells/src/cdc_2phase.sv                                     \
+        src/common_cells/src/spill_register.sv                                 \
+        src/common_cells/src/sync_wedge.sv                                     \
+        src/common_cells/src/edge_detect.sv                                    \
+        src/common_cells/src/fifo_v3.sv                                        \
+        src/common_cells/src/fifo_v2.sv                                        \
+        src/common_cells/src/fifo_v1.sv                                        \
+        src/common_cells/src/lzc.sv                                            \
+        src/common_cells/src/rrarbiter.sv                                      \
+        src/common_cells/src/stream_delay.sv                                   \
+        src/common_cells/src/lfsr_8bit.sv                                      \
+        src/common_cells/src/lfsr_16bit.sv                                     \
+        src/common_cells/src/counter.sv                                        \
+        src/common_cells/src/shift_reg.sv                                      \
+        src/tech_cells_generic/src/cluster_clock_inverter.sv                   \
+        src/tech_cells_generic/src/pulp_clock_mux2.sv                          \
+        tb/ariane_testharness.sv                                               \
+        tb/ariane_peripherals.sv                                               \
+        tb/common/uart.sv                                                      \
+        tb/common/SimDTM.sv                                                    \
+        tb/common/SimJTAG.sv
 
 src := $(addprefix $(root-dir), $(src))
 
@@ -307,32 +313,32 @@ check-benchmarks:
 	ci/check-tests.sh tmp/riscv-benchmarks- $(shell wc -l $(riscv-benchmarks-list) | awk -F " " '{ print $1 }')
 
 # verilator-specific
-verilate_command := $(verilator)                                                             \
-					$(filter-out %.vhd, $(ariane_pkg))                                                 \
-					$(filter-out src/fpu_wrap.sv, $(filter-out %.vhd, $(src)))                         \
-					+define+$(defines)                                                                 \
-					src/util/sram.sv                                                                   \
-					+incdir+src/axi_node                                                               \
-					$(if $(verilator_threads), --threads $(verilator_threads))                         \
-					--unroll-count 256                                                                 \
-					-Werror-PINMISSING                                                                 \
-					-Werror-IMPLICIT                                                                   \
-					-Wno-fatal                                                                         \
-					-Wno-PINCONNECTEMPTY                                                               \
-					-Wno-ASSIGNDLY                                                                     \
-					-Wno-DECLFILENAME                                                                  \
-					-Wno-UNUSED                                                                        \
-					-Wno-UNOPTFLAT                                                                     \
-					-Wno-style                                                                         \
-					$(if $(PROFILE),--stats --stats-vars --profile-cfuncs,)                            \
-					-Wno-lint                                                                          \
-					$(if $(DEBUG),--trace-structs --trace,)                                            \
-					-LDFLAGS "-L$(RISCV)/lib -Wl,-rpath,$(RISCV)/lib -lfesvr$(if $(PROFILE), -g -pg,)" \
-					-CFLAGS "$(CFLAGS)$(if $(PROFILE), -g -pg,)" -Wall --cc  --vpi                     \
-					$(list_incdir) --top-module ariane_testharness                                     \
-					--Mdir $(ver-library) -O3                                                          \
-					--exe tb/ariane_tb.cpp tb/dpi/SimDTM.cc tb/dpi/SimJTAG.cc                          \
-					tb/dpi/remote_bitbang.cc tb/dpi/msim_helper.cc
+verilate_command := $(verilator)                                                                       \
+                    $(filter-out %.vhd, $(ariane_pkg))                                                 \
+                    $(filter-out src/fpu_wrap.sv, $(filter-out %.vhd, $(src)))                         \
+                    +define+$(defines)                                                                 \
+                    src/util/sram.sv                                                                   \
+                    +incdir+src/axi_node                                                               \
+                    $(if $(verilator_threads), --threads $(verilator_threads))                         \
+                    --unroll-count 256                                                                 \
+                    -Werror-PINMISSING                                                                 \
+                    -Werror-IMPLICIT                                                                   \
+                    -Wno-fatal                                                                         \
+                    -Wno-PINCONNECTEMPTY                                                               \
+                    -Wno-ASSIGNDLY                                                                     \
+                    -Wno-DECLFILENAME                                                                  \
+                    -Wno-UNUSED                                                                        \
+                    -Wno-UNOPTFLAT                                                                     \
+                    -Wno-style                                                                         \
+                    $(if $(PROFILE),--stats --stats-vars --profile-cfuncs,)                            \
+                    -Wno-lint                                                                          \
+                    $(if $(DEBUG),--trace --trace-structs,)                                            \
+                    -LDFLAGS "-L$(RISCV)/lib -Wl,-rpath,$(RISCV)/lib -lfesvr$(if $(PROFILE), -g -pg,)" \
+                    -CFLAGS "$(CFLAGS)$(if $(PROFILE), -g -pg,)" -Wall --cc  --vpi                     \
+                    $(list_incdir) --top-module ariane_testharness                                     \
+                    --Mdir $(ver-library) -O3                                                          \
+                    --exe tb/ariane_tb.cpp tb/dpi/SimDTM.cc tb/dpi/SimJTAG.cc                          \
+                    tb/dpi/remote_bitbang.cc tb/dpi/msim_helper.cc
 
 # User Verilator, at some point in the future this will be auto-generated
 verilate:
@@ -347,7 +353,7 @@ $(addsuffix -verilator,$(riscv-asm-tests)): verilate
 	$(ver-library)/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@)
 
 $(addsuffix -verilator,$(riscv-amo-tests)): verilate
-	$(ver-library)/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@)
+	$(ver-library)/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@) 
 
 $(addsuffix -verilator,$(riscv-mul-tests)): verilate
 	$(ver-library)/Variane_testharness $(riscv-test-dir)/$(subst -verilator,,$@)
@@ -424,7 +430,7 @@ fpga: $(ariane_pkg) $(util) $(src) $(fpga_src) $(util) $(uart_src)
 	@echo read_vhdl        {$(uart_src)}    > fpga/scripts/add_sources.tcl
 	@echo read_verilog -sv {$(ariane_pkg)} >> fpga/scripts/add_sources.tcl
 	@echo read_verilog -sv {$(util)}       >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(src)}        >> fpga/scripts/add_sources.tcl
+	@echo read_verilog -sv {$(filter-out $(root-dir)bootrom/bootrom.sv, $(src))} >> fpga/scripts/add_sources.tcl
 	@echo read_verilog -sv {$(fpga_src)}   >> fpga/scripts/add_sources.tcl
 	@echo "[FPGA] Generate Bitstream"
 	cd fpga && make BOARD="genesys2" XILINX_PART="xc7k325tffg900-2" XILINX_BOARD="digilentinc.com:genesys2:part0:1.1" CLK_PERIOD_NS="20"
diff --git a/README.md b/README.md
index 1d629bde27029533e9588c21b136279781f153af..8d091751b1ae0937781a37fef0c6c212618e007e 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,7 @@ The Verilator testbench makes use of the `riscv-fesvr`. This means that you can
 Both, the Verilator model as well as the Questa simulation will produce trace logs. The Verilator trace is more basic but you can feed the log to `spike-dasm` to resolve instructions to mnemonics. Unfortunately value inspection is currently not possible for the Verilator trace file.
 
 ```
-$ spike-dasm < trace_hart_00_0.dasm > logfile.txt
+$ spike-dasm < trace_hart_00.dasm > logfile.txt
 ```
 
 ### Running User-Space Applications
@@ -161,7 +161,8 @@ To get started, connect the micro USB port that is labeled with JTAG to your mac
 > SUBSYSTEM=="usb", ACTION=="add", ATTRS{idProduct}=="6010", ATTRS{idVendor}=="0403", MODE="664", GROUP="plugdev"
 >```
 
-Once attached to your system, the FTDI chip should be listed when you type `lsusb`
+Once attached to your system, the FTDI chip should be listed when you type `lsusb`:
+```
 
 Bus 005 Device 019: ID 0403:6010 Future Technology Devices International, Ltd FT2232C/D/H Dual UART/FIFO IC
 ```
diff --git a/bootrom/Makefile b/bootrom/Makefile
index faedc68d9f2807519797c8cc32fa78ab7afa08e3..50bcd294f9224d4f3a8093e971bbe0d22f6d75d2 100644
--- a/bootrom/Makefile
+++ b/bootrom/Makefile
@@ -23,4 +23,4 @@ all: $(bootrom_img)
 	$(PYTHON) ./gen_rom.py $<
 
 clean:
-	rm $(bootrom_img) $(DTB)
+	rm -f $(bootrom_img) $(DTB)
diff --git a/bootrom/gen_rom.py b/bootrom/gen_rom.py
index 1c825dfa6e4ab8c26244daca6a286087f2902e30..bb2abc356bf26076bc54c853e658700b7895c5e6 100755
--- a/bootrom/gen_rom.py
+++ b/bootrom/gen_rom.py
@@ -4,7 +4,8 @@ from string import Template
 import argparse
 import os.path
 import sys
-from bitstring import ConstBitStream, BitArray, BitStream
+import binascii
+
 
 parser = argparse.ArgumentParser(description='Convert binary file to verilog rom')
 parser.add_argument('filename', metavar='filename', nargs=1,
@@ -77,13 +78,10 @@ $content
 """
 
 def read_bin():
-    s = ConstBitStream(filename=filename + ".img")
-    rom = []
-    try:
-        while True:
-            rom.append(s.read("hex:8"))
-    except Exception as e:
-        pass
+
+    with open(filename + ".img", 'rb') as f:
+        rom = binascii.hexlify(f.read())
+        rom = map(''.join, zip(rom[::2], rom[1::2]))
 
 
     # align to 64 bit
diff --git a/fpga/constraints/ariane.xdc b/fpga/constraints/ariane.xdc
index b60ab9110f7e1d93bc9055ed8ef082c9f90450aa..714bc451cb5145d7f4a0ef856d5ed4c8d3dab618 100644
--- a/fpga/constraints/ariane.xdc
+++ b/fpga/constraints/ariane.xdc
@@ -3,6 +3,13 @@
 create_clock -period 100.000 -name tck -waveform {0.000 50.000} [get_ports tck]
 set_input_jitter tck 1.000
 
+# JTAG pin timing relative to tck (minimizes routing delay); trst_n is excluded from timing analysis
+set_input_delay  -clock tck -clock_fall 5 [get_ports tdi    ]
+set_input_delay  -clock tck -clock_fall 5 [get_ports tms    ]
+set_output_delay -clock tck             5 [get_ports tdo    ]
+set_false_path   -from                    [get_ports trst_n ] 
+
+
 set_max_delay -datapath_only -from [get_pins i_dmi_jtag/i_dmi_cdc/i_cdc_resp/i_src/data_src_q_reg*/C] -to [get_pins i_dmi_jtag/i_dmi_cdc/i_cdc_resp/i_dst/data_dst_q_reg*/D] 20.000
 set_max_delay -datapath_only -from [get_pins i_dmi_jtag/i_dmi_cdc/i_cdc_resp/i_src/req_src_q_reg/C] -to [get_pins i_dmi_jtag/i_dmi_cdc/i_cdc_resp/i_dst/req_dst_q_reg/D] 20.000
 set_max_delay -datapath_only -from [get_pins i_dmi_jtag/i_dmi_cdc/i_cdc_req/i_dst/ack_dst_q_reg/C] -to [get_pins i_dmi_jtag/i_dmi_cdc/i_cdc_req/i_src/ack_src_q_reg/D] 20.000
diff --git a/fpga/constraints/genesys-2.xdc b/fpga/constraints/genesys-2.xdc
index 79668f4db55198ae19434ce7404d4e248835cae4..fd4302bba7979991b1a80354b0cdfe348ddaf4ed 100644
--- a/fpga/constraints/genesys-2.xdc
+++ b/fpga/constraints/genesys-2.xdc
@@ -62,9 +62,11 @@ set_property -dict {PACKAGE_PIN AG12 IOSTANDARD LVCMOS15} [get_ports { eth_mdio
 #############################################
 # Modified for 125MHz receive clock
 create_clock -period 8.000 -name eth_rxck [get_ports eth_rxck]
+
 set_clock_groups -asynchronous -group [get_clocks eth_rxck -include_generated_clocks]
 set_clock_groups -asynchronous -group [get_clocks clk_out2_xlnx_clk_gen]
 
+
 ## SD Card
 set_property -dict {PACKAGE_PIN R28 IOSTANDARD LVCMOS33} [get_ports spi_clk_o]
 set_property -dict {PACKAGE_PIN T30 IOSTANDARD LVCMOS33} [get_ports spi_ss]
diff --git a/fpga/src/ariane_xilinx.sv b/fpga/src/ariane_xilinx.sv
index 7413c2647dffe1a09e98f7245e792bf2c8f12075..e92a15b9560be5e8c349399c883d914c2591906d 100644
--- a/fpga/src/ariane_xilinx.sv
+++ b/fpga/src/ariane_xilinx.sv
@@ -86,7 +86,7 @@ module ariane_xilinx (
 // 24 MByte in 8 byte words
 localparam NumWords = (24 * 1024 * 1024) / 8;
 localparam NBSlave = 2; // debug, ariane
-localparam CacheStartAddr = (1 << 31);
+localparam logic [63:0] CacheStartAddr = 64'h80000000;
 localparam AxiAddrWidth = 64;
 localparam AxiDataWidth = 64;
 localparam AxiIdWidthMaster = 4;
@@ -235,40 +235,107 @@ dmi_jtag i_dmi_jtag (
     .tdo_oe_o             (        )
 );
 
-ariane_axi::req_t    dm_axi_m_req,  dm_axi_s_req;
-ariane_axi::resp_t   dm_axi_m_resp, dm_axi_s_resp;
+ariane_axi::req_t    dm_axi_m_req;  // AXI request struct produced by i_dm_axi_master
+ariane_axi::resp_t   dm_axi_m_resp; // AXI response struct consumed by i_dm_axi_master
+// memory-style slave port of i_dm_top, driven by i_dm_axi2mem
+logic                dm_slave_req;
+logic                dm_slave_we;
+logic [64-1:0]       dm_slave_addr;
+logic [64/8-1:0]     dm_slave_be;
+logic [64-1:0]       dm_slave_wdata;
+logic [64-1:0]       dm_slave_rdata;
+// memory-style master port of i_dm_top, connected to i_dm_axi_master
+logic                dm_master_req;
+logic [64-1:0]       dm_master_add;
+logic                dm_master_we;
+logic [64-1:0]       dm_master_wdata;
+logic [64/8-1:0]     dm_master_be;
+logic                dm_master_gnt;
+logic                dm_master_r_valid;
+logic [64-1:0]       dm_master_r_rdata;
 
 // debug module
 dm_top #(
-    // current implementation only supports 1 hart
-    .NrHarts          ( 1                ),
-    .AxiIdWidth       ( AxiIdWidthSlaves ),
-    .AxiAddrWidth     ( AxiAddrWidth     ),
-    .AxiDataWidth     ( AxiDataWidth     ),
-    .AxiUserWidth     ( AxiUserWidth     )
-) i_dm_top (
-    .clk_i            ( clk              ),
-    .rst_ni           ( rst_n            ), // PoR
-    .testmode_i       ( test_en          ),
-    .ndmreset_o       ( ndmreset         ),
-    .dmactive_o       ( dmactive         ), // active debug session
-    .debug_req_o      ( debug_req_irq    ),
-    .unavailable_i    ( '0               ),
-    .axi_s_req_i      ( dm_axi_s_req     ),
-    .axi_s_resp_o     ( dm_axi_s_resp    ),
-    .axi_m_req_o      ( dm_axi_m_req     ),
-    .axi_m_resp_i     ( dm_axi_m_resp    ),
-    .dmi_rst_ni       ( rst_n            ),
-    .dmi_req_valid_i  ( debug_req_valid  ),
-    .dmi_req_ready_o  ( debug_req_ready  ),
-    .dmi_req_i        ( debug_req        ),
-    .dmi_resp_valid_o ( debug_resp_valid ),
-    .dmi_resp_ready_i ( debug_resp_ready ),
-    .dmi_resp_o       ( debug_resp       )
+    .NrHarts          ( 1                 ),
+    .BusWidth         ( AxiDataWidth      ),
+    .Selectable_Harts ( 1'b1              )
+) i_dm_top ( 
+    .clk_i            ( clk               ),
+    .rst_ni           ( rst_n             ), // PoR
+    .testmode_i       ( test_en           ),
+    .ndmreset_o       ( ndmreset          ),
+    .dmactive_o       ( dmactive          ), // active debug session
+    .debug_req_o      ( debug_req_irq     ),
+    .unavailable_i    ( '0                ),
+    .slave_req_i      ( dm_slave_req      ),
+    .slave_we_i       ( dm_slave_we       ),
+    .slave_addr_i     ( dm_slave_addr     ),
+    .slave_be_i       ( dm_slave_be       ),
+    .slave_wdata_i    ( dm_slave_wdata    ),
+    .slave_rdata_o    ( dm_slave_rdata    ),
+    .master_req_o     ( dm_master_req     ),
+    .master_add_o     ( dm_master_add     ),
+    .master_we_o      ( dm_master_we      ),
+    .master_wdata_o   ( dm_master_wdata   ),
+    .master_be_o      ( dm_master_be      ),
+    .master_gnt_i     ( dm_master_gnt     ),
+    .master_r_valid_i ( dm_master_r_valid ),
+    .master_r_rdata_i ( dm_master_r_rdata ),
+    .dmi_rst_ni       ( rst_n             ),
+    .dmi_req_valid_i  ( debug_req_valid   ),
+    .dmi_req_ready_o  ( debug_req_ready   ),
+    .dmi_req_i        ( debug_req         ),
+    .dmi_resp_valid_o ( debug_resp_valid  ),
+    .dmi_resp_ready_i ( debug_resp_ready  ),
+    .dmi_resp_o       ( debug_resp        )
 );
 
-axi_master_connect i_axi_master_dm (.axi_req_i(dm_axi_m_req), .axi_resp_o(dm_axi_m_resp), .master(slave[1]));
-axi_slave_connect  i_axi_slave_dm  (.axi_req_o(dm_axi_s_req), .axi_resp_i(dm_axi_s_resp), .slave(master[ariane_soc::Debug]));
+axi2mem #( // bridges the crossbar's Debug AXI port to the memory-style slave port of i_dm_top
+    .AXI_ID_WIDTH   ( AxiIdWidthSlaves    ),
+    .AXI_ADDR_WIDTH ( AxiAddrWidth        ),
+    .AXI_DATA_WIDTH ( AxiDataWidth        ),
+    .AXI_USER_WIDTH ( AxiUserWidth        )
+) i_dm_axi2mem (
+    .clk_i      ( clk                       ), // same clock net as i_dm_top
+    .rst_ni     ( rst_n                     ), // same reset net as i_dm_top
+    .slave      ( master[ariane_soc::Debug] ),
+    .req_o      ( dm_slave_req              ),
+    .we_o       ( dm_slave_we               ),
+    .addr_o     ( dm_slave_addr             ),
+    .be_o       ( dm_slave_be               ),
+    .data_o     ( dm_slave_wdata            ),
+    .data_i     ( dm_slave_rdata            )
+);
+
+axi_master_connect i_dm_axi_master_connect ( // ties the debug module's AXI req/resp structs to the slave[1] interface
+  .axi_req_i(dm_axi_m_req), // request struct from i_dm_axi_master
+  .axi_resp_o(dm_axi_m_resp), // response struct back to i_dm_axi_master
+  .master(slave[1])
+);
+
+axi_adapter #( // turns the memory-style master port of i_dm_top into AXI request/response structs
+    .DATA_WIDTH            ( AxiDataWidth              )
+) i_dm_axi_master (
+    .clk_i                 ( clk                       ), // same clock net as i_dm_top
+    .rst_ni                ( rst_n                     ), // same reset net as i_dm_top
+    .req_i                 ( dm_master_req             ),
+    .type_i                ( ariane_axi::SINGLE_REQ    ),
+    .gnt_o                 ( dm_master_gnt             ),
+    .gnt_id_o              (                           ), // unused
+    .addr_i                ( dm_master_add             ),
+    .we_i                  ( dm_master_we              ),
+    .wdata_i               ( dm_master_wdata           ),
+    .be_i                  ( dm_master_be              ),
+    .size_i                ( 2'b11                     ), // always do 64bit here and use byte enables to gate
+    .id_i                  ( '0                        ),
+    .valid_o               ( dm_master_r_valid         ),
+    .rdata_o               ( dm_master_r_rdata         ),
+    .id_o                  (                           ), // unused
+    .critical_word_o       (                           ), // unused
+    .critical_word_valid_o (                           ), // unused
+    .axi_req_o             ( dm_axi_m_req              ),
+    .axi_resp_i            ( dm_axi_m_resp             )
+);
 
 // ---------------
 // Core
diff --git a/fpga/src/bootrom/bootrom.h b/fpga/src/bootrom/bootrom.h
index 840467648d682ddfa2a0ad9cb7698aab07a2991d..a650a9a378cfc096ef077fa05e33f95a68635984 100644
--- a/fpga/src/bootrom/bootrom.h
+++ b/fpga/src/bootrom/bootrom.h
@@ -1,6 +1,6 @@
 // Auto-generated code
 
-const int reset_vec_size = 1732;
+const int reset_vec_size = 1720;
 
 uint32_t reset_vec[reset_vec_size] = {
     0x00800913,
@@ -157,7 +157,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x80824108,
     0x00001517,
     0x05131101,
-    0xec065c05,
+    0xec065905,
     0xe426e822,
     0xf05ff0ef,
     0x200007b7,
@@ -168,30 +168,30 @@ uint32_t reset_vec[reset_vec_size] = {
     0x10400793,
     0x5064d03c,
     0x00001517,
-    0x5a050513,
+    0x57050513,
     0xeddff0ef,
     0x95132481,
     0x91010204,
     0xf53ff0ef,
     0x00001517,
-    0x5a850513,
+    0x57850513,
     0xec5ff0ef,
     0x16600793,
     0x5064d03c,
     0x00001517,
-    0x57450513,
+    0x54450513,
     0xeb1ff0ef,
     0x95132481,
     0x91010204,
     0xf27ff0ef,
     0x00001517,
-    0x57c50513,
+    0x54c50513,
     0xe99ff0ef,
     0xd03c4799,
     0x60e26442,
     0x151764a2,
     0x05130000,
-    0x61055565,
+    0x61055265,
     0xe81ff06f,
     0x07b71101,
     0xec062000,
@@ -209,12 +209,12 @@ uint32_t reset_vec[reset_vec_size] = {
     0x8b852401,
     0x1517e395,
     0x05130000,
-    0xf0ef5265,
+    0xf0ef4f65,
     0x4088e3bf,
     0x91011502,
     0xeb3ff0ef,
     0x00001517,
-    0x50850513,
+    0x4d850513,
     0xe25ff0ef,
     0x200007b7,
     0x7513577d,
@@ -287,19 +287,19 @@ uint32_t reset_vec[reset_vec_size] = {
     0x11018082,
     0x842ae822,
     0x00001517,
-    0x40450513,
+    0x3d450513,
     0xe42eec06,
     0xcfdff0ef,
     0xf0ef8522,
     0x1517cf7f,
     0x05130000,
-    0xf0ef3fe5,
+    0xf0ef3ce5,
     0x65a2cebf,
     0xf0ef852e,
     0x6442da7f,
     0x151760e2,
     0x05130000,
-    0x61053b65,
+    0x61053865,
     0xcd1ff06f,
     0xe8221101,
     0xe4266409,
@@ -313,7 +313,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0xc00df25f,
     0xfe9915e3,
     0x00001517,
-    0x3bc50513,
+    0x38c50513,
     0xf0ef4585,
     0x4505f89f,
     0x644260e2,
@@ -351,7 +351,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0xf0ef842a,
     0x1517e89f,
     0x85a20000,
-    0x32e50513,
+    0x2fe50513,
     0xef3ff0ef,
     0x0004051b,
     0x640260a2,
@@ -367,7 +367,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0xe53ff0ef,
     0x85aa842a,
     0x00001517,
-    0x2f450513,
+    0x2c450513,
     0xeb3ff0ef,
     0xe37ff0ef,
     0xfc940ce3,
@@ -379,7 +379,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0xe022e406,
     0xc89ff0ef,
     0x00001517,
-    0x2cc50513,
+    0x29c50513,
     0xb91ff0ef,
     0x347d4429,
     0xe03ff0ef,
@@ -474,7 +474,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x0359e7b3,
     0x1517e799,
     0x05130000,
-    0xf0ef18a5,
+    0xf0ef15a5,
     0x19fda17f,
     0xf93046e3,
     0x46054401,
@@ -491,7 +491,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0xf0efc5df,
     0x1517c59f,
     0x05130000,
-    0xf0ef1265,
+    0xf0ef0f65,
     0x547d9d3f,
     0x60ae8522,
     0x74ea640e,
@@ -510,7 +510,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0xdf3ff0ef,
     0x1517c51d,
     0x05130000,
-    0xf0ef1025,
+    0xf0ef0d25,
     0x54fd987f,
     0xfb040113,
     0x852660a6,
@@ -521,74 +521,74 @@ uint32_t reset_vec[reset_vec_size] = {
     0x61616c02,
     0x15178082,
     0x05130000,
-    0xf0ef0fe5,
+    0xf0ef0ce5,
     0x710195bf,
     0x45854605,
     0xf0ef850a,
     0x890ae49f,
     0xc90584aa,
     0x00001517,
-    0x0f850513,
+    0x0c850513,
     0x93dff0ef,
     0x00001517,
-    0x10450513,
+    0x0d450513,
     0x931ff0ef,
     0xf0ef8526,
     0x15179adf,
     0x05130000,
-    0xf0ef0025,
+    0xf0effd25,
     0x54f991ff,
     0x1517bf61,
     0x05130000,
-    0xf0ef0fa5,
+    0xf0ef0ca5,
     0x151790ff,
     0x05130000,
-    0xf0ef10e5,
+    0xf0ef0de5,
     0x6502903f,
     0xf0ef4b91,
     0x151797df,
     0x05130000,
-    0xf0ef10a5,
+    0xf0ef0da5,
     0x45228eff,
     0x92bff0ef,
     0x00001517,
-    0x10850513,
+    0x0d850513,
     0x8ddff0ef,
     0xf0ef4532,
     0x1517919f,
     0x05130000,
-    0xf0ef1065,
+    0xf0ef0d65,
     0x45428cbf,
     0x907ff0ef,
     0x00001517,
-    0x10450513,
+    0x0d450513,
     0x8b9ff0ef,
     0xf0ef4552,
     0x15178f5f,
     0x05130000,
-    0xf0ef1025,
+    0xf0ef0d25,
     0x65628a7f,
     0x923ff0ef,
     0x00001517,
-    0x10850513,
+    0x0d850513,
     0x895ff0ef,
     0xf0ef7502,
     0x1517911f,
     0x05130000,
-    0xf0ef1065,
+    0xf0ef0d65,
     0x6526883f,
     0x8ffff0ef,
     0x00001517,
-    0x11450513,
+    0x0e450513,
     0x871ff0ef,
     0xf0ef4546,
     0x15178adf,
     0x05130000,
-    0xf0ef1225,
+    0xf0ef0f25,
     0x455685ff,
     0x89bff0ef,
     0x00001517,
-    0xf3050513,
+    0xf0050513,
     0x84dff0ef,
     0x04892583,
     0x46057101,
@@ -597,29 +597,29 @@ uint32_t reset_vec[reset_vec_size] = {
     0x080489aa,
     0x1517c50d,
     0x05130000,
-    0xf0effe65,
+    0xf0effb65,
     0x151782bf,
     0x05130000,
-    0xf0efff25,
+    0xf0effc25,
     0x854e81ff,
     0x89bff0ef,
     0x00001517,
-    0xef050513,
+    0xec050513,
     0x1517b5fd,
     0x05130000,
-    0xf0ef0e65,
+    0xf0ef0b65,
     0xf513803f,
     0xf0ef0ff9,
     0x15178bff,
     0x05130000,
-    0xf0ef0ea5,
+    0xf0ef0ba5,
     0x8913feef,
     0x4503ff04,
     0x09050009,
     0x8a5ff0ef,
     0xfe991be3,
     0x00001517,
-    0x0ec50513,
+    0x0bc50513,
     0xfd0ff0ef,
     0x01090c13,
     0x00094503,
@@ -627,51 +627,51 @@ uint32_t reset_vec[reset_vec_size] = {
     0x1be3887f,
     0x1517ff2c,
     0x05130000,
-    0xf0ef0ee5,
+    0xf0ef0be5,
     0x6888fb2f,
     0x02848913,
     0x07048c13,
     0x827ff0ef,
     0x00001517,
-    0x0e450513,
+    0x0b450513,
     0xf98ff0ef,
     0xf0ef6c88,
     0x1517815f,
     0x05130000,
-    0xf0ef0e25,
+    0xf0ef0b25,
     0x7088f86f,
     0x803ff0ef,
     0x00001517,
-    0x0e050513,
+    0x0b050513,
     0xf74ff0ef,
     0x00094503,
     0xf0ef0905,
     0x1be382ff,
     0x1517ff2c,
     0x05130000,
-    0x2985e3e5,
+    0x2985e0e5,
     0xf58ff0ef,
     0x08048493,
     0xf57993e3,
     0x00001517,
-    0x0c050513,
+    0x09050513,
     0xf44ff0ef,
     0x020aa583,
     0x8552865a,
     0xc33ff0ef,
     0xc50d84aa,
     0x00001517,
-    0xee450513,
+    0xeb450513,
     0xf28ff0ef,
     0x00001517,
-    0xef050513,
+    0xec050513,
     0xf1cff0ef,
     0xf0ef8526,
     0x1517f98f,
     0x05130000,
-    0xb3f5dee5,
+    0xb3f5dbe5,
     0x00001517,
-    0x09450513,
+    0x06450513,
     0xf00ff0ef,
     0x65f1bbb5,
     0x01c9c537,
@@ -680,7 +680,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0xe4063805,
     0xea4ff0ef,
     0x00001517,
-    0xd8050513,
+    0xd5050513,
     0xedcff0ef,
     0x65a14505,
     0xf0ef057e,
@@ -772,15 +772,15 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000000,
     0x00000000,
     0xedfe0dd0,
-    0x100c0000,
+    0xda0b0000,
     0x38000000,
-    0x38090000,
+    0x08090000,
     0x28000000,
     0x11000000,
     0x10000000,
     0x00000000,
-    0xd8020000,
-    0x00090000,
+    0xd2020000,
+    0xd0080000,
     0x00000000,
     0x00000000,
     0x00000000,
@@ -907,10 +907,6 @@ uint32_t reset_vec[reset_vec_size] = {
     0x04000000,
     0xb5000000,
     0x02000000,
-    0x03000000,
-    0x04000000,
-    0xbb000000,
-    0x02000000,
     0x02000000,
     0x02000000,
     0x02000000,
@@ -948,19 +944,19 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000064,
     0x03000000,
     0x0c000000,
-    0xc3000000,
+    0xbd000000,
     0x01000000,
     0x01000000,
     0x00000000,
     0x03000000,
     0x0a000000,
-    0xc9000000,
+    0xc3000000,
     0x72616568,
     0x61656274,
     0x00000074,
     0x03000000,
     0x00000000,
-    0xdf000000,
+    0xd9000000,
     0x02000000,
     0x02000000,
     0x01000000,
@@ -986,7 +982,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00007375,
     0x03000000,
     0x00000000,
-    0xf6000000,
+    0xf0000000,
     0x01000000,
     0x6e696c63,
     0x30324074,
@@ -1001,7 +997,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000000,
     0x03000000,
     0x10000000,
-    0xfd000000,
+    0xf7000000,
     0x02000000,
     0x03000000,
     0x02000000,
@@ -1015,7 +1011,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000c00,
     0x03000000,
     0x08000000,
-    0x11010000,
+    0x0b010000,
     0x746e6f63,
     0x006c6f72,
     0x02000000,
@@ -1047,7 +1043,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0xa0000000,
     0x03000000,
     0x10000000,
-    0xfd000000,
+    0xf7000000,
     0x02000000,
     0x0b000000,
     0x02000000,
@@ -1061,20 +1057,16 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000004,
     0x03000000,
     0x04000000,
-    0x1b010000,
+    0x15010000,
     0x07000000,
     0x03000000,
     0x04000000,
-    0x2e010000,
+    0x28010000,
     0x03000000,
     0x03000000,
     0x04000000,
     0xb5000000,
     0x03000000,
-    0x03000000,
-    0x04000000,
-    0xbb000000,
-    0x03000000,
     0x02000000,
     0x01000000,
     0x75626564,
@@ -1091,7 +1083,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00333130,
     0x03000000,
     0x08000000,
-    0xfd000000,
+    0xf7000000,
     0x02000000,
     0xffff0000,
     0x03000000,
@@ -1103,7 +1095,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00100000,
     0x03000000,
     0x08000000,
-    0x11010000,
+    0x0b010000,
     0x746e6f63,
     0x006c6f72,
     0x02000000,
@@ -1130,23 +1122,23 @@ uint32_t reset_vec[reset_vec_size] = {
     0x80c3c901,
     0x03000000,
     0x04000000,
-    0x39010000,
+    0x33010000,
     0x00c20100,
     0x03000000,
     0x04000000,
-    0x47010000,
+    0x41010000,
     0x03000000,
     0x03000000,
     0x04000000,
-    0x58010000,
+    0x52010000,
     0x01000000,
     0x03000000,
     0x04000000,
-    0x63010000,
+    0x5d010000,
     0x02000000,
     0x03000000,
     0x04000000,
-    0x6d010000,
+    0x67010000,
     0x04000000,
     0x02000000,
     0x01000000,
@@ -1178,11 +1170,11 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000000,
     0x03000000,
     0x04000000,
-    0x47010000,
+    0x41010000,
     0x03000000,
     0x03000000,
     0x08000000,
-    0x58010000,
+    0x52010000,
     0x02000000,
     0x02000000,
     0x03000000,
@@ -1194,24 +1186,24 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00100000,
     0x03000000,
     0x08000000,
-    0x7a010000,
+    0x74010000,
     0x746e696b,
     0x00377865,
     0x03000000,
     0x04000000,
-    0x86010000,
+    0x80010000,
     0x01000000,
     0x03000000,
     0x04000000,
-    0x96010000,
+    0x90010000,
     0x01000000,
     0x03000000,
     0x04000000,
-    0xa7010000,
+    0xa1010000,
     0x08000000,
     0x03000000,
     0x04000000,
-    0xbe010000,
+    0xb8010000,
     0x04000000,
     0x01000000,
     0x40636d6d,
@@ -1229,16 +1221,16 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000000,
     0x03000000,
     0x04000000,
-    0xcd010000,
+    0xc7010000,
     0x20bcbe00,
     0x03000000,
     0x08000000,
-    0xdf010000,
+    0xd9010000,
     0xe40c0000,
     0xe40c0000,
     0x03000000,
     0x00000000,
-    0xee010000,
+    0xe8010000,
     0x02000000,
     0x02000000,
     0x01000000,
@@ -1261,16 +1253,16 @@ uint32_t reset_vec[reset_vec_size] = {
     0x006b726f,
     0x03000000,
     0x04000000,
-    0x47010000,
+    0x41010000,
     0x03000000,
     0x03000000,
     0x08000000,
-    0x58010000,
+    0x52010000,
     0x03000000,
     0x00000000,
     0x03000000,
     0x06000000,
-    0xf9010000,
+    0xf3010000,
     0x023e1800,
     0x00007fe3,
     0x03000000,
@@ -1288,7 +1280,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000030,
     0x03000000,
     0x04000000,
-    0x0b020000,
+    0x05020000,
     0x02000000,
     0x03000000,
     0x15000000,
@@ -1301,7 +1293,7 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000000,
     0x03000000,
     0x00000000,
-    0x17020000,
+    0x11020000,
     0x03000000,
     0x10000000,
     0x67000000,
@@ -1311,52 +1303,48 @@ uint32_t reset_vec[reset_vec_size] = {
     0x00000100,
     0x03000000,
     0x04000000,
-    0x27020000,
+    0x21020000,
     0x00000000,
     0x03000000,
     0x04000000,
-    0x37020000,
+    0x31020000,
     0x00000000,
     0x03000000,
     0x04000000,
-    0x49020000,
+    0x43020000,
     0x00000000,
     0x03000000,
     0x04000000,
-    0x5b020000,
+    0x55020000,
     0x00000000,
     0x03000000,
     0x04000000,
-    0x6f020000,
+    0x69020000,
     0x08000000,
     0x03000000,
     0x04000000,
-    0x7f020000,
+    0x79020000,
     0x08000000,
     0x03000000,
     0x04000000,
-    0x90020000,
+    0x8a020000,
     0x00000000,
     0x03000000,
     0x04000000,
-    0xa7020000,
+    0xa1020000,
     0x01000000,
     0x03000000,
     0x04000000,
-    0xb4020000,
+    0xae020000,
     0xffffffff,
     0x03000000,
     0x04000000,
-    0xc5020000,
+    0xbf020000,
     0xffffffff,
     0x03000000,
     0x04000000,
     0xb5000000,
     0x01000000,
-    0x03000000,
-    0x04000000,
-    0xbb000000,
-    0x01000000,
     0x02000000,
     0x02000000,
     0x02000000,
@@ -1406,143 +1394,143 @@ uint32_t reset_vec[reset_vec_size] = {
     0x6f632d74,
     0x6f72746e,
     0x72656c6c,
-    0x6e696c00,
-    0x702c7875,
-    0x646e6168,
-    0x6700656c,
-    0x736f6970,
-    0x6e696c00,
-    0x642c7875,
-    0x75616665,
-    0x742d746c,
-    0x67676972,
-    0x72007265,
-    0x69617465,
-    0x74732d6e,
-    0x2d657461,
-    0x70737573,
-    0x65646e65,
-    0x61720064,
-    0x7365676e,
-    0x746e6900,
-    0x75727265,
-    0x2d737470,
-    0x65747865,
-    0x6465646e,
-    0x67657200,
-    0x6d616e2d,
-    0x72007365,
-    0x76637369,
-    0x78616d2c,
-    0x6972702d,
-    0x7469726f,
-    0x69720079,
-    0x2c766373,
-    0x7665646e,
-    0x72756300,
-    0x746e6572,
-    0x6570732d,
-    0x69006465,
+    0x61687000,
+    0x656c646e,
+    0x69706700,
+    0x6c00736f,
+    0x78756e69,
+    0x6665642c,
+    0x746c7561,
+    0x6972742d,
+    0x72656767,
+    0x74657200,
+    0x2d6e6961,
+    0x74617473,
+    0x75732d65,
+    0x6e657073,
+    0x00646564,
+    0x676e6172,
+    0x69007365,
     0x7265746e,
     0x74707572,
-    0x7261702d,
-    0x00746e65,
-    0x65746e69,
-    0x70757272,
-    0x72007374,
-    0x732d6765,
-    0x74666968,
+    0x78652d73,
+    0x646e6574,
+    0x72006465,
+    0x6e2d6765,
+    0x73656d61,
+    0x73697200,
+    0x6d2c7663,
+    0x702d7861,
+    0x726f6972,
+    0x00797469,
+    0x63736972,
+    0x646e2c76,
+    0x63007665,
+    0x65727275,
+    0x732d746e,
+    0x64656570,
+    0x746e6900,
+    0x75727265,
+    0x702d7470,
+    0x6e657261,
+    0x6e690074,
+    0x72726574,
+    0x73747075,
     0x67657200,
-    0x2d6f692d,
-    0x74646977,
-    0x6c780068,
-    0x662c786e,
-    0x6c696d61,
-    0x6c780079,
-    0x662c786e,
-    0x2d6f6669,
-    0x73697865,
-    0x6c780074,
-    0x6e2c786e,
-    0x732d6d75,
-    0x69622d73,
-    0x78007374,
-    0x2c786e6c,
-    0x2d6d756e,
-    0x6e617274,
-    0x72656673,
-    0x7469622d,
-    0x6c780073,
-    0x732c786e,
-    0x722d6b63,
-    0x6f697461,
-    0x69707300,
-    0x78616d2d,
-    0x6572662d,
-    0x6e657571,
-    0x76007963,
-    0x61746c6f,
-    0x722d6567,
-    0x65676e61,
-    0x69640073,
-    0x6c626173,
-    0x70772d65,
-    0x636f6c00,
-    0x6d2d6c61,
-    0x612d6361,
-    0x65726464,
-    0x23007373,
-    0x6f697067,
-    0x6c65632d,
-    0x6700736c,
-    0x2d6f6970,
-    0x746e6f63,
-    0x6c6c6f72,
-    0x78007265,
-    0x2c786e6c,
-    0x2d6c6c61,
-    0x75706e69,
-    0x78007374,
+    0x6968732d,
+    0x72007466,
+    0x692d6765,
+    0x69772d6f,
+    0x00687464,
+    0x786e6c78,
+    0x6d61662c,
+    0x00796c69,
+    0x786e6c78,
+    0x6669662c,
+    0x78652d6f,
+    0x00747369,
+    0x786e6c78,
+    0x6d756e2c,
+    0x2d73732d,
+    0x73746962,
+    0x6e6c7800,
+    0x756e2c78,
+    0x72742d6d,
+    0x66736e61,
+    0x622d7265,
+    0x00737469,
+    0x786e6c78,
+    0x6b63732c,
+    0x7461722d,
+    0x73006f69,
+    0x6d2d6970,
+    0x662d7861,
+    0x75716572,
+    0x79636e65,
+    0x6c6f7600,
+    0x65676174,
+    0x6e61722d,
+    0x00736567,
+    0x61736964,
+    0x2d656c62,
+    0x6c007077,
+    0x6c61636f,
+    0x63616d2d,
+    0x6464612d,
+    0x73736572,
+    0x70672300,
+    0x632d6f69,
+    0x736c6c65,
+    0x69706700,
+    0x6f632d6f,
+    0x6f72746e,
+    0x72656c6c,
+    0x6e6c7800,
+    0x6c612c78,
+    0x6e692d6c,
+    0x73747570,
+    0x6e6c7800,
+    0x6c612c78,
+    0x6e692d6c,
+    0x73747570,
+    0x7800322d,
     0x2c786e6c,
-    0x2d6c6c61,
-    0x75706e69,
-    0x322d7374,
+    0x74756f64,
+    0x6665642d,
+    0x746c7561,
     0x6e6c7800,
     0x6f642c78,
     0x642d7475,
     0x75616665,
+    0x322d746c,
+    0x6e6c7800,
+    0x70672c78,
+    0x772d6f69,
+    0x68746469,
+    0x6e6c7800,
+    0x70672c78,
+    0x2d326f69,
+    0x74646977,
+    0x6c780068,
+    0x692c786e,
+    0x7265746e,
+    0x74707572,
+    0x6572702d,
+    0x746e6573,
+    0x6e6c7800,
+    0x73692c78,
+    0x6175642d,
+    0x6c78006c,
+    0x742c786e,
+    0x642d6972,
+    0x75616665,
     0x7800746c,
     0x2c786e6c,
-    0x74756f64,
-    0x6665642d,
-    0x746c7561,
-    0x7800322d,
-    0x2c786e6c,
-    0x6f697067,
-    0x6469772d,
-    0x78006874,
-    0x2c786e6c,
-    0x6f697067,
-    0x69772d32,
-    0x00687464,
-    0x786e6c78,
-    0x746e692c,
-    0x75727265,
-    0x702d7470,
-    0x65736572,
-    0x7800746e,
-    0x2c786e6c,
-    0x642d7369,
-    0x006c6175,
-    0x786e6c78,
-    0x6972742c,
-    0x6665642d,
-    0x746c7561,
-    0x6e6c7800,
-    0x72742c78,
-    0x65642d69,
-    0x6c756166,
-    0x00322d74,
+    0x2d697274,
+    0x61666564,
+    0x2d746c75,
+    0x00000032,
+    0x00000000,
     0x6c6c6548,
     0x6f57206f,
     0x21646c72,
diff --git a/fpga/src/bootrom/bootrom.sv b/fpga/src/bootrom/bootrom.sv
index b59bad47a6af14e19a9d0e44c512c33e64cb4712..344c4f5a1bf90d587c797f2f0849892bd1e0a3cb 100644
--- a/fpga/src/bootrom/bootrom.sv
+++ b/fpga/src/bootrom/bootrom.sv
@@ -20,7 +20,7 @@ module bootrom (
    input  logic [63:0]  addr_i,
    output logic [63:0]  rdata_o
 );
-    localparam int RomSize = 866;
+    localparam int RomSize = 860;
 
     const logic [RomSize-1:0][63:0] mem = {
         64'h00000000_00000000,
@@ -119,75 +119,75 @@ module bootrom (
         64'h49505320_74696e69,
         64'h00000a0d_21646c72,
         64'h6f57206f_6c6c6548,
-        64'h00322d74_6c756166,
-        64'h65642d69_72742c78,
+        64'h00000000_00000032,
+        64'h2d746c75_61666564,
+        64'h2d697274_2c786e6c,
+        64'h7800746c_75616665,
+        64'h642d6972_742c786e,
+        64'h6c78006c_6175642d,
+        64'h73692c78_6e6c7800,
+        64'h746e6573_6572702d,
+        64'h74707572_7265746e,
+        64'h692c786e_6c780068,
+        64'h74646977_2d326f69,
+        64'h70672c78_6e6c7800,
+        64'h68746469_772d6f69,
+        64'h70672c78_6e6c7800,
+        64'h322d746c_75616665,
+        64'h642d7475_6f642c78,
         64'h6e6c7800_746c7561,
-        64'h6665642d_6972742c,
-        64'h786e6c78_006c6175,
-        64'h642d7369_2c786e6c,
-        64'h7800746e_65736572,
-        64'h702d7470_75727265,
-        64'h746e692c_786e6c78,
-        64'h00687464_69772d32,
-        64'h6f697067_2c786e6c,
-        64'h78006874_6469772d,
-        64'h6f697067_2c786e6c,
-        64'h7800322d_746c7561,
         64'h6665642d_74756f64,
-        64'h2c786e6c_7800746c,
-        64'h75616665_642d7475,
-        64'h6f642c78_6e6c7800,
-        64'h322d7374_75706e69,
-        64'h2d6c6c61_2c786e6c,
-        64'h78007374_75706e69,
-        64'h2d6c6c61_2c786e6c,
-        64'h78007265_6c6c6f72,
-        64'h746e6f63_2d6f6970,
-        64'h6700736c_6c65632d,
-        64'h6f697067_23007373,
-        64'h65726464_612d6361,
-        64'h6d2d6c61_636f6c00,
-        64'h70772d65_6c626173,
-        64'h69640073_65676e61,
-        64'h722d6567_61746c6f,
-        64'h76007963_6e657571,
-        64'h6572662d_78616d2d,
-        64'h69707300_6f697461,
-        64'h722d6b63_732c786e,
-        64'h6c780073_7469622d,
-        64'h72656673_6e617274,
-        64'h2d6d756e_2c786e6c,
-        64'h78007374_69622d73,
-        64'h732d6d75_6e2c786e,
-        64'h6c780074_73697865,
-        64'h2d6f6669_662c786e,
-        64'h6c780079_6c696d61,
-        64'h662c786e_6c780068,
-        64'h74646977_2d6f692d,
-        64'h67657200_74666968,
-        64'h732d6765_72007374,
-        64'h70757272_65746e69,
-        64'h00746e65_7261702d,
+        64'h2c786e6c_7800322d,
+        64'h73747570_6e692d6c,
+        64'h6c612c78_6e6c7800,
+        64'h73747570_6e692d6c,
+        64'h6c612c78_6e6c7800,
+        64'h72656c6c_6f72746e,
+        64'h6f632d6f_69706700,
+        64'h736c6c65_632d6f69,
+        64'h70672300_73736572,
+        64'h6464612d_63616d2d,
+        64'h6c61636f_6c007077,
+        64'h2d656c62_61736964,
+        64'h00736567_6e61722d,
+        64'h65676174_6c6f7600,
+        64'h79636e65_75716572,
+        64'h662d7861_6d2d6970,
+        64'h73006f69_7461722d,
+        64'h6b63732c_786e6c78,
+        64'h00737469_622d7265,
+        64'h66736e61_72742d6d,
+        64'h756e2c78_6e6c7800,
+        64'h73746962_2d73732d,
+        64'h6d756e2c_786e6c78,
+        64'h00747369_78652d6f,
+        64'h6669662c_786e6c78,
+        64'h00796c69_6d61662c,
+        64'h786e6c78_00687464,
+        64'h69772d6f_692d6765,
+        64'h72007466_6968732d,
+        64'h67657200_73747075,
+        64'h72726574_6e690074,
+        64'h6e657261_702d7470,
+        64'h75727265_746e6900,
+        64'h64656570_732d746e,
+        64'h65727275_63007665,
+        64'h646e2c76_63736972,
+        64'h00797469_726f6972,
+        64'h702d7861_6d2c7663,
+        64'h73697200_73656d61,
+        64'h6e2d6765_72006465,
+        64'h646e6574_78652d73,
         64'h74707572_7265746e,
-        64'h69006465_6570732d,
-        64'h746e6572_72756300,
-        64'h7665646e_2c766373,
-        64'h69720079_7469726f,
-        64'h6972702d_78616d2c,
-        64'h76637369_72007365,
-        64'h6d616e2d_67657200,
-        64'h6465646e_65747865,
-        64'h2d737470_75727265,
-        64'h746e6900_7365676e,
-        64'h61720064_65646e65,
-        64'h70737573_2d657461,
-        64'h74732d6e_69617465,
-        64'h72007265_67676972,
-        64'h742d746c_75616665,
-        64'h642c7875_6e696c00,
-        64'h736f6970_6700656c,
-        64'h646e6168_702c7875,
-        64'h6e696c00_72656c6c,
+        64'h69007365_676e6172,
+        64'h00646564_6e657073,
+        64'h75732d65_74617473,
+        64'h2d6e6961_74657200,
+        64'h72656767_6972742d,
+        64'h746c7561_6665642c,
+        64'h78756e69_6c00736f,
+        64'h69706700_656c646e,
+        64'h61687000_72656c6c,
         64'h6f72746e_6f632d74,
         64'h70757272_65746e69,
         64'h00736c6c_65632d74,
@@ -212,41 +212,39 @@ module bootrom (
         64'h73736572_64646123,
         64'h09000000_02000000,
         64'h02000000_02000000,
-        64'h01000000_bb000000,
-        64'h04000000_03000000,
         64'h01000000_b5000000,
         64'h04000000_03000000,
-        64'hffffffff_c5020000,
+        64'hffffffff_bf020000,
         64'h04000000_03000000,
-        64'hffffffff_b4020000,
+        64'hffffffff_ae020000,
         64'h04000000_03000000,
-        64'h01000000_a7020000,
+        64'h01000000_a1020000,
         64'h04000000_03000000,
-        64'h00000000_90020000,
+        64'h00000000_8a020000,
         64'h04000000_03000000,
-        64'h08000000_7f020000,
+        64'h08000000_79020000,
         64'h04000000_03000000,
-        64'h08000000_6f020000,
+        64'h08000000_69020000,
         64'h04000000_03000000,
-        64'h00000000_5b020000,
+        64'h00000000_55020000,
         64'h04000000_03000000,
-        64'h00000000_49020000,
+        64'h00000000_43020000,
         64'h04000000_03000000,
-        64'h00000000_37020000,
+        64'h00000000_31020000,
         64'h04000000_03000000,
-        64'h00000000_27020000,
+        64'h00000000_21020000,
         64'h04000000_03000000,
         64'h00000100_00000000,
         64'h00000040_00000000,
         64'h67000000_10000000,
-        64'h03000000_17020000,
+        64'h03000000_11020000,
         64'h00000000_03000000,
         64'h00000000_612e3030,
         64'h2e312d6f_6970672d,
         64'h7370782c_786e6c78,
         64'h1b000000_15000000,
         64'h03000000_02000000,
-        64'h0b020000_04000000,
+        64'h05020000_04000000,
         64'h03000000_00000030,
         64'h30303030_30303440,
         64'h6f697067_01000000,
@@ -255,11 +253,11 @@ module bootrom (
         64'h00000000_67000000,
         64'h10000000_03000000,
         64'h00007fe3_023e1800,
-        64'hf9010000_06000000,
+        64'hf3010000_06000000,
         64'h03000000_00000000,
-        64'h03000000_58010000,
+        64'h03000000_52010000,
         64'h08000000_03000000,
-        64'h03000000_47010000,
+        64'h03000000_41010000,
         64'h04000000_03000000,
         64'h006b726f_7774656e,
         64'h5b000000_08000000,
@@ -271,11 +269,11 @@ module bootrom (
         64'h40687465_2d637369,
         64'h72776f6c_01000000,
         64'h02000000_02000000,
-        64'hee010000_00000000,
+        64'he8010000_00000000,
         64'h03000000_e40c0000,
-        64'he40c0000_df010000,
+        64'he40c0000_d9010000,
         64'h08000000_03000000,
-        64'h20bcbe00_cd010000,
+        64'h20bcbe00_c7010000,
         64'h04000000_03000000,
         64'h00000000_67000000,
         64'h04000000_03000000,
@@ -284,24 +282,24 @@ module bootrom (
         64'h1b000000_0d000000,
         64'h03000000_00000030,
         64'h40636d6d_01000000,
-        64'h04000000_be010000,
+        64'h04000000_b8010000,
         64'h04000000_03000000,
-        64'h08000000_a7010000,
+        64'h08000000_a1010000,
         64'h04000000_03000000,
-        64'h01000000_96010000,
+        64'h01000000_90010000,
         64'h04000000_03000000,
-        64'h01000000_86010000,
+        64'h01000000_80010000,
         64'h04000000_03000000,
         64'h00377865_746e696b,
-        64'h7a010000_08000000,
+        64'h74010000_08000000,
         64'h03000000_00100000,
         64'h00000000_00000020,
         64'h00000000_67000000,
         64'h10000000_03000000,
         64'h02000000_02000000,
-        64'h58010000_08000000,
+        64'h52010000_08000000,
         64'h03000000_03000000,
-        64'h47010000_04000000,
+        64'h41010000_04000000,
         64'h03000000_00000000,
         64'h0f000000_04000000,
         64'h03000000_01000000,
@@ -317,15 +315,15 @@ module bootrom (
         64'h30303032_40697073,
         64'h2d737078_01000000,
         64'h02000000_04000000,
-        64'h6d010000_04000000,
+        64'h67010000_04000000,
         64'h03000000_02000000,
-        64'h63010000_04000000,
+        64'h5d010000_04000000,
         64'h03000000_01000000,
-        64'h58010000_04000000,
+        64'h52010000_04000000,
         64'h03000000_03000000,
-        64'h47010000_04000000,
+        64'h41010000_04000000,
         64'h03000000_00c20100,
-        64'h39010000_04000000,
+        64'h33010000_04000000,
         64'h03000000_80c3c901,
         64'h4b000000_04000000,
         64'h03000000_00100000,
@@ -338,13 +336,13 @@ module bootrom (
         64'h30303030_30303140,
         64'h74726175_01000000,
         64'h02000000_006c6f72,
-        64'h746e6f63_11010000,
+        64'h746e6f63_0b010000,
         64'h08000000_03000000,
         64'h00100000_00000000,
         64'h00000000_00000000,
         64'h67000000_10000000,
         64'h03000000_ffff0000,
-        64'h02000000_fd000000,
+        64'h02000000_f7000000,
         64'h08000000_03000000,
         64'h00333130_2d677562,
         64'h65642c76_63736972,
@@ -353,20 +351,18 @@ module bootrom (
         64'h72656c6c_6f72746e,
         64'h6f632d67_75626564,
         64'h01000000_02000000,
-        64'h03000000_bb000000,
-        64'h04000000_03000000,
         64'h03000000_b5000000,
         64'h04000000_03000000,
-        64'h03000000_2e010000,
+        64'h03000000_28010000,
         64'h04000000_03000000,
-        64'h07000000_1b010000,
+        64'h07000000_15010000,
         64'h04000000_03000000,
         64'h00000004_00000000,
         64'h0000000c_00000000,
         64'h67000000_10000000,
         64'h03000000_09000000,
         64'h02000000_0b000000,
-        64'h02000000_fd000000,
+        64'h02000000_f7000000,
         64'h10000000_03000000,
         64'ha0000000_00000000,
         64'h03000000_00306369,
@@ -382,14 +378,14 @@ module bootrom (
         64'h6f632d74_70757272,
         64'h65746e69_01000000,
         64'h02000000_006c6f72,
-        64'h746e6f63_11010000,
+        64'h746e6f63_0b010000,
         64'h08000000_03000000,
         64'h00000c00_00000000,
         64'h00000002_00000000,
         64'h67000000_10000000,
         64'h03000000_07000000,
         64'h02000000_03000000,
-        64'h02000000_fd000000,
+        64'h02000000_f7000000,
         64'h10000000_03000000,
         64'h00000000_30746e69,
         64'h6c632c76_63736972,
@@ -397,7 +393,7 @@ module bootrom (
         64'h03000000_00000030,
         64'h30303030_30324074,
         64'h6e696c63_01000000,
-        64'hf6000000_00000000,
+        64'hf0000000_00000000,
         64'h03000000_00007375,
         64'h622d656c_706d6973,
         64'h00636f73_2d657261,
@@ -410,13 +406,13 @@ module bootrom (
         64'h04000000_03000000,
         64'h00636f73_01000000,
         64'h02000000_02000000,
-        64'hdf000000_00000000,
+        64'hd9000000_00000000,
         64'h03000000_00000074,
         64'h61656274_72616568,
-        64'hc9000000_0a000000,
+        64'hc3000000_0a000000,
         64'h03000000_00000000,
         64'h01000000_01000000,
-        64'hc3000000_0c000000,
+        64'hbd000000_0c000000,
         64'h03000000_00000064,
         64'h656c2d74_61656274,
         64'h72616568_01000000,
@@ -435,8 +431,6 @@ module bootrom (
         64'h38407972_6f6d656d,
         64'h01000000_02000000,
         64'h02000000_02000000,
-        64'h02000000_bb000000,
-        64'h04000000_03000000,
         64'h02000000_b5000000,
         64'h04000000_03000000,
         64'h00006374_6e692d75,
@@ -500,11 +494,11 @@ module bootrom (
         64'h00000000_01000000,
         64'h00000000_00000000,
         64'h00000000_00000000,
-        64'h00090000_d8020000,
+        64'hd0080000_d2020000,
         64'h00000000_10000000,
         64'h11000000_28000000,
-        64'h38090000_38000000,
-        64'h100c0000_edfe0dd0,
+        64'h08090000_38000000,
+        64'hda0b0000_edfe0dd0,
         64'h00000000_00000000,
         64'h00000000_00000000,
         64'h00000000_00000000,
@@ -550,92 +544,92 @@ module bootrom (
         64'h01f41413_0010041b,
         64'he911d23f_f0ef057e,
         64'h65a14505_edcff0ef,
-        64'hd8050513_00001517,
+        64'hd5050513_00001517,
         64'hea4ff0ef_e4063805,
         64'h05132005_85931141,
         64'h01c9c537_65f1bbb5,
-        64'hf00ff0ef_09450513,
-        64'h00001517_b3f5dee5,
+        64'hf00ff0ef_06450513,
+        64'h00001517_b3f5dbe5,
         64'h05130000_1517f98f,
         64'hf0ef8526_f1cff0ef,
-        64'hef050513_00001517,
-        64'hf28ff0ef_ee450513,
+        64'hec050513_00001517,
+        64'hf28ff0ef_eb450513,
         64'h00001517_c50d84aa,
         64'hc33ff0ef_8552865a,
         64'h020aa583_f44ff0ef,
-        64'h0c050513_00001517,
+        64'h09050513_00001517,
         64'hf57993e3_08048493,
-        64'hf58ff0ef_2985e3e5,
+        64'hf58ff0ef_2985e0e5,
         64'h05130000_1517ff2c,
         64'h1be382ff_f0ef0905,
         64'h00094503_f74ff0ef,
-        64'h0e050513_00001517,
+        64'h0b050513_00001517,
         64'h803ff0ef_7088f86f,
-        64'hf0ef0e25_05130000,
+        64'hf0ef0b25_05130000,
         64'h1517815f_f0ef6c88,
-        64'hf98ff0ef_0e450513,
+        64'hf98ff0ef_0b450513,
         64'h00001517_827ff0ef,
         64'h07048c13_02848913,
-        64'h6888fb2f_f0ef0ee5,
+        64'h6888fb2f_f0ef0be5,
         64'h05130000_1517ff2c,
         64'h1be3887f_f0ef0905,
         64'h00094503_01090c13,
-        64'hfd0ff0ef_0ec50513,
+        64'hfd0ff0ef_0bc50513,
         64'h00001517_fe991be3,
         64'h8a5ff0ef_09050009,
         64'h4503ff04_8913feef,
-        64'hf0ef0ea5_05130000,
+        64'hf0ef0ba5_05130000,
         64'h15178bff_f0ef0ff9,
-        64'hf513803f_f0ef0e65,
+        64'hf513803f_f0ef0b65,
         64'h05130000_1517b5fd,
-        64'hef050513_00001517,
+        64'hec050513_00001517,
         64'h89bff0ef_854e81ff,
-        64'hf0efff25_05130000,
-        64'h151782bf_f0effe65,
+        64'hf0effc25_05130000,
+        64'h151782bf_f0effb65,
         64'h05130000_1517c50d,
         64'h080489aa_8a8ad39f,
         64'hf0ef850a_46057101,
         64'h04892583_84dff0ef,
-        64'hf3050513_00001517,
+        64'hf0050513_00001517,
         64'h89bff0ef_455685ff,
-        64'hf0ef1225_05130000,
+        64'hf0ef0f25_05130000,
         64'h15178adf_f0ef4546,
-        64'h871ff0ef_11450513,
+        64'h871ff0ef_0e450513,
         64'h00001517_8ffff0ef,
-        64'h6526883f_f0ef1065,
+        64'h6526883f_f0ef0d65,
         64'h05130000_1517911f,
         64'hf0ef7502_895ff0ef,
-        64'h10850513_00001517,
+        64'h0d850513_00001517,
         64'h923ff0ef_65628a7f,
-        64'hf0ef1025_05130000,
+        64'hf0ef0d25_05130000,
         64'h15178f5f_f0ef4552,
-        64'h8b9ff0ef_10450513,
+        64'h8b9ff0ef_0d450513,
         64'h00001517_907ff0ef,
-        64'h45428cbf_f0ef1065,
+        64'h45428cbf_f0ef0d65,
         64'h05130000_1517919f,
         64'hf0ef4532_8ddff0ef,
-        64'h10850513_00001517,
+        64'h0d850513_00001517,
         64'h92bff0ef_45228eff,
-        64'hf0ef10a5_05130000,
+        64'hf0ef0da5_05130000,
         64'h151797df_f0ef4b91,
-        64'h6502903f_f0ef10e5,
+        64'h6502903f_f0ef0de5,
         64'h05130000_151790ff,
-        64'hf0ef0fa5_05130000,
+        64'hf0ef0ca5_05130000,
         64'h1517bf61_54f991ff,
-        64'hf0ef0025_05130000,
+        64'hf0effd25_05130000,
         64'h15179adf_f0ef8526,
-        64'h931ff0ef_10450513,
+        64'h931ff0ef_0d450513,
         64'h00001517_93dff0ef,
-        64'h0f850513_00001517,
+        64'h0c850513_00001517,
         64'hc90584aa_890ae49f,
         64'hf0ef850a_45854605,
-        64'h710195bf_f0ef0fe5,
+        64'h710195bf_f0ef0ce5,
         64'h05130000_15178082,
         64'h61616c02_6ba26b42,
         64'h6ae27a02_79a27942,
         64'h74e26406_852660a6,
         64'hfb040113_54fd987f,
-        64'hf0ef1025_05130000,
+        64'hf0ef0d25_05130000,
         64'h1517c51d_df3ff0ef,
         64'h8b2e8a2a_0880e062,
         64'he45eec56_f44ef84a,
@@ -644,7 +638,7 @@ module bootrom (
         64'h80826169_6baa6b4a,
         64'h6aea7a0a_79aa794a,
         64'h74ea640e_60ae8522,
-        64'h547d9d3f_f0ef1265,
+        64'h547d9d3f_f0ef0f65,
         64'h05130000_1517c59f,
         64'hf0efc5df_f0efc61f,
         64'hf0efc65f_f0efc69f,
@@ -653,7 +647,7 @@ module bootrom (
         64'hc7bff0ef_c87ff0ef,
         64'h45314581_46054401,
         64'hf93046e3_19fda17f,
-        64'hf0ef18a5_05130000,
+        64'hf0ef15a5_05130000,
         64'h1517e799_0359e7b3,
         64'h07241a63_29019041,
         64'h14428c49_cafff0ef,
@@ -700,13 +694,13 @@ module bootrom (
         64'h57f9efbf_f0efc911,
         64'h57fdeb7f_f0effc6d,
         64'he03ff0ef_347d4429,
-        64'hb91ff0ef_2cc50513,
+        64'hb91ff0ef_29c50513,
         64'h00001517_c89ff0ef,
         64'he022e406_11418082,
         64'h61050015_351364a2,
         64'h644260e2_0004051b,
         64'hfc940ce3_e37ff0ef,
-        64'heb3ff0ef_2f450513,
+        64'heb3ff0ef_2c450513,
         64'h00001517_85aa842a,
         64'he53ff0ef_02900513,
         64'h400005b7_07700613,
@@ -714,7 +708,7 @@ module bootrom (
         64'hec06e426_11018082,
         64'h01410015_3513157d,
         64'h640260a2_0004051b,
-        64'hef3ff0ef_32e50513,
+        64'hef3ff0ef_2fe50513,
         64'h85a20000_1517e89f,
         64'hf0ef842a_e97ff0ef,
         64'he022e406_03700513,
@@ -733,20 +727,20 @@ module bootrom (
         64'h06131101_bfcd4501,
         64'h80826105_690264a2,
         64'h644260e2_4505f89f,
-        64'hf0ef4585_3bc50513,
+        64'hf0ef4585_38c50513,
         64'h00001517_fe9915e3,
         64'hc00df25f_f0ef892a,
         64'h347df35f_f0ef4501,
         64'h45810950_06134485,
         64'h71040413_e04aec06,
         64'he4266409_e8221101,
-        64'hcd1ff06f_61053b65,
+        64'hcd1ff06f_61053865,
         64'h05130000_151760e2,
         64'h6442da7f_f0ef852e,
-        64'h65a2cebf_f0ef3fe5,
+        64'h65a2cebf_f0ef3ce5,
         64'h05130000_1517cf7f,
         64'hf0ef8522_cfdff0ef,
-        64'he42eec06_40450513,
+        64'he42eec06_3d450513,
         64'h00001517_842ae822,
         64'h11018082_614564e2,
         64'h740270a2_f47d147d,
@@ -783,9 +777,9 @@ module bootrom (
         64'hd3b84719_dbb86442,
         64'h60e20ff4_7513577d,
         64'h200007b7_e25ff0ef,
-        64'h50850513_00001517,
+        64'h4d850513_00001517,
         64'heb3ff0ef_91011502,
-        64'h4088e3bf_f0ef5265,
+        64'h4088e3bf_f0ef4f65,
         64'h05130000_1517e395,
         64'h8b852401_53fc57e0,
         64'hff658b05_06478493,
@@ -794,24 +788,24 @@ module bootrom (
         64'h00010640_0793d7a8,
         64'hdbb85779_e426e822,
         64'hec062000_07b71101,
-        64'he81ff06f_61055565,
+        64'he81ff06f_61055265,
         64'h05130000_151764a2,
         64'h60e26442_d03c4799,
-        64'he99ff0ef_57c50513,
+        64'he99ff0ef_54c50513,
         64'h00001517_f27ff0ef,
         64'h91010204_95132481,
-        64'heb1ff0ef_57450513,
+        64'heb1ff0ef_54450513,
         64'h00001517_5064d03c,
         64'h16600793_ec5ff0ef,
-        64'h5a850513_00001517,
+        64'h57850513_00001517,
         64'hf53ff0ef_91010204,
         64'h95132481_eddff0ef,
-        64'h5a050513_00001517,
+        64'h57050513_00001517,
         64'h5064d03c_10400793,
         64'h20000437_fff537fd,
         64'h000147a9_c3b84729,
         64'h200007b7_f05ff0ef,
-        64'he426e822_ec065c05,
+        64'he426e822_ec065905,
         64'h05131101_00001517,
         64'h80824108_8082c10c,
         64'h80826105_60e2ecff,
diff --git a/include/ariane_pkg.sv b/include/ariane_pkg.sv
index 0c39f4bc11b9bbbaab9a789fac3e626c134391bb..33570e91ce2d26840824b20cbf0ba92216bb8473 100644
--- a/include/ariane_pkg.sv
+++ b/include/ariane_pkg.sv
@@ -55,7 +55,7 @@ package ariane_pkg;
     // in this case we can use a small commit queue since we have a write buffer in the dcache
     // we could in principle do without the commit queue in this case, but the timing degrades if we do that due
     // to longer paths into the commit stage
-    localparam int unsigned DEPTH_COMMIT = 2;
+    localparam int unsigned DEPTH_COMMIT = 4;
 `else
     // allocate more space for the commit buffer to be on the save side, this needs to be a power of two
     localparam int unsigned DEPTH_COMMIT = 8;
@@ -283,11 +283,11 @@ package ariane_pkg;
 `endif
 
 `ifndef CONFIG_L1D_ASSOCIATIVITY
-    `define CONFIG_L1D_ASSOCIATIVITY 4
+    `define CONFIG_L1D_ASSOCIATIVITY 8
 `endif
 
 `ifndef CONFIG_L1D_SIZE
-    `define CONFIG_L1D_SIZE 16*1024
+    `define CONFIG_L1D_SIZE 32*1024
 `endif
 
     // I$
diff --git a/include/serpent_cache_pkg.sv b/include/serpent_cache_pkg.sv
index 9d192de170f93fa4b03a227f9607c200cfccfa5f..fd5ad02f2da84122d30625ab7ed4c23d1182f64c 100644
--- a/include/serpent_cache_pkg.sv
+++ b/include/serpent_cache_pkg.sv
@@ -33,7 +33,8 @@ package serpent_cache_pkg;
 `endif
 
 `ifndef L15_THREADID_WIDTH
-    `define L15_THREADID_WIDTH 1
+    // this results in 4 pending tx slots in the writebuffer
+    `define L15_THREADID_WIDTH 2
 `endif
 
 `ifndef TLB_CSM_WIDTH
@@ -71,7 +72,7 @@ package serpent_cache_pkg;
   // write buffer parameterization
   localparam DCACHE_WBUF_DEPTH       = 8;
   localparam DCACHE_MAX_TX           = 2**L15_TID_WIDTH;
-  localparam DCACHE_ID_WIDTH         = $clog2(DCACHE_MAX_TX);
+  localparam CACHE_ID_WIDTH          = L15_TID_WIDTH;
 
 
   typedef struct packed {
@@ -126,16 +127,14 @@ package serpent_cache_pkg;
     logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way;         // way to replace
     logic [63:0]                                     paddr;       // physical address
     logic                                            nc;          // noncacheable
-    logic [L15_TID_WIDTH-1:0]                        tid;         // threadi id (used as transaction id in Ariane)
+    logic [CACHE_ID_WIDTH-1:0]                       tid;         // thread id (used as transaction id in Ariane)
   } icache_req_t;
 
   typedef struct packed {
     icache_in_t                                      rtype;       // see definitions above
     logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0]        data;        // full cache line width
     cache_inval_t                                    inv;         // invalidation vector
-    logic                                            nc;          // noncacheable
-    logic [L15_TID_WIDTH-1:0]                        tid;         // threadi id (used as transaction id in Ariane)
-    logic                                            f4b;         // fetch 4 bytes only (from I/O space)
+    logic [CACHE_ID_WIDTH-1:0]                       tid;         // thread id (used as transaction id in Ariane)
   } icache_rtrn_t;
 
   // dcache interface
@@ -146,7 +145,7 @@ package serpent_cache_pkg;
     logic [63:0]                                     paddr;       // physical address
     logic [63:0]                                     data;        // word width of processor (no block stores at the moment)
     logic                                            nc;          // noncacheable
-    logic [L15_TID_WIDTH-1:0]                        tid;         // threadi id (used as transaction id in Ariane)
+    logic [CACHE_ID_WIDTH-1:0]                       tid;         // thread id (used as transaction id in Ariane)
     ariane_pkg::amo_t                                amo_op;      // amo opcode
   } dcache_req_t;
 
@@ -154,8 +153,7 @@ package serpent_cache_pkg;
     dcache_in_t                                      rtype;       // see definitions above
     logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0]        data;        // full cache line width
     cache_inval_t                                    inv;         // invalidation vector
-    logic                                            nc;          // noncacheable
-    logic [L15_TID_WIDTH-1:0]                        tid;         // threadi id (used as transaction id in Ariane)
+    logic [CACHE_ID_WIDTH-1:0]                       tid;         // thread id (used as transaction id in Ariane)
   } dcache_rtrn_t;
 
 
diff --git a/openpiton/ariane_verilog_wrap.sv b/openpiton/ariane_verilog_wrap.sv
index cb90761c93fef16ab8cfd61a6a1f8ed93155cc46..869de9ec05668b3defffaf9edf435b46f23bf9bb 100644
--- a/openpiton/ariane_verilog_wrap.sv
+++ b/openpiton/ariane_verilog_wrap.sv
@@ -111,20 +111,6 @@ module ariane_verilog_wrap #(
   // reset gate this
   assign rst_n = wake_up_cnt_q[$high(wake_up_cnt_q)] & reset_l;
 
-  // reset_synchronizer #(
-  //    .NUM_REGS(2)
-  // ) i_sync (
-  //    .clk_i   ( clk_i      ),
-  //    .rst_ni  ( rst_n      ),
-  //    .tmode_i ( 1'b0       ),
-  //    .rst_no  ( spc_grst_l )
-  // );
-
-  synchronizer i_sync (
-    .clk         ( clk_i      ),
-    .presyncdata ( rst_n      ),
-    .syncdata    ( spc_grst_l )
-  );
 
   /////////////////////////////
   // synchronizers
diff --git a/openpiton/bootrom/.gitignore b/openpiton/bootrom/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..b553ec26ea03717ea46bdf0627d2c07c18dd84be
--- /dev/null
+++ b/openpiton/bootrom/.gitignore
@@ -0,0 +1,3 @@
+*.elf
+*.img
+*.dtb
diff --git a/openpiton/bootrom/Makefile b/openpiton/bootrom/Makefile
new file mode 120000
index 0000000000000000000000000000000000000000..a79fc07889056da5204ee7dad781d0e791716890
--- /dev/null
+++ b/openpiton/bootrom/Makefile
@@ -0,0 +1 @@
+../../bootrom/Makefile
\ No newline at end of file
diff --git a/openpiton/bootrom/ariane.dts b/openpiton/bootrom/ariane.dts
new file mode 100644
index 0000000000000000000000000000000000000000..5e8852b791f3bb2421d7a222e1d69aaec48b127e
--- /dev/null
+++ b/openpiton/bootrom/ariane.dts
@@ -0,0 +1,95 @@
+// DTS generated with gen_riscv_dts(...) 
+// OpenPiton + Ariane framework
+// Date: Feb 04 2019 17:25:17
+
+/dts-v1/;
+
+/ {
+    #address-cells = <2>;
+    #size-cells = <2>;
+    compatible = "eth,ariane-bare-dev";
+    model = "eth,ariane-bare";
+    cpus {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        timebase-frequency = <520835>; 
+        
+        CPU0: cpu@0 {
+            clock-frequency = <66667000>; 
+            device_type = "cpu";
+            reg = <0>;
+            status = "okay";
+            compatible = "eth, ariane", "riscv";
+            riscv,isa = "rv64imacsu";
+            mmu-type = "riscv,sv39";
+            tlb-split;
+            // HLIC - hart local interrupt controller
+            CPU0_intc: interrupt-controller {
+                #interrupt-cells = <1>;
+                interrupt-controller;
+                compatible = "riscv,cpu-intc";
+            };
+        };
+        
+    };
+    
+    memory@80000000 {
+        device_type = "memory";
+        reg = < 0x00000000 0x80000000 0x00000000 0x40000000 >;
+    };
+                    
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+        compatible = "eth,ariane-bare-soc", "simple-bus";
+        ranges;
+    
+        uart@FFF0C2C000 {
+            compatible = "ns16550";
+            reg = < 0x000000FF 0xF0C2C000 0x00000000 0x000D4000 >;
+            clock-frequency = <66667000>;
+            current-speed = <115200>;
+            interrupt-parent = <&PLIC0>;
+            interrupts = <1>;
+            reg-shift = <1>; // regs are spaced on 8 bit boundary
+            reg-io-width = <1>; // only 8-bit accesses are supported
+        };
+            
+        eth: lowrisc-eth@FFF0D00000 {
+            compatible = "lowrisc-eth";
+            device_type = "network";
+            interrupt-parent = <&PLIC0>;
+            interrupts = <3 0>;
+            local-mac-address = [ee e1 e2 e3 e4 e5];
+            reg = < 0x000000FF 0xF0D00000 0x00000000 0x00100000 >;
+        };
+            
+        debug-controller@FFF1000000 {
+            compatible = "riscv,debug-013";
+            interrupts-extended = <&CPU0_intc 65535 >;
+            reg = < 0x000000FF 0xF1000000 0x00000000 0x00001000 >;
+            reg-names = "control";
+        };
+            
+        clint@FFF1020000 {
+            compatible = "riscv,clint0";
+            interrupts-extended = <&CPU0_intc 3 &CPU0_intc 7 >;
+            reg = < 0x000000FF 0xF1020000 0x00000000 0x00010000 >;
+            reg-names = "control";
+        };
+            
+        PLIC0: plic@FFF1030000 {
+            #address-cells = <0>;
+            #interrupt-cells = <1>;
+            compatible = "sifive,plic-1.0.0", "riscv,plic0";
+            interrupt-controller;
+            interrupts-extended = <&CPU0_intc 11 &CPU0_intc 9 >;
+            reg = < 0x000000FF 0xF1030000 0x00000000 0x04000000 >;
+            reg-names = "control";
+            riscv,max-priority = <7>;
+            riscv,ndev = <2>;
+        };
+                
+    };
+};
+    
\ No newline at end of file
diff --git a/openpiton/bootrom/bootrom.S b/openpiton/bootrom/bootrom.S
new file mode 120000
index 0000000000000000000000000000000000000000..449148c94fe8ba7265e2b6f70e19123338a1a8d1
--- /dev/null
+++ b/openpiton/bootrom/bootrom.S
@@ -0,0 +1 @@
+../../bootrom/bootrom.S
\ No newline at end of file
diff --git a/openpiton/bootrom/bootrom.h b/openpiton/bootrom/bootrom.h
new file mode 100644
index 0000000000000000000000000000000000000000..0d54d9ed8986e1ba23647079c21cf747069971fe
--- /dev/null
+++ b/openpiton/bootrom/bootrom.h
@@ -0,0 +1,498 @@
+// Auto-generated code
+
+const int reset_vec_size = 492;
+
+uint32_t reset_vec[reset_vec_size] = {
+    0x0010041b,
+    0x01f41413,
+    0xf1402573,
+    0x00000597,
+    0x07458593,
+    0x00008402,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0xf1402573,
+    0x00000597,
+    0x03c58593,
+    0x10500073,
+    0x0000bff5,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0xedfe0dd0,
+    0x2b070000,
+    0x38000000,
+    0xe4050000,
+    0x28000000,
+    0x11000000,
+    0x10000000,
+    0x00000000,
+    0x47010000,
+    0xac050000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x00000000,
+    0x01000000,
+    0x00000000,
+    0x03000000,
+    0x04000000,
+    0x00000000,
+    0x02000000,
+    0x03000000,
+    0x04000000,
+    0x0f000000,
+    0x02000000,
+    0x03000000,
+    0x14000000,
+    0x1b000000,
+    0x2c687465,
+    0x61697261,
+    0x622d656e,
+    0x2d657261,
+    0x00766564,
+    0x03000000,
+    0x10000000,
+    0x26000000,
+    0x2c687465,
+    0x61697261,
+    0x622d656e,
+    0x00657261,
+    0x01000000,
+    0x73757063,
+    0x00000000,
+    0x03000000,
+    0x04000000,
+    0x00000000,
+    0x01000000,
+    0x03000000,
+    0x04000000,
+    0x0f000000,
+    0x00000000,
+    0x03000000,
+    0x04000000,
+    0x2c000000,
+    0x83f20700,
+    0x01000000,
+    0x40757063,
+    0x00000030,
+    0x03000000,
+    0x04000000,
+    0x3f000000,
+    0xf841f903,
+    0x03000000,
+    0x04000000,
+    0x4f000000,
+    0x00757063,
+    0x03000000,
+    0x04000000,
+    0x5b000000,
+    0x00000000,
+    0x03000000,
+    0x05000000,
+    0x5f000000,
+    0x79616b6f,
+    0x00000000,
+    0x03000000,
+    0x12000000,
+    0x1b000000,
+    0x2c687465,
+    0x69726120,
+    0x00656e61,
+    0x63736972,
+    0x00000076,
+    0x03000000,
+    0x0b000000,
+    0x66000000,
+    0x34367672,
+    0x63616d69,
+    0x00007573,
+    0x03000000,
+    0x0b000000,
+    0x70000000,
+    0x63736972,
+    0x76732c76,
+    0x00003933,
+    0x03000000,
+    0x00000000,
+    0x79000000,
+    0x01000000,
+    0x65746e69,
+    0x70757272,
+    0x6f632d74,
+    0x6f72746e,
+    0x72656c6c,
+    0x00000000,
+    0x03000000,
+    0x04000000,
+    0x83000000,
+    0x01000000,
+    0x03000000,
+    0x00000000,
+    0x94000000,
+    0x03000000,
+    0x0f000000,
+    0x1b000000,
+    0x63736972,
+    0x70632c76,
+    0x6e692d75,
+    0x00006374,
+    0x03000000,
+    0x04000000,
+    0xa9000000,
+    0x02000000,
+    0x02000000,
+    0x02000000,
+    0x02000000,
+    0x01000000,
+    0x6f6d656d,
+    0x38407972,
+    0x30303030,
+    0x00303030,
+    0x03000000,
+    0x07000000,
+    0x4f000000,
+    0x6f6d656d,
+    0x00007972,
+    0x03000000,
+    0x10000000,
+    0x5b000000,
+    0x00000000,
+    0x00000080,
+    0x00000000,
+    0x00000040,
+    0x02000000,
+    0x01000000,
+    0x00636f73,
+    0x03000000,
+    0x04000000,
+    0x00000000,
+    0x02000000,
+    0x03000000,
+    0x04000000,
+    0x0f000000,
+    0x02000000,
+    0x03000000,
+    0x1f000000,
+    0x1b000000,
+    0x2c687465,
+    0x61697261,
+    0x622d656e,
+    0x2d657261,
+    0x00636f73,
+    0x706d6973,
+    0x622d656c,
+    0x00007375,
+    0x03000000,
+    0x00000000,
+    0xb1000000,
+    0x01000000,
+    0x74726175,
+    0x46464640,
+    0x43324330,
+    0x00303030,
+    0x03000000,
+    0x08000000,
+    0x1b000000,
+    0x3631736e,
+    0x00303535,
+    0x03000000,
+    0x10000000,
+    0x5b000000,
+    0xff000000,
+    0x00c0c2f0,
+    0x00000000,
+    0x00400d00,
+    0x03000000,
+    0x04000000,
+    0x3f000000,
+    0xf841f903,
+    0x03000000,
+    0x04000000,
+    0xb8000000,
+    0x00c20100,
+    0x03000000,
+    0x04000000,
+    0xc6000000,
+    0x01000000,
+    0x03000000,
+    0x04000000,
+    0xd7000000,
+    0x01000000,
+    0x03000000,
+    0x04000000,
+    0xe2000000,
+    0x01000000,
+    0x03000000,
+    0x04000000,
+    0xec000000,
+    0x01000000,
+    0x02000000,
+    0x01000000,
+    0x72776f6c,
+    0x2d637369,
+    0x40687465,
+    0x30464646,
+    0x30303044,
+    0x00003030,
+    0x03000000,
+    0x0c000000,
+    0x1b000000,
+    0x72776f6c,
+    0x2d637369,
+    0x00687465,
+    0x03000000,
+    0x08000000,
+    0x4f000000,
+    0x7774656e,
+    0x006b726f,
+    0x03000000,
+    0x04000000,
+    0xc6000000,
+    0x01000000,
+    0x03000000,
+    0x08000000,
+    0xd7000000,
+    0x03000000,
+    0x00000000,
+    0x03000000,
+    0x06000000,
+    0xf9000000,
+    0xe3e2e1ee,
+    0x0000e5e4,
+    0x03000000,
+    0x10000000,
+    0x5b000000,
+    0xff000000,
+    0x0000d0f0,
+    0x00000000,
+    0x00001000,
+    0x02000000,
+    0x01000000,
+    0x75626564,
+    0x6f632d67,
+    0x6f72746e,
+    0x72656c6c,
+    0x46464640,
+    0x30303031,
+    0x00303030,
+    0x03000000,
+    0x10000000,
+    0x1b000000,
+    0x63736972,
+    0x65642c76,
+    0x2d677562,
+    0x00333130,
+    0x03000000,
+    0x08000000,
+    0x0b010000,
+    0x02000000,
+    0xffff0000,
+    0x03000000,
+    0x10000000,
+    0x5b000000,
+    0xff000000,
+    0x000000f1,
+    0x00000000,
+    0x00100000,
+    0x03000000,
+    0x08000000,
+    0x1f010000,
+    0x746e6f63,
+    0x006c6f72,
+    0x02000000,
+    0x01000000,
+    0x6e696c63,
+    0x46464074,
+    0x32303146,
+    0x30303030,
+    0x00000000,
+    0x03000000,
+    0x0d000000,
+    0x1b000000,
+    0x63736972,
+    0x6c632c76,
+    0x30746e69,
+    0x00000000,
+    0x03000000,
+    0x10000000,
+    0x0b010000,
+    0x02000000,
+    0x03000000,
+    0x02000000,
+    0x07000000,
+    0x03000000,
+    0x10000000,
+    0x5b000000,
+    0xff000000,
+    0x000002f1,
+    0x00000000,
+    0x00000100,
+    0x03000000,
+    0x08000000,
+    0x1f010000,
+    0x746e6f63,
+    0x006c6f72,
+    0x02000000,
+    0x01000000,
+    0x63696c70,
+    0x46464640,
+    0x30333031,
+    0x00303030,
+    0x03000000,
+    0x04000000,
+    0x00000000,
+    0x00000000,
+    0x03000000,
+    0x04000000,
+    0x83000000,
+    0x01000000,
+    0x03000000,
+    0x1e000000,
+    0x1b000000,
+    0x69666973,
+    0x702c6576,
+    0x2d63696c,
+    0x2e302e31,
+    0x69720030,
+    0x2c766373,
+    0x63696c70,
+    0x00000030,
+    0x03000000,
+    0x00000000,
+    0x94000000,
+    0x03000000,
+    0x10000000,
+    0x0b010000,
+    0x02000000,
+    0x0b000000,
+    0x02000000,
+    0x09000000,
+    0x03000000,
+    0x10000000,
+    0x5b000000,
+    0xff000000,
+    0x000003f1,
+    0x00000000,
+    0x00000004,
+    0x03000000,
+    0x08000000,
+    0x1f010000,
+    0x746e6f63,
+    0x006c6f72,
+    0x03000000,
+    0x04000000,
+    0x29010000,
+    0x07000000,
+    0x03000000,
+    0x04000000,
+    0x3c010000,
+    0x02000000,
+    0x03000000,
+    0x04000000,
+    0xa9000000,
+    0x01000000,
+    0x02000000,
+    0x02000000,
+    0x02000000,
+    0x09000000,
+    0x64646123,
+    0x73736572,
+    0x6c65632d,
+    0x2300736c,
+    0x657a6973,
+    0x6c65632d,
+    0x6300736c,
+    0x61706d6f,
+    0x6c626974,
+    0x6f6d0065,
+    0x006c6564,
+    0x656d6974,
+    0x65736162,
+    0x6572662d,
+    0x6e657571,
+    0x63007963,
+    0x6b636f6c,
+    0x6572662d,
+    0x6e657571,
+    0x64007963,
+    0x63697665,
+    0x79745f65,
+    0x72006570,
+    0x73006765,
+    0x75746174,
+    0x69720073,
+    0x2c766373,
+    0x00617369,
+    0x2d756d6d,
+    0x65707974,
+    0x626c7400,
+    0x6c70732d,
+    0x23007469,
+    0x65746e69,
+    0x70757272,
+    0x65632d74,
+    0x00736c6c,
+    0x65746e69,
+    0x70757272,
+    0x6f632d74,
+    0x6f72746e,
+    0x72656c6c,
+    0x61687000,
+    0x656c646e,
+    0x6e617200,
+    0x00736567,
+    0x72727563,
+    0x2d746e65,
+    0x65657073,
+    0x6e690064,
+    0x72726574,
+    0x2d747075,
+    0x65726170,
+    0x6900746e,
+    0x7265746e,
+    0x74707572,
+    0x65720073,
+    0x68732d67,
+    0x00746669,
+    0x2d676572,
+    0x772d6f69,
+    0x68746469,
+    0x636f6c00,
+    0x6d2d6c61,
+    0x612d6361,
+    0x65726464,
+    0x69007373,
+    0x7265746e,
+    0x74707572,
+    0x78652d73,
+    0x646e6574,
+    0x72006465,
+    0x6e2d6765,
+    0x73656d61,
+    0x73697200,
+    0x6d2c7663,
+    0x702d7861,
+    0x726f6972,
+    0x00797469,
+    0x63736972,
+    0x646e2c76,
+    0x00007665,
+    0x00000000
+};
diff --git a/openpiton/bootrom/bootrom.sv b/openpiton/bootrom/bootrom.sv
new file mode 100644
index 0000000000000000000000000000000000000000..6d0fb7843ab1346c39f1115a904ebe5894f45015
--- /dev/null
+++ b/openpiton/bootrom/bootrom.sv
@@ -0,0 +1,285 @@
+/* Copyright 2018 ETH Zurich and University of Bologna.
+ * Copyright and related rights are licensed under the Solderpad Hardware
+ * License, Version 0.51 (the "License"); you may not use this file except in
+ * compliance with the License.  You may obtain a copy of the License at
+ * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+ * or agreed to in writing, software, hardware and materials distributed under
+ * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ * CONDITIONS OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ *
+ * File: bootrom.sv
+ *
+ * Description: Auto-generated bootrom
+ */
+
+// Auto-generated code
+module bootrom (
+   input  logic         clk_i,
+   input  logic         req_i,
+   input  logic [63:0]  addr_i,
+   output logic [63:0]  rdata_o
+);
+    localparam int RomSize = 246;
+
+    const logic [RomSize-1:0][63:0] mem = {
+        64'h00000000_00007665,
+        64'h646e2c76_63736972,
+        64'h00797469_726f6972,
+        64'h702d7861_6d2c7663,
+        64'h73697200_73656d61,
+        64'h6e2d6765_72006465,
+        64'h646e6574_78652d73,
+        64'h74707572_7265746e,
+        64'h69007373_65726464,
+        64'h612d6361_6d2d6c61,
+        64'h636f6c00_68746469,
+        64'h772d6f69_2d676572,
+        64'h00746669_68732d67,
+        64'h65720073_74707572,
+        64'h7265746e_6900746e,
+        64'h65726170_2d747075,
+        64'h72726574_6e690064,
+        64'h65657073_2d746e65,
+        64'h72727563_00736567,
+        64'h6e617200_656c646e,
+        64'h61687000_72656c6c,
+        64'h6f72746e_6f632d74,
+        64'h70757272_65746e69,
+        64'h00736c6c_65632d74,
+        64'h70757272_65746e69,
+        64'h23007469_6c70732d,
+        64'h626c7400_65707974,
+        64'h2d756d6d_00617369,
+        64'h2c766373_69720073,
+        64'h75746174_73006765,
+        64'h72006570_79745f65,
+        64'h63697665_64007963,
+        64'h6e657571_6572662d,
+        64'h6b636f6c_63007963,
+        64'h6e657571_6572662d,
+        64'h65736162_656d6974,
+        64'h006c6564_6f6d0065,
+        64'h6c626974_61706d6f,
+        64'h6300736c_6c65632d,
+        64'h657a6973_2300736c,
+        64'h6c65632d_73736572,
+        64'h64646123_09000000,
+        64'h02000000_02000000,
+        64'h02000000_01000000,
+        64'ha9000000_04000000,
+        64'h03000000_02000000,
+        64'h3c010000_04000000,
+        64'h03000000_07000000,
+        64'h29010000_04000000,
+        64'h03000000_006c6f72,
+        64'h746e6f63_1f010000,
+        64'h08000000_03000000,
+        64'h00000004_00000000,
+        64'h000003f1_ff000000,
+        64'h5b000000_10000000,
+        64'h03000000_09000000,
+        64'h02000000_0b000000,
+        64'h02000000_0b010000,
+        64'h10000000_03000000,
+        64'h94000000_00000000,
+        64'h03000000_00000030,
+        64'h63696c70_2c766373,
+        64'h69720030_2e302e31,
+        64'h2d63696c_702c6576,
+        64'h69666973_1b000000,
+        64'h1e000000_03000000,
+        64'h01000000_83000000,
+        64'h04000000_03000000,
+        64'h00000000_00000000,
+        64'h04000000_03000000,
+        64'h00303030_30333031,
+        64'h46464640_63696c70,
+        64'h01000000_02000000,
+        64'h006c6f72_746e6f63,
+        64'h1f010000_08000000,
+        64'h03000000_00000100,
+        64'h00000000_000002f1,
+        64'hff000000_5b000000,
+        64'h10000000_03000000,
+        64'h07000000_02000000,
+        64'h03000000_02000000,
+        64'h0b010000_10000000,
+        64'h03000000_00000000,
+        64'h30746e69_6c632c76,
+        64'h63736972_1b000000,
+        64'h0d000000_03000000,
+        64'h00000000_30303030,
+        64'h32303146_46464074,
+        64'h6e696c63_01000000,
+        64'h02000000_006c6f72,
+        64'h746e6f63_1f010000,
+        64'h08000000_03000000,
+        64'h00100000_00000000,
+        64'h000000f1_ff000000,
+        64'h5b000000_10000000,
+        64'h03000000_ffff0000,
+        64'h02000000_0b010000,
+        64'h08000000_03000000,
+        64'h00333130_2d677562,
+        64'h65642c76_63736972,
+        64'h1b000000_10000000,
+        64'h03000000_00303030,
+        64'h30303031_46464640,
+        64'h72656c6c_6f72746e,
+        64'h6f632d67_75626564,
+        64'h01000000_02000000,
+        64'h00001000_00000000,
+        64'h0000d0f0_ff000000,
+        64'h5b000000_10000000,
+        64'h03000000_0000e5e4,
+        64'he3e2e1ee_f9000000,
+        64'h06000000_03000000,
+        64'h00000000_03000000,
+        64'hd7000000_08000000,
+        64'h03000000_01000000,
+        64'hc6000000_04000000,
+        64'h03000000_006b726f,
+        64'h7774656e_4f000000,
+        64'h08000000_03000000,
+        64'h00687465_2d637369,
+        64'h72776f6c_1b000000,
+        64'h0c000000_03000000,
+        64'h00003030_30303044,
+        64'h30464646_40687465,
+        64'h2d637369_72776f6c,
+        64'h01000000_02000000,
+        64'h01000000_ec000000,
+        64'h04000000_03000000,
+        64'h01000000_e2000000,
+        64'h04000000_03000000,
+        64'h01000000_d7000000,
+        64'h04000000_03000000,
+        64'h01000000_c6000000,
+        64'h04000000_03000000,
+        64'h00c20100_b8000000,
+        64'h04000000_03000000,
+        64'hf841f903_3f000000,
+        64'h04000000_03000000,
+        64'h00400d00_00000000,
+        64'h00c0c2f0_ff000000,
+        64'h5b000000_10000000,
+        64'h03000000_00303535,
+        64'h3631736e_1b000000,
+        64'h08000000_03000000,
+        64'h00303030_43324330,
+        64'h46464640_74726175,
+        64'h01000000_b1000000,
+        64'h00000000_03000000,
+        64'h00007375_622d656c,
+        64'h706d6973_00636f73,
+        64'h2d657261_622d656e,
+        64'h61697261_2c687465,
+        64'h1b000000_1f000000,
+        64'h03000000_02000000,
+        64'h0f000000_04000000,
+        64'h03000000_02000000,
+        64'h00000000_04000000,
+        64'h03000000_00636f73,
+        64'h01000000_02000000,
+        64'h00000040_00000000,
+        64'h00000080_00000000,
+        64'h5b000000_10000000,
+        64'h03000000_00007972,
+        64'h6f6d656d_4f000000,
+        64'h07000000_03000000,
+        64'h00303030_30303030,
+        64'h38407972_6f6d656d,
+        64'h01000000_02000000,
+        64'h02000000_02000000,
+        64'h02000000_a9000000,
+        64'h04000000_03000000,
+        64'h00006374_6e692d75,
+        64'h70632c76_63736972,
+        64'h1b000000_0f000000,
+        64'h03000000_94000000,
+        64'h00000000_03000000,
+        64'h01000000_83000000,
+        64'h04000000_03000000,
+        64'h00000000_72656c6c,
+        64'h6f72746e_6f632d74,
+        64'h70757272_65746e69,
+        64'h01000000_79000000,
+        64'h00000000_03000000,
+        64'h00003933_76732c76,
+        64'h63736972_70000000,
+        64'h0b000000_03000000,
+        64'h00007573_63616d69,
+        64'h34367672_66000000,
+        64'h0b000000_03000000,
+        64'h00000076_63736972,
+        64'h00656e61_69726120,
+        64'h2c687465_1b000000,
+        64'h12000000_03000000,
+        64'h00000000_79616b6f,
+        64'h5f000000_05000000,
+        64'h03000000_00000000,
+        64'h5b000000_04000000,
+        64'h03000000_00757063,
+        64'h4f000000_04000000,
+        64'h03000000_f841f903,
+        64'h3f000000_04000000,
+        64'h03000000_00000030,
+        64'h40757063_01000000,
+        64'h83f20700_2c000000,
+        64'h04000000_03000000,
+        64'h00000000_0f000000,
+        64'h04000000_03000000,
+        64'h01000000_00000000,
+        64'h04000000_03000000,
+        64'h00000000_73757063,
+        64'h01000000_00657261,
+        64'h622d656e_61697261,
+        64'h2c687465_26000000,
+        64'h10000000_03000000,
+        64'h00766564_2d657261,
+        64'h622d656e_61697261,
+        64'h2c687465_1b000000,
+        64'h14000000_03000000,
+        64'h02000000_0f000000,
+        64'h04000000_03000000,
+        64'h02000000_00000000,
+        64'h04000000_03000000,
+        64'h00000000_01000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'hac050000_47010000,
+        64'h00000000_10000000,
+        64'h11000000_28000000,
+        64'he4050000_38000000,
+        64'h2b070000_edfe0dd0,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_0000bff5,
+        64'h10500073_03c58593,
+        64'h00000597_f1402573,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00000000_00000000,
+        64'h00008402_07458593,
+        64'h00000597_f1402573,
+        64'h01f41413_0010041b
+    };
+
+    logic [$clog2(RomSize)-1:0] addr_q; // registered index of the selected 64-bit ROM word
+
+    always_ff @(posedge clk_i) begin
+        if (req_i) begin // latch a new index only on a request; otherwise hold the previous one
+            addr_q <= addr_i[$clog2(RomSize)-1+3:3]; // skip the 3 byte-offset bits of addr_i
+        end
+    end
+
+    // this prevents spurious Xes from propagating into
+    // the speculative fetch stage of the core
+    assign rdata_o = (addr_q < RomSize) ? mem[addr_q] : '0; // indices past the last word read as zero
+endmodule
diff --git a/openpiton/bootrom/encoding.h b/openpiton/bootrom/encoding.h
new file mode 120000
index 0000000000000000000000000000000000000000..a504b0765d6527648f0a36e140de53e205102081
--- /dev/null
+++ b/openpiton/bootrom/encoding.h
@@ -0,0 +1 @@
+../../bootrom/encoding.h
\ No newline at end of file
diff --git a/openpiton/bootrom/gen_rom.py b/openpiton/bootrom/gen_rom.py
new file mode 120000
index 0000000000000000000000000000000000000000..a069b9c78f5cd577fc727ef21e5e91b644195af9
--- /dev/null
+++ b/openpiton/bootrom/gen_rom.py
@@ -0,0 +1 @@
+../../bootrom/gen_rom.py
\ No newline at end of file
diff --git a/openpiton/bootrom/linker.ld b/openpiton/bootrom/linker.ld
new file mode 120000
index 0000000000000000000000000000000000000000..02a497788980f0fd9008af06405a710b5317a39c
--- /dev/null
+++ b/openpiton/bootrom/linker.ld
@@ -0,0 +1 @@
+../../bootrom/linker.ld
\ No newline at end of file
diff --git a/openpiton/serpent_peripherals.sv b/openpiton/serpent_peripherals.sv
index 8cd32aea707579e0d7325687d96ab39722b8a964..a06f123401fe5d964bbb5d02343cc3d487c7fae5 100644
--- a/openpiton/serpent_peripherals.sv
+++ b/openpiton/serpent_peripherals.sv
@@ -94,44 +94,123 @@ module serpent_peripherals #(
   // Debug module and JTAG
   /////////////////////////////
 
-  logic        jtag_req_valid;
-  logic        debug_req_ready;
-  logic        jtag_resp_ready;
-  logic        jtag_resp_valid;
+  logic          debug_req_valid;
+  logic          debug_req_ready;
+  logic          debug_resp_valid;
+  logic          debug_resp_ready;
 
-  dm::dmi_req_t  jtag_dmi_req;
+  dm::dmi_req_t  debug_req;
   dm::dmi_resp_t debug_resp;
+  
+`ifdef RISCV_FESVR_SIM
+
+  initial begin
+    $display("[INFO] instantiating FESVR DTM in simulation.");
+  end
+  
+  // SiFive's SimDTM Module
+  // Converts to DPI calls; drives the debug_req/debug_resp DMI handshake in place of dmi_jtag
+  logic [31:0] sim_exit; // TODO: wire this up in the testbench
+  logic [1:0] debug_req_bits_op; // raw 2-bit op code from SimDTM, cast to the DMI enum below
+  assign debug_req.op = dm::dtm_op_t'(debug_req_bits_op);
+
+  SimDTM i_SimDTM (
+      .clk                  ( clk_i                ),
+      .reset                ( ~rst_ni              ), // rst_ni is active low, SimDTM gets the inverted value
+      .debug_req_valid      ( debug_req_valid      ),
+      .debug_req_ready      ( debug_req_ready      ),
+      .debug_req_bits_addr  ( debug_req.addr       ),
+      .debug_req_bits_op    ( debug_req_bits_op    ),
+      .debug_req_bits_data  ( debug_req.data       ),
+      .debug_resp_valid     ( debug_resp_valid     ),
+      .debug_resp_ready     ( debug_resp_ready     ),
+      .debug_resp_bits_resp ( debug_resp.resp      ),
+      .debug_resp_bits_data ( debug_resp.data      ),
+      .exit                 ( sim_exit             )
+  );
+
+`else // RISCV_FESVR_SIM
+ 
+  logic        tck, tms, trst_n, tdi, tdo, tdo_oe;
 
   dmi_jtag i_dmi_jtag (
-    .clk_i                               ,
-    .rst_ni                              ,
-    .testmode_i                          ,
-    .dmi_req_o        ( jtag_dmi_req    ),
-    .dmi_req_valid_o  ( jtag_req_valid  ),
-    .dmi_req_ready_i  ( debug_req_ready ),
-    .dmi_resp_i       ( debug_resp      ),
-    .dmi_resp_ready_o ( jtag_resp_ready ),
-    .dmi_resp_valid_i ( jtag_resp_valid ),
-    .dmi_rst_no       (                 ), // not connected
-    .tck_i                               ,
-    .tms_i                               ,
-    .trst_ni                             ,
-    .td_i                                ,
-    .td_o                                ,
-    .tdo_oe_o
+    .clk_i                                ,
+    .rst_ni                               ,
+    .testmode_i                           ,
+    .dmi_req_o        ( debug_req        ),
+    .dmi_req_valid_o  ( debug_req_valid  ),
+    .dmi_req_ready_i  ( debug_req_ready  ),
+    .dmi_resp_i       ( debug_resp       ),
+    .dmi_resp_ready_o ( debug_resp_ready ),
+    .dmi_resp_valid_i ( debug_resp_valid ),
+    .dmi_rst_no       (                  ), // not connected
+    .tck_i            ( tck              ),
+    .tms_i            ( tms              ),
+    .trst_ni          ( trst_n           ),
+    .td_i             ( tdi              ),
+    .td_o             ( tdo              ),
+    .tdo_oe_o         ( tdo_oe           )
   );
 
-  ariane_axi::req_t    dm_axi_m_req,  dm_axi_s_req;
-  ariane_axi::resp_t   dm_axi_m_resp, dm_axi_s_resp;
+`ifdef RISCV_JTAG_SIM
+
+  initial begin
+    $display("[INFO] instantiating JTAG DTM in simulation.");
+  end
+ 
+  // SiFive's SimJTAG Module
+  // Converts to DPI calls; drives the internal tck/tms/trst_n/tdi nets and samples tdo/tdo_oe
+  logic [31:0] sim_exit; // TODO: wire this up in the testbench
+  SimJTAG i_SimJTAG (
+      .clock                ( clk_i                ),
+      .reset                ( ~rst_ni              ),
+      .enable               ( jtag_enable[0]       ), // NOTE(review): jtag_enable is not declared in the visible code - confirm
+      .init_done            ( init_done            ), // NOTE(review): init_done is not declared in the visible code - confirm
+      .jtag_TCK             ( tck                  ),
+      .jtag_TMS             ( tms                  ),
+      .jtag_TDI             ( tdi                  ),
+      .jtag_TRSTn           ( trst_n               ),
+      .jtag_TDO_data        ( tdo                  ),
+      .jtag_TDO_driven      ( tdo_oe               ),
+      .exit                 ( sim_exit             )
+  );
+
+  assign td_o     = 1'b0  ;
+  assign tdo_oe_o = 1'b0  ;
+
+`else // RISCV_JTAG_SIM
+
+  assign tck      = tck_i   ;
+  assign tms      = tms_i   ;
+  assign trst_n   = trst_ni ;
+  assign tdi      = td_i    ;
+  assign td_o     = tdo     ;
+  assign tdo_oe_o = tdo_oe  ;
+
+`endif // RISCV_JTAG_SIM  
+`endif // RISCV_FESVR_SIM
+
+  logic                dm_slave_req;
+  logic                dm_slave_we;
+  logic [64-1:0]       dm_slave_addr;
+  logic [64/8-1:0]     dm_slave_be;
+  logic [64-1:0]       dm_slave_wdata;
+  logic [64-1:0]       dm_slave_rdata;
+
+  logic                dm_master_req;
+  logic [64-1:0]       dm_master_add;
+  logic                dm_master_we;
+  logic [64-1:0]       dm_master_wdata;
+  logic [64/8-1:0]     dm_master_be;
+  logic                dm_master_gnt;
+  logic                dm_master_r_valid;
+  logic [64-1:0]       dm_master_r_rdata;
 
   // debug module
   dm_top #(
-    // current implementation only supports 1 hart
     .NrHarts              ( NumHarts             ),
-    .AxiIdWidth           ( AxiIdWidth           ),
-    .AxiAddrWidth         ( AxiAddrWidth         ),
-    .AxiDataWidth         ( AxiDataWidth         ),
-    .AxiUserWidth         ( AxiUserWidth         )
+    .BusWidth             ( AxiDataWidth         ),
+    .Selectable_Harts     ( {NumHarts{1'b1}}     )
   ) i_dm_top (
     .clk_i                                        ,
     .rst_ni                                       , // PoR
@@ -140,18 +219,52 @@ module serpent_peripherals #(
     .dmactive_o                                   , // active debug session
     .debug_req_o                                  ,
     .unavailable_i                                ,
-    .axi_s_req_i          ( dm_axi_s_req         ),
-    .axi_s_resp_o         ( dm_axi_s_resp        ),
-    .axi_m_req_o          ( dm_axi_m_req         ),
-    .axi_m_resp_i         ( dm_axi_m_resp        ),
+    .slave_req_i          ( dm_slave_req         ),
+    .slave_we_i           ( dm_slave_we          ),
+    .slave_addr_i         ( dm_slave_addr        ),
+    .slave_be_i           ( dm_slave_be          ),
+    .slave_wdata_i        ( dm_slave_wdata       ),
+    .slave_rdata_o        ( dm_slave_rdata       ),
+    .master_req_o         ( dm_master_req        ),
+    .master_add_o         ( dm_master_add        ),
+    .master_we_o          ( dm_master_we         ),
+    .master_wdata_o       ( dm_master_wdata      ),
+    .master_be_o          ( dm_master_be         ),
+    .master_gnt_i         ( dm_master_gnt        ),
+    .master_r_valid_i     ( dm_master_r_valid    ),
+    .master_r_rdata_i     ( dm_master_r_rdata    ),    
     .dmi_rst_ni           ( rst_ni               ),
-    .dmi_req_valid_i      ( jtag_req_valid       ),
+    .dmi_req_valid_i      ( debug_req_valid      ),
     .dmi_req_ready_o      ( debug_req_ready      ),
-    .dmi_req_i            ( jtag_dmi_req         ),
-    .dmi_resp_valid_o     ( jtag_resp_valid      ),
-    .dmi_resp_ready_i     ( jtag_resp_ready      ),
+    .dmi_req_i            ( debug_req            ),
+    .dmi_resp_valid_o     ( debug_resp_valid     ),
+    .dmi_resp_ready_i     ( debug_resp_ready     ),
     .dmi_resp_o           ( debug_resp           )
   );
+  
+  AXI_BUS #(
+      .AXI_ADDR_WIDTH ( AxiAddrWidth     ),
+      .AXI_DATA_WIDTH ( AxiDataWidth     ),
+      .AXI_ID_WIDTH   ( AxiIdWidth       ),
+      .AXI_USER_WIDTH ( AxiUserWidth     )
+  ) dm_master();
+
+  axi2mem #(
+      .AXI_ID_WIDTH   ( AxiIdWidth   ),
+      .AXI_ADDR_WIDTH ( AxiAddrWidth ),
+      .AXI_DATA_WIDTH ( AxiDataWidth ),
+      .AXI_USER_WIDTH ( AxiUserWidth )
+  ) i_dm_axi2mem (
+      .clk_i      ( clk_i                     ),
+      .rst_ni     ( rst_ni                    ),
+      .slave      ( dm_master                 ),
+      .req_o      ( dm_slave_req              ),
+      .we_o       ( dm_slave_we               ),
+      .addr_o     ( dm_slave_addr             ),
+      .be_o       ( dm_slave_be               ),
+      .data_o     ( dm_slave_wdata            ),
+      .data_i     ( dm_slave_rdata            )
+  );        
 
   noc_axilite_bridge #(
     .SLAVE_RESP_BYTEWIDTH   ( 8             ),
@@ -168,57 +281,89 @@ module serpent_peripherals #(
     .splitter_bridge_rdy    ( buf_ariane_debug_noc3_ready_i ),
     //axi lite signals
     //write address channel
-    .m_axi_awaddr           ( dm_axi_s_req.aw.addr          ),
-    .m_axi_awvalid          ( dm_axi_s_req.aw_valid         ),
-    .m_axi_awready          ( dm_axi_s_resp.aw_ready        ),
+    .m_axi_awaddr           ( dm_master.aw_addr             ),
+    .m_axi_awvalid          ( dm_master.aw_valid            ),
+    .m_axi_awready          ( dm_master.aw_ready            ),
     //write data channel
-    .m_axi_wdata            ( dm_axi_s_req.w.data           ),
-    .m_axi_wstrb            ( dm_axi_s_req.w.strb           ),
-    .m_axi_wvalid           ( dm_axi_s_req.w_valid          ),
-    .m_axi_wready           ( dm_axi_s_resp.w_ready         ),
+    .m_axi_wdata            ( dm_master.w_data              ),
+    .m_axi_wstrb            ( dm_master.w_strb              ),
+    .m_axi_wvalid           ( dm_master.w_valid             ),
+    .m_axi_wready           ( dm_master.w_ready             ),
     //read address channel
-    .m_axi_araddr           ( dm_axi_s_req.ar.addr          ),
-    .m_axi_arvalid          ( dm_axi_s_req.ar_valid         ),
-    .m_axi_arready          ( dm_axi_s_resp.ar_ready        ),
+    .m_axi_araddr           ( dm_master.ar_addr             ),
+    .m_axi_arvalid          ( dm_master.ar_valid            ),
+    .m_axi_arready          ( dm_master.ar_ready            ),
     //read data channel
-    .m_axi_rdata            ( dm_axi_s_resp.r.data          ),
-    .m_axi_rresp            ( dm_axi_s_resp.r.resp          ),
-    .m_axi_rvalid           ( dm_axi_s_resp.r_valid         ),
-    .m_axi_rready           ( dm_axi_s_req.r_ready          ),
+    .m_axi_rdata            ( dm_master.r_data              ),
+    .m_axi_rresp            ( dm_master.r_resp              ),
+    .m_axi_rvalid           ( dm_master.r_valid             ),
+    .m_axi_rready           ( dm_master.r_ready             ),
     //write response channel
-    .m_axi_bresp            ( dm_axi_s_resp.b.resp          ),
-    .m_axi_bvalid           ( dm_axi_s_resp.b_valid         ),
-    .m_axi_bready           ( dm_axi_s_req.b_ready          )
+    .m_axi_bresp            ( dm_master.b_resp              ),
+    .m_axi_bvalid           ( dm_master.b_valid             ),
+    .m_axi_bready           ( dm_master.b_ready             )
   );
 
   // tie off system bus accesses (not supported yet due to
   // missing AXI-lite br_master <-> NOC converter)
-  assign dm_axi_m_resp = '0;
+  assign dm_master_gnt      = '0;
+  assign dm_master_r_valid  = '0;
+  assign dm_master_r_rdata  = '0;
+ 
+  // ariane_axi::req_t    dm_axi_m_req;
+  // ariane_axi::resp_t   dm_axi_m_resp;
+  //
+  // axi_adapter #(
+  //     .DATA_WIDTH            ( AxiDataWidth )
+  // ) i_dm_axi_master (
+  //     .clk_i                 ( clk_i                     ),
+  //     .rst_ni                ( rst_ni                    ),
+  //     .req_i                 ( dm_master_req             ),
+  //     .type_i                ( ariane_axi::SINGLE_REQ    ),
+  //     .gnt_o                 ( dm_master_gnt             ),
+  //     .gnt_id_o              (                           ),
+  //     .addr_i                ( dm_master_add             ),
+  //     .we_i                  ( dm_master_we              ),
+  //     .wdata_i               ( dm_master_wdata           ),
+  //     .be_i                  ( dm_master_be              ),
+  //     .size_i                ( 2'b11                     ), // always do 64bit here and use byte enables to gate
+  //     .id_i                  ( '0                        ),
+  //     .valid_o               ( dm_master_r_valid         ),
+  //     .rdata_o               ( dm_master_r_rdata         ),
+  //     .id_o                  (                           ),
+  //     .critical_word_o       (                           ), 
+  //     .critical_word_valid_o (                           ), 
+  //     .axi_req_o             ( dm_axi_m_req              ),
+  //     .axi_resp_i            ( dm_axi_m_resp             )
+  // );
+
+  // assign dm_axi_m_resp = '0;
 
   // tie off signals not used by AXI-lite
-  assign dm_axi_s_req.aw.id     = '0;
-  assign dm_axi_s_req.aw.len    = '0;
-  assign dm_axi_s_req.aw.size   = 2'b11;// 8byte
-  assign dm_axi_s_req.aw.burst  = '0;
-  assign dm_axi_s_req.aw.lock   = '0;
-  assign dm_axi_s_req.aw.cache  = '0;
-  assign dm_axi_s_req.aw.prot   = '0;
-  assign dm_axi_s_req.aw.qos    = '0;
-  assign dm_axi_s_req.aw.region = '0;
-  assign dm_axi_s_req.aw.atop   = '0;
-  assign dm_axi_s_req.w.last    = 1'b1;
-  assign dm_axi_s_req.ar.id     = '0;
-  assign dm_axi_s_req.ar.len    = '0;
-  assign dm_axi_s_req.ar.size   = 2'b11;// 8byte
-  assign dm_axi_s_req.ar.burst  = '0;
-  assign dm_axi_s_req.ar.lock   = '0;
-  assign dm_axi_s_req.ar.cache  = '0;
-  assign dm_axi_s_req.ar.prot   = '0;
-  assign dm_axi_s_req.ar.qos    = '0;
-  assign dm_axi_s_req.ar.region = '0;
-  // assign dm_axi_s_resp.r.id     = '0;
-  // assign dm_axi_s_resp.r.last   = 1'b1;
-  // assign dm_axi_s_resp.b.id     = '0;
+  assign dm_master.aw_id     = '0;
+  assign dm_master.aw_len    = '0;
+  assign dm_master.aw_size   = 2'b11;// 8byte
+  assign dm_master.aw_burst  = '0;
+  assign dm_master.aw_lock   = '0;
+  assign dm_master.aw_cache  = '0;
+  assign dm_master.aw_prot   = '0;
+  assign dm_master.aw_qos    = '0;
+  assign dm_master.aw_region = '0;
+  assign dm_master.aw_atop   = '0;
+  assign dm_master.w_last    = 1'b1;
+  assign dm_master.ar_id     = '0;
+  assign dm_master.ar_len    = '0;
+  assign dm_master.ar_size   = 2'b11;// 8byte
+  assign dm_master.ar_burst  = '0;
+  assign dm_master.ar_lock   = '0;
+  assign dm_master.ar_cache  = '0;
+  assign dm_master.ar_prot   = '0;
+  assign dm_master.ar_qos    = '0;
+  assign dm_master.ar_region = '0;
+  // assign dm_master.r_id      = '0;
+  // assign dm_master.r_last    = 1'b1;
+  // assign dm_master.b_id      = '0;
+
 
   /////////////////////////////
   // Bootrom
@@ -307,6 +452,7 @@ module serpent_peripherals #(
   assign br_master.aw_prot   = '0;
   assign br_master.aw_qos    = '0;
   assign br_master.aw_region = '0;
+  assign br_master.aw_atop   = '0;
   assign br_master.w_last    = 1'b1;
   assign br_master.ar_id     = '0;
   assign br_master.ar_len    = '0;
@@ -465,6 +611,7 @@ module serpent_peripherals #(
   assign plic_master.aw_prot   = '0;
   assign plic_master.aw_qos    = '0;
   assign plic_master.aw_region = '0;
+  assign plic_master.aw_atop   = '0;
   assign plic_master.w_last    = 1'b1;
   assign plic_master.ar_id     = '0;
   assign plic_master.ar_len    = '0;
diff --git a/src/ariane.sv b/src/ariane.sv
index 1956eec39d787374b62cbaa59340c95933e3b83c..d47624ab6fbccaaa6dd15080d6bdd4d45caed489 100644
--- a/src/ariane.sv
+++ b/src/ariane.sv
@@ -29,6 +29,7 @@ import instruction_tracer_pkg::*;
 
 module ariane #(
   parameter logic [63:0] DmBaseAddress = 64'h0,            // debug module base address
+  parameter int unsigned AxiIdWidth    = 4,
 `ifdef PITON_ARIANE
   parameter bit          SwapEndianess = 0,                // swap endianess in l15 adapter
   parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000, // end of cached region
@@ -206,6 +207,7 @@ module ariane #(
   logic                     flush_ctrl_if;
   logic                     flush_ctrl_id;
   logic                     flush_ctrl_ex;
+  logic                     flush_ctrl_bp;
   logic                     flush_tlb_ctrl_ex;
   logic                     fence_i_commit_controller;
   logic                     fence_commit_controller;
@@ -244,7 +246,7 @@ module ariane #(
     .DmBaseAddress       ( DmBaseAddress )
   ) i_frontend (
     .flush_i             ( flush_ctrl_if                 ), // not entirely correct
-    .flush_bp_i          ( 1'b0                          ),
+    .flush_bp_i          ( flush_ctrl_bp                 ),
     .debug_mode_i        ( debug_mode                    ),
     .boot_addr_i         ( boot_addr_i                   ),
     .icache_dreq_i       ( icache_dreq_cache_if          ),
@@ -557,6 +559,7 @@ module ariane #(
     .flush_if_o             ( flush_ctrl_if                 ),
     .flush_id_o             ( flush_ctrl_id                 ),
     .flush_ex_o             ( flush_ctrl_ex                 ),
+    .flush_bp_o             ( flush_ctrl_bp                 ),
     .flush_tlb_o            ( flush_tlb_ctrl_ex             ),
     .flush_dcache_o         ( dcache_flush_ctrl_cache       ),
     .flush_dcache_ack_i     ( dcache_flush_ack_cache_ctrl   ),
@@ -585,6 +588,9 @@ module ariane #(
 `ifdef PITON_ARIANE
   // this is a cache subsystem that is compatible with OpenPiton
   serpent_cache_subsystem #(
+`ifdef AXI64_CACHE_PORTS    
+    .AxiIdWidth           ( AxiIdWidth    ),
+`endif    
     .CachedAddrBeg        ( CachedAddrBeg ),
     .CachedAddrEnd        ( CachedAddrEnd ),
     .SwapEndianess        ( SwapEndianess )
diff --git a/src/axi_adapter2.sv b/src/axi_adapter2.sv
index 0441c375183fe786e7a716176fae4b1fc9395935..039a7d519f73835ca633887144cec2da948024eb 100644
--- a/src/axi_adapter2.sv
+++ b/src/axi_adapter2.sv
@@ -8,8 +8,9 @@
  * CONDITIONS OF ANY KIND, either express or implied. See the License for the
  * specific language governing permissions and limitations under the License.
  *
- * File:  axi_adapter.sv
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
+ * File:  axi_adapter2.sv
+ * Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>
+ *         Florian Zaruba <zarubaf@iis.ee.ethz.ch>
  * Date:   1.8.2018
  *
  * Description: Manages communication with the AXI Bus. Note that if you intend
@@ -22,9 +23,8 @@
 import std_cache_pkg::*;
 
 module axi_adapter2 #(
-    parameter int unsigned DATA_WORDS          = 4, // data width in dwords, this is also the maximum burst length, must be >=2
-    parameter int unsigned AXI_ID_WIDTH        = 10,
-    parameter int unsigned CRITICAL_WORD_FIRST = 0  // this must be supported by the AXI subsystem, note that the data will be shifted by the word offset when this is enabled
+    parameter int unsigned AxiNumWords       = 4, // data width in dwords, this is also the maximum burst length, must be >=2
+    parameter int unsigned AxiIdWidth        = 4  // stick to the spec
 ) (
     input  logic                                        clk_i,  // Clock
     input  logic                                        rst_ni, // Asynchronous reset active low
@@ -33,37 +33,37 @@ module axi_adapter2 #(
     input  logic                                        rd_req_i,
     output logic                                        rd_gnt_o,
     input  logic [63:0]                                 rd_addr_i,
-    input  logic [$clog2(DATA_WORDS)-1:0]               rd_blen_i, // axi convention: LEN-1
+    input  logic [$clog2(AxiNumWords)-1:0]              rd_blen_i, // axi convention: LEN-1
     input  logic [1:0]                                  rd_size_i,
-    input  logic [AXI_ID_WIDTH-1:0]                     rd_id_i,   // use same ID for reads, or make sure you only have one outstanding read tx
-    // read response
+    input  logic [AxiIdWidth-1:0]                       rd_id_i,   // use same ID for reads, or make sure you only have one outstanding read tx
+    input  logic                                        rd_lock_i,
+    // read response (we have to unconditionally sink the response)
     input  logic                                        rd_rdy_i,
     output logic                                        rd_valid_o,
-    output logic [DATA_WORDS-1:0][63:0]                 rd_data_o,
-    output logic [AXI_ID_WIDTH-1:0]                     rd_id_o,
-    // can be used to determine critical word
-    output logic [63:0]                                 rd_word_o,
-    output logic                                        rd_word_valid_o,
-    output logic                                        rd_word_cnt_o,
+    output logic [AxiNumWords-1:0][63:0]                rd_data_o,
+    output logic [AxiIdWidth-1:0]                       rd_id_o,
+    output logic                                        rd_exokay_o, // indicates whether exclusive tx succeeded
     // write channel
     input  logic                                        wr_req_i,
     output logic                                        wr_gnt_o,
     input  logic [63:0]                                 wr_addr_i,
-    input  logic [DATA_WORDS-1:0][63:0]                 wr_data_i,
-    input  logic [DATA_WORDS-1:0][7:0]                  wr_be_i,
-    input  logic [$clog2(DATA_WORDS)-1:0]               wr_blen_i, // axi convention: LEN-1
+    input  logic [AxiNumWords-1:0][63:0]                wr_data_i,
+    input  logic [AxiNumWords-1:0][7:0]                 wr_be_i,
+    input  logic [$clog2(AxiNumWords)-1:0]              wr_blen_i, // axi convention: LEN-1
     input  logic [1:0]                                  wr_size_i,
-    input  logic [AXI_ID_WIDTH-1:0]                     wr_id_i,
+    input  logic [AxiIdWidth-1:0]                       wr_id_i,
+    input  logic                                        wr_lock_i,
+    input  logic [5:0]                                  wr_atop_i,
     // write response
     input  logic                                        wr_rdy_i,
     output logic                                        wr_valid_o,
-    output logic [AXI_ID_WIDTH-1:0]                     wr_id_o,
-
+    output logic [AxiIdWidth-1:0]                       wr_id_o,
+    output logic                                        wr_exokay_o, // indicates whether exclusive tx succeeded
     // AXI port
     output ariane_axi::req_t                            axi_req_o,
     input  ariane_axi::resp_t                           axi_resp_i
 );
-    localparam ADDR_INDEX = ($clog2(DATA_WORDS) > 0) ? $clog2(DATA_WORDS) : 1;
+    localparam AddrIndex = ($clog2(AxiNumWords) > 0) ? $clog2(AxiNumWords) : 1;
 
 ///////////////////////////////////////////////////////
 // write channel
@@ -74,9 +74,10 @@ module axi_adapter2 #(
     } wr_state_q, wr_state_d;
 
     // AXI tx counter
-    logic [ADDR_INDEX-1:0] wr_cnt_d, wr_cnt_q;
+    logic [AddrIndex-1:0] wr_cnt_d, wr_cnt_q;
     logic wr_single_req, wr_cnt_done, wr_cnt_clr, wr_cnt_en;
-
+    logic b_push, b_full, b_empty, b_pop, b_exokay;
+    
     assign wr_single_req       = (wr_blen_i == 0);
 
     // address
@@ -87,19 +88,38 @@ module axi_adapter2 #(
     assign axi_req_o.aw.id     = wr_id_i;
     assign axi_req_o.aw.prot   = 3'b0;
     assign axi_req_o.aw.region = 4'b0;
-    assign axi_req_o.aw.lock   = 1'b0;
+    assign axi_req_o.aw.lock   = wr_lock_i;
     assign axi_req_o.aw.cache  = 4'b0;
     assign axi_req_o.aw.qos    = 4'b0;
-    assign axi_req_o.aw.atop   = '0; // currently not used
+    assign axi_req_o.aw.atop   = wr_atop_i; 
     // data
     assign axi_req_o.w.data    = wr_data_i[wr_cnt_q];
     assign axi_req_o.w.strb    = wr_be_i[wr_cnt_q];
     assign axi_req_o.w.last    = wr_cnt_done;
 
-    // response
-    assign axi_req_o.b_ready   = wr_rdy_i;
-    assign wr_valid_o          = axi_resp_i.b_valid;
-    assign wr_id_o             = axi_resp_i.b.id;
+    // write response
+    assign b_exokay            = (axi_resp_i.b.resp == axi_pkg::RESP_EXOKAY);
+    assign axi_req_o.b_ready   = ~b_full;
+    assign b_push              = axi_resp_i.b_valid & axi_req_o.b_ready;
+    assign wr_valid_o          = ~b_empty;
+    assign b_pop               = wr_rdy_i & wr_valid_o;
+    
+    fifo_v3 #(
+      .DATA_WIDTH(AxiIdWidth+1), 
+      .DEPTH(2)
+    ) i_b_fifo (
+      .clk_i      ( clk_i      ),
+      .rst_ni     ( rst_ni     ),
+      .flush_i    ( 1'b0       ),
+      .testmode_i ( 1'b0       ),
+      .full_o     ( b_full     ),
+      .empty_o    ( b_empty    ),
+      .usage_o    (            ),
+      .data_i     ( {b_exokay, axi_resp_i.b.id}  ), 
+      .push_i     ( b_push     ),
+      .data_o     ( {wr_exokay_o, wr_id_o}       ), 
+      .pop_i      ( b_pop      )
+    );
 
     // tx counter
     assign wr_cnt_done         = (wr_cnt_q == wr_blen_i);
@@ -233,11 +253,12 @@ module axi_adapter2 #(
 ///////////////////////////////////////////////////////
 
     // AXI tx counter
-    logic [ADDR_INDEX-1:0] rd_cnt_d, rd_cnt_q;
+    logic [AddrIndex-1:0] rd_cnt_d, rd_cnt_q;
     logic rd_single_req, rd_cnt_clr, rd_cnt_en;
-    logic [DATA_WORDS-1:0][63:0] rd_data_d, rd_data_q;
+    logic [AxiNumWords-1:0][63:0] rd_data_d, rd_data_q;
     logic rd_valid_d, rd_valid_q;
-    logic [AXI_ID_WIDTH-1:0] rd_id_d, rd_id_q;
+    logic [AxiIdWidth-1:0] rd_id_d, rd_id_q;
+    logic rd_exokay_d, rd_exokay_q;
 
     assign rd_single_req       = (rd_blen_i == 0);
 
@@ -245,15 +266,14 @@ module axi_adapter2 #(
     // in case of a single request or wrapping transfer we can simply begin at the address, if we want to request a cache-line
     // with an incremental transfer we need to output the corresponding base address of the cache line
     assign axi_req_o.ar.burst  = (rd_single_req)       ? 2'b00 :
-                                 (CRITICAL_WORD_FIRST) ? 2'b10 :
-                                                         2'b01;  // wrapping transfer in case of a critical word first strategy
+                                                         2'b01;  
     assign axi_req_o.ar.addr   = rd_addr_i;
     assign axi_req_o.ar.size   = rd_size_i;
     assign axi_req_o.ar.len    = rd_blen_i;
     assign axi_req_o.ar.id     = rd_id_i;
     assign axi_req_o.ar.prot   = 3'b0;
     assign axi_req_o.ar.region = 4'b0;
-    assign axi_req_o.ar.lock   = 1'b0;
+    assign axi_req_o.ar.lock   = rd_lock_i;
     assign axi_req_o.ar.cache  = 4'b0;
     assign axi_req_o.ar.qos    = 4'b0;
 
@@ -269,22 +289,20 @@ module axi_adapter2 #(
     assign rd_cnt_clr          = axi_resp_i.r.last;
     assign rd_valid_d          = axi_resp_i.r_valid & axi_resp_i.r.last;
     assign rd_valid_o          = rd_valid_q;
+    assign rd_exokay_d         = (axi_resp_i.r.resp == axi_pkg::RESP_EXOKAY);
+    assign rd_exokay_o         = rd_exokay_q;        
 
     assign rd_id_d             = axi_resp_i.r.id;
     assign rd_id_o             = rd_id_q;
     assign rd_data_o           = rd_data_q;
-    // used to determine critical word
-    assign rd_word_o           = axi_resp_i.r.data;
-    assign rd_word_valid_o     = axi_resp_i.r_valid;
-    assign rd_word_cnt_o       = rd_cnt_q;
-
+    
     // tx counter
     assign rd_cnt_d            = (rd_cnt_clr) ? '0         :
                                  (rd_cnt_en)  ? rd_cnt_q+1 :
                                                 rd_cnt_q;
 
     generate
-        for(genvar k=0; k<DATA_WORDS; k++) begin : g_rd_data
+        for(genvar k=0; k<AxiNumWords; k++) begin : g_rd_data
             assign rd_data_d[k] = (rd_cnt_q==k && rd_cnt_en) ? axi_resp_i.r.data : rd_data_q[k];
         end
     endgenerate
@@ -301,6 +319,7 @@ module axi_adapter2 #(
             rd_data_q     <= '0;
             rd_valid_q    <= '0;
             rd_id_q       <= '0;
+            rd_exokay_q   <= '0;
         end else begin
             wr_state_q    <= wr_state_d;
             wr_cnt_q      <= wr_cnt_d;
@@ -308,6 +327,7 @@ module axi_adapter2 #(
             rd_data_q     <= rd_data_d;
             rd_valid_q    <= rd_valid_d;
             rd_id_q       <= rd_id_d;
+            rd_exokay_q   <= rd_exokay_d;
         end
     end
 
@@ -318,8 +338,8 @@ module axi_adapter2 #(
 //pragma translate_off
 `ifndef VERILATOR
    initial begin
-      assert (DATA_WORDS >= 1) else
-         $fatal(1,"[axi adapter] DATA_WORDS must be >= 1");
+      assert (AxiNumWords >= 1) else
+         $fatal(1,"[axi adapter] AxiNumWords must be >= 1");
    end
 `endif
 //pragma translate_on
diff --git a/src/axi_riscv_atomics b/src/axi_riscv_atomics
index 90729df26593ee7b6eb4f55eb2ad665b0f8d1356..7628ba65d935e03e86f0bd2eb30e8116d4f3e48f 160000
--- a/src/axi_riscv_atomics
+++ b/src/axi_riscv_atomics
@@ -1 +1 @@
-Subproject commit 90729df26593ee7b6eb4f55eb2ad665b0f8d1356
+Subproject commit 7628ba65d935e03e86f0bd2eb30e8116d4f3e48f
diff --git a/src/cache_subsystem/serpent_axi_adapter.sv b/src/cache_subsystem/serpent_axi_adapter.sv
new file mode 100644
index 0000000000000000000000000000000000000000..03915f7e99df3df1953c0fb404545a6f0ce43239
--- /dev/null
+++ b/src/cache_subsystem/serpent_axi_adapter.sv
@@ -0,0 +1,417 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Michael Schaffner <schaffner@iis.ee.ethz.ch>, ETH Zurich
+// Date: 08.08.2018
+// Description: adapter module to connect the L1D$ and L1I$ to a 64bit AXI bus.
+//
+
+import ariane_pkg::*;
+import serpent_cache_pkg::*;
+
+module serpent_axi_adapter #(
+  parameter int unsigned AxiIdWidth = 10
+) (
+   input logic                  clk_i,
+   input logic                  rst_ni,
+
+   // icache
+   input  logic                 icache_data_req_i,
+   output logic                 icache_data_ack_o,
+   input  icache_req_t          icache_data_i,
+   // returning packets must be consumed immediately
+   output logic                 icache_rtrn_vld_o,
+   output icache_rtrn_t         icache_rtrn_o,
+
+   // dcache
+   input  logic                 dcache_data_req_i,
+   output logic                 dcache_data_ack_o,
+   input  dcache_req_t          dcache_data_i,
+   // returning packets must be consumed immediately
+   output logic                 dcache_rtrn_vld_o,
+   output dcache_rtrn_t         dcache_rtrn_o,
+
+    // AXI port
+    output ariane_axi::req_t    axi_req_o,
+    input  ariane_axi::resp_t   axi_resp_i
+);
+
+// number of 64bit words buffered per AXI transaction (sized by the icache line width)
+localparam AxiNumWords = ariane_pkg::ICACHE_LINE_WIDTH/64;
+
+///////////////////////////////////////////////////////
+// request path 
+///////////////////////////////////////////////////////
+
+icache_req_t icache_data;
+logic icache_data_full, icache_data_empty;
+dcache_req_t dcache_data;
+logic dcache_data_full, dcache_data_empty;
+
+logic [1:0] arb_req, arb_ack;
+logic       arb_idx;
+
+typedef enum logic [1:0] {IFILL, LRSC, ATOP, STD} tx_t;
+tx_t tmp_type;
+
+logic rd_pending_d, rd_pending_q;
+logic axi_rd_req, axi_rd_gnt;
+logic axi_wr_req, axi_wr_gnt;
+logic axi_wr_valid, axi_rd_valid, axi_rd_rdy, axi_wr_rdy;
+logic axi_rd_lock, axi_wr_lock, axi_rd_exokay, axi_wr_exokay;
+logic [63:0]                    axi_rd_addr, axi_wr_addr;
+logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen;
+logic [1:0] axi_rd_size, axi_wr_size;
+logic [AxiIdWidth-1:0] axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out;
+logic [AxiNumWords-1:0][63:0] axi_rd_data, axi_wr_data;
+logic [AxiNumWords-1:0][7:0]  axi_wr_be;
+logic [5:0] axi_wr_atop;
+logic invalidate;
+logic [2:0] amo_off_d, amo_off_q;
+
+assign icache_data_ack_o  = icache_data_req_i & ~icache_data_full;
+assign dcache_data_ack_o  = dcache_data_req_i & ~dcache_data_full;
+
+// arbiter
+assign arb_req           = {~dcache_data_empty, ~icache_data_empty}; // bit 1: dcache fifo, bit 0: icache fifo
+
+rrarbiter #(
+  .NUM_REQ(2),
+  .LOCK_IN(1) // NOTE(review): presumably keeps the selection stable until en_i is asserted - confirm in rrarbiter
+) i_rrarbiter (
+  .clk_i  ( clk_i                   ),
+  .rst_ni ( rst_ni                  ),
+  .flush_i( '0                      ),
+  .en_i   ( axi_rd_gnt | axi_wr_gnt ), // enabled once the axi adapter has granted the read or write request
+  .req_i  ( arb_req                 ),
+  .ack_o  ( arb_ack                 ), // used as pop signal of the icache ([0]) and dcache ([1]) request fifos
+  .vld_o  (                         ),
+  .idx_o  ( arb_idx                 )  // 0 selects the icache request, 1 the dcache request (see p_axi_req)
+);
+
+always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs // state registers of the request path
+  if(~rst_ni) begin
+    rd_pending_q <= '0;
+    amo_off_q    <= '0;
+  end else begin
+    rd_pending_q <= rd_pending_d; // set when a read/atomic is granted, cleared on axi_rd_valid (see p_axi_req)
+    amo_off_q    <= amo_off_d;    // byte offset of the SC access, selects the bit carrying the SC status in p_axi_rtrn
+  end
+end
+
+// request side
+always_comb begin : p_axi_req // translates the arbitrated cache request into an AXI read/write request
+  axi_wr_data  = dcache_data.data;
+  axi_wr_addr  = dcache_data.paddr;
+  axi_wr_size  = dcache_data.size[1:0];
+  axi_wr_req   = 1'b0;
+  axi_wr_blen  = '0;// single word writes
+  axi_wr_be    = '0;
+  axi_wr_lock  = '0;
+  axi_wr_atop  = '0;
+
+  axi_rd_req   = 1'b0;
+  axi_rd_blen  = '0;
+  axi_rd_lock  = '0;
+
+  tmp_type     = STD;
+  invalidate   = 1'b0;
+  amo_off_d    = amo_off_q; // hold by default, otherwise a latch is inferred for amo_off_d
+  // currently we only keep one pending read transaction due
+  // to the deserialization mechanism (only one buffer for one ID available)
+  rd_pending_d = (axi_rd_valid) ? '0 : rd_pending_q;
+
+  // decode message type
+  if (|arb_req) begin
+    if (arb_idx == 0) begin
+      //////////////////////////////////////
+      // IMISS
+      axi_rd_req   = !rd_pending_q;
+      rd_pending_d = (axi_rd_valid) ? '0 : rd_pending_q | axi_rd_gnt;
+
+      tmp_type     = IFILL;
+      if (~icache_data.nc) begin
+        axi_rd_blen = ariane_pkg::ICACHE_LINE_WIDTH/64-1;
+      end
+      //////////////////////////////////////
+    end else begin
+      unique case (dcache_data.rtype)
+        //////////////////////////////////////
+        serpent_cache_pkg::DCACHE_LOAD_REQ: begin
+          axi_rd_req   = !rd_pending_q;
+          rd_pending_d = (axi_rd_valid) ? '0 : rd_pending_q | axi_rd_gnt;
+
+          if (dcache_data.size[2]) axi_rd_blen = ariane_pkg::DCACHE_LINE_WIDTH/64-1;
+        end
+        //////////////////////////////////////
+        serpent_cache_pkg::DCACHE_STORE_REQ: begin
+          axi_wr_req   = 1'b1;
+          axi_wr_be    = serpent_cache_pkg::toByteEnable8(dcache_data.paddr[2:0], dcache_data.size[1:0]);
+        end
+        //////////////////////////////////////
+        serpent_cache_pkg::DCACHE_ATOMIC_REQ: begin
+          // default
+          // push back an invalidation here.
+          // since we only keep one read tx in flight, and since
+          // the dcache drains all writes/reads before executing
+          // an atomic, this is safe.
+          invalidate   = !rd_pending_q;
+          axi_wr_req   = !rd_pending_q;
+          rd_pending_d = (axi_rd_valid) ? '0 : rd_pending_q | axi_wr_gnt;
+
+          tmp_type     = ATOP;
+          axi_wr_be    = serpent_cache_pkg::toByteEnable8(dcache_data.paddr[2:0], dcache_data.size[1:0]);
+          unique case (dcache_data.amo_op)
+            AMO_LR: begin
+              axi_rd_lock  = 1'b1;
+              axi_rd_req   = !rd_pending_q;
+              rd_pending_d = (axi_rd_valid) ? '0 : rd_pending_q | axi_rd_gnt;
+              tmp_type     = LRSC;
+              // tie to zero in this special case
+              axi_wr_req   = 1'b0;
+              axi_wr_be    = '0;
+            end
+            AMO_SC: begin
+              axi_wr_lock  = 1'b1;
+              tmp_type     = LRSC;
+              rd_pending_d = (axi_rd_valid) ? '0 : rd_pending_q;
+              // needed to properly encode success
+              unique case (dcache_data.size[1:0])
+                2'b00: amo_off_d    = dcache_data.paddr[2:0];
+                2'b01: amo_off_d    = {dcache_data.paddr[2:1], 1'b0};
+                2'b10: amo_off_d    = {dcache_data.paddr[2],   2'b00};
+                2'b11: amo_off_d    = '0;
+              endcase
+            end
+            // RISC-V atops have a load semantic
+            AMO_SWAP: axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ATOMICSWAP};
+            AMO_ADD:  axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD};
+            AMO_AND:  begin
+              // in this case we need to invert the data to get a "CLR"
+              axi_wr_data  = ~dcache_data.data;
+              axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR};
+            end
+            AMO_OR:   axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SET};
+            AMO_XOR:  axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_EOR};
+            AMO_MAX:  axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMAX};
+            AMO_MAXU: axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMAX};
+            AMO_MIN:  axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_SMIN};
+            AMO_MINU: axi_wr_atop  = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_UMIN};
+          endcase
+        end
+      //////////////////////////////////////
+      endcase
+    end
+  end
+
+  // id generation and arbiter mux. these are assigned outside of the request
+  // guard such that every signal is driven on all paths (no inferred latches).
+  // the values are don't care while neither axi_rd_req nor axi_wr_req is set.
+  axi_wr_id_in = {dcache_data.tid, tmp_type};
+  if (arb_idx) begin
+    axi_rd_addr  = dcache_data.paddr;
+    axi_rd_size  = dcache_data.size[1:0];
+    axi_rd_id_in = {dcache_data.tid, tmp_type};
+  end else begin
+    axi_rd_addr  = icache_data.paddr;
+    axi_rd_size  = 2'b11;// always request 64bit words in case of ifill
+    axi_rd_id_in = {icache_data.tid, tmp_type};
+  end
+end
+
+fifo_v2 #(
+     .dtype       (  icache_req_t            ),
+     .DEPTH       (  ADAPTER_REQ_FIFO_DEPTH  )
+) i_icache_data_fifo (
+     .clk_i       (  clk_i                   ),
+     .rst_ni      (  rst_ni                  ),
+     .flush_i     (  1'b0                    ),
+     .testmode_i  (  1'b0                    ),
+     .full_o      (  icache_data_full        ),
+     .empty_o     (  icache_data_empty       ),
+     .alm_full_o  (                          ),
+     .alm_empty_o (                          ),
+     .data_i      (  icache_data_i           ),
+     .push_i      (  icache_data_ack_o       ),
+     .data_o      (  icache_data             ),
+     .pop_i       (  arb_ack[0]              )
+);
+
+fifo_v2 #(
+     .dtype       (  dcache_req_t            ),
+     .DEPTH       (  ADAPTER_REQ_FIFO_DEPTH  )
+) i_dcache_data_fifo (
+     .clk_i       (  clk_i                   ),
+     .rst_ni      (  rst_ni                  ),
+     .flush_i     (  1'b0                    ),
+     .testmode_i  (  1'b0                    ),
+     .full_o      (  dcache_data_full        ),
+     .empty_o     (  dcache_data_empty       ),
+     .alm_full_o  (                          ),
+     .alm_empty_o (                          ),
+     .data_i      (  dcache_data_i           ),
+     .push_i      (  dcache_data_ack_o       ),
+     .data_o      (  dcache_data             ),
+     .pop_i       (  arb_ack[1]              )
+);
+
+///////////////////////////////////////////////////////
+// return path from AXI
+///////////////////////////////////////////////////////
+
+always_comb begin : p_axi_rtrn // builds the icache/dcache return packets from AXI responses
+  dcache_rtrn_o              = '0;
+  icache_rtrn_o              = '0;
+  icache_rtrn_vld_o          = 1'b0;
+  dcache_rtrn_vld_o          = 1'b0;
+  icache_rtrn_o.data         = axi_rd_data;
+  dcache_rtrn_o.data         = axi_rd_data;
+
+  // we are always ready to consume packets unconditionally,
+  // but we give prio to read responses below
+  axi_rd_rdy                 = 1'b1;
+  axi_wr_rdy                 = 1'b1;
+
+  //////////////////////////////////////
+  // this is safe, there is no other read tx in flight than this atomic.
+  // note that this self invalidation is handled in this way due to the
+  // write-through cache architecture, which is aligned with the openpiton
+  // cache subsystem.
+  if (invalidate) begin
+    axi_wr_rdy             = 1'b0; // write responses stay queued in the adapter while invalidating
+    icache_rtrn_vld_o      = 1'b1;
+    dcache_rtrn_vld_o      = 1'b1;
+    icache_rtrn_o.rtype    = serpent_cache_pkg::ICACHE_INV_REQ;
+    dcache_rtrn_o.rtype    = serpent_cache_pkg::DCACHE_INV_REQ;
+    icache_rtrn_o.inv.all  = 1'b1;
+    dcache_rtrn_o.inv.all  = 1'b1;
+    icache_rtrn_o.inv.idx  = dcache_data.paddr[ariane_pkg::ICACHE_INDEX_WIDTH-1:0]; // index range, not a single bit
+    dcache_rtrn_o.inv.idx  = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; // index range, not a single bit
+  //////////////////////////////////////
+  end else if (axi_rd_valid) begin
+    // we give prio to read responses
+    axi_wr_rdy                 = 1'b0;
+    unique case(tx_t'(axi_rd_id_out[1:0])) // lower two ID bits carry the transaction type, see p_axi_req
+      STD:   begin
+        dcache_rtrn_vld_o      = 1'b1;
+        dcache_rtrn_o.rtype    = serpent_cache_pkg::DCACHE_LOAD_ACK;
+        dcache_rtrn_o.tid      = axi_rd_id_out>>2;
+      end
+      LRSC:  begin
+        dcache_rtrn_vld_o      = 1'b1;
+        dcache_rtrn_o.rtype    = serpent_cache_pkg::DCACHE_ATOMIC_ACK;
+        dcache_rtrn_o.tid      = axi_rd_id_out>>2;
+      end
+      ATOP:  begin
+        dcache_rtrn_vld_o      = 1'b1;
+        dcache_rtrn_o.rtype    = serpent_cache_pkg::DCACHE_ATOMIC_ACK;
+        dcache_rtrn_o.tid      = axi_rd_id_out>>2;
+      end
+      IFILL: begin
+        icache_rtrn_vld_o      = 1'b1;
+        icache_rtrn_o.rtype    = serpent_cache_pkg::ICACHE_IFILL_ACK;
+        icache_rtrn_o.tid      = axi_rd_id_out>>2;
+      end
+    endcase
+  //////////////////////////////////////
+  end else if (axi_wr_valid) begin
+    dcache_rtrn_vld_o  = 1'b1;
+    dcache_rtrn_o.tid  = axi_wr_id_out>>2;
+    unique case(tx_t'(axi_wr_id_out[1:0]))
+      STD:   dcache_rtrn_o.rtype = serpent_cache_pkg::DCACHE_STORE_ACK;
+      ATOP:  dcache_rtrn_vld_o   = 1'b0; // silently drop atop write response, as we only rely on the read response here
+      LRSC:  begin
+        dcache_rtrn_o.rtype = serpent_cache_pkg::DCACHE_ATOMIC_ACK;
+        // encode success: the bit at the byte offset of the SC is 0 on exokay, 1 otherwise
+        dcache_rtrn_o.data  = '0;
+        dcache_rtrn_o.data[amo_off_q*8] = (axi_wr_exokay) ? '0 : 1'b1;
+      end
+      default: dcache_rtrn_vld_o   = 1'b0;
+    endcase
+  end
+  //////////////////////////////////////
+end
+
+
+// remote invalidations are not supported yet (this needs a cache coherence protocol)
+// note that the atomic transactions would also need a "master exclusive monitor" in that case
+// assign icache_rtrn_o.inv.idx  = '0;
+// assign icache_rtrn_o.inv.way  = '0;
+// assign icache_rtrn_o.inv.vld  = '0;
+// assign icache_rtrn_o.inv.all  = '0;
+
+// assign dcache_rtrn_o.inv.idx  = '0;
+// assign dcache_rtrn_o.inv.way  = '0;
+// assign dcache_rtrn_o.inv.vld  = '0;
+// assign dcache_rtrn_o.inv.all  = '0;
+
+
+
+///////////////////////////////////////////////////////
+// axi adapter
+///////////////////////////////////////////////////////
+
+axi_adapter2 #(
+  .AxiNumWords     ( AxiNumWords     ),
+  .AxiIdWidth      ( AxiIdWidth      )
+) i_axi_adapter (
+  .clk_i           ( clk_i             ),
+  .rst_ni          ( rst_ni            ),
+  .rd_req_i        ( axi_rd_req        ),
+  .rd_gnt_o        ( axi_rd_gnt        ),
+  .rd_addr_i       ( axi_rd_addr       ),
+  .rd_blen_i       ( axi_rd_blen       ),
+  .rd_size_i       ( axi_rd_size       ),
+  .rd_id_i         ( axi_rd_id_in      ),
+  .rd_rdy_i        ( axi_rd_rdy        ),
+  .rd_lock_i       ( axi_rd_lock       ),
+  .rd_valid_o      ( axi_rd_valid      ),
+  .rd_data_o       ( axi_rd_data       ),
+  .rd_id_o         ( axi_rd_id_out     ),
+  .rd_exokay_o     ( axi_rd_exokay     ),    
+  .wr_req_i        ( axi_wr_req        ),
+  .wr_gnt_o        ( axi_wr_gnt        ),
+  .wr_addr_i       ( axi_wr_addr       ),
+  .wr_data_i       ( axi_wr_data       ),
+  .wr_be_i         ( axi_wr_be         ),
+  .wr_blen_i       ( axi_wr_blen       ),
+  .wr_size_i       ( axi_wr_size       ),
+  .wr_id_i         ( axi_wr_id_in      ),
+  .wr_lock_i       ( axi_wr_lock       ),
+  .wr_atop_i       ( axi_wr_atop       ),
+  .wr_rdy_i        ( axi_wr_rdy        ),
+  .wr_valid_o      ( axi_wr_valid      ),
+  .wr_id_o         ( axi_wr_id_out     ),
+  .wr_exokay_o     ( axi_wr_exokay     ),
+  .axi_req_o       ( axi_req_o         ),
+  .axi_resp_i      ( axi_resp_i        )
+);
+
+///////////////////////////////////////////////////////
+// assertions
+///////////////////////////////////////////////////////
+
+//pragma translate_off
+`ifndef VERILATOR
+  initial begin // elaboration-time parameter checks (simulation only)
+    assert (AxiIdWidth >= $clog2(serpent_cache_pkg::DCACHE_MAX_TX)+2) else // two extra ID bits encode the tx_t type
+      $fatal(1,$psprintf("[axi adapter] AXI ID must be at least %01d bit wide", $clog2(serpent_cache_pkg::DCACHE_MAX_TX)+2));
+    assert (ariane_pkg::ICACHE_LINE_WIDTH >= ariane_pkg::DCACHE_LINE_WIDTH) else // AxiNumWords is sized by the icache line
+      $fatal(1,"[axi adapter] AXI shim currently assumes that the icache line size >= dcache line size");
+  end
+
+  lr_exokay: assert property (
+  @(posedge clk_i) disable iff (~rst_ni) axi_rd_valid |-> axi_rd_rdy |-> tx_t'(axi_rd_id_out[1:0]) == LRSC |-> axi_rd_exokay)
+    else $warning("[axi adapter] LR did not receive an exokay, indicating that atomics are not supported");
+  
+`endif
+//pragma translate_on
+
+endmodule // serpent_axi_adapter
\ No newline at end of file
diff --git a/src/cache_subsystem/serpent_cache_subsystem.sv b/src/cache_subsystem/serpent_cache_subsystem.sv
index 0505ecd431b12838b17bd0cc84577bd8274d9656..89ec1bc57fd94d4ba7a3f52b2005a639e7b1649c 100644
--- a/src/cache_subsystem/serpent_cache_subsystem.sv
+++ b/src/cache_subsystem/serpent_cache_subsystem.sv
@@ -76,12 +76,6 @@ logic dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld;
 serpent_cache_pkg::dcache_req_t  dcache_adapter;
 serpent_cache_pkg::dcache_rtrn_t adapter_dcache;
 
-`ifdef AXI64_CACHE_PORTS
-// used for local plumbing in this case
-l15_req_t                       l15_req;
-l15_rtrn_t                      l15_rtrn;
-`endif
-
 serpent_icache #(
 `ifdef AXI64_CACHE_PORTS
     .Axi64BitCompliant  ( 1'b1          ),
@@ -113,6 +107,9 @@ serpent_icache #(
 // they have equal prio and are RR arbited
 // Port 2 is write only and goes into the merging write buffer
 serpent_dcache #(
+`ifdef AXI64_CACHE_PORTS
+    .Axi64BitCompliant  ( 1'b1          ),
+`endif
     // use ID 1 for dcache reads and amos. note that the writebuffer
     // uses all IDs up to DCACHE_MAX_TX-1 for write transactions.
     .RdAmoTxId       ( 1             ),
@@ -138,199 +135,75 @@ serpent_dcache #(
   );
 
 
-// arbiter/adapter
-serpent_l15_adapter #(
-    .SwapEndianess   ( SwapEndianess ),
-    .CachedAddrBeg   ( CachedAddrBeg ),
-    .CachedAddrEnd   ( CachedAddrEnd )
-  ) i_adapter (
-    .clk_i              ( clk_i                   ),
-    .rst_ni             ( rst_ni                  ),
-    .icache_data_req_i  ( icache_adapter_data_req ),
-    .icache_data_ack_o  ( adapter_icache_data_ack ),
-    .icache_data_i      ( icache_adapter          ),
-    .icache_rtrn_vld_o  ( adapter_icache_rtrn_vld ),
-    .icache_rtrn_o      ( adapter_icache          ),
-    .dcache_data_req_i  ( dcache_adapter_data_req ),
-    .dcache_data_ack_o  ( adapter_dcache_data_ack ),
-    .dcache_data_i      ( dcache_adapter          ),
-    .dcache_rtrn_vld_o  ( adapter_dcache_rtrn_vld ),
-    .dcache_rtrn_o      ( adapter_dcache          ),
-`ifdef AXI64_CACHE_PORTS
-    .l15_req_o          ( l15_req                 ),
-    .l15_rtrn_i         ( l15_rtrn                )
-`else
-    .l15_req_o          ( l15_req_o               ),
-    .l15_rtrn_i         ( l15_rtrn_i              )
-`endif
-  );
-
 ///////////////////////////////////////////////////////
-// different memory plumbing to allow for using the
-// serpent cache subsystem in a standard AXI setting
-// for verificaton purposes.
+// memory plumbing, either use 64bit AXI port or native
+// L15 cache interface (derived from OpenSPARC CCX).
 ///////////////////////////////////////////////////////
 
 `ifdef AXI64_CACHE_PORTS
-
-// support up to 512bit cache lines
-localparam AxiNumWords = 8;
-
-logic axi_rd_req, axi_rd_gnt;
-logic [63:0]                    axi_rd_addr, axi_wr_addr;
-logic [$clog2(AxiNumWords)-1:0] axi_rd_blen, axi_wr_blen;
-logic [1:0] axi_rd_size, axi_wr_size;
-logic [AxiIdWidth-1:0] axi_rd_id_in, axi_wr_id_in, axi_rd_id_out, axi_wr_id_out;
-logic axi_rd_valid;
-logic [AxiNumWords-1:0][63:0] axi_rd_data, axi_wr_data;
-logic [AxiNumWords-1:0][7:0] axi_wr_be;
-logic axi_wr_req, axi_wr_gnt;
-logic axi_wr_valid, axi_rd_rdy, axi_wr_rdy;
-
-logic ifill;
-logic [serpent_cache_pkg::L15_TID_WIDTH+2-1:0] id_tmp;
-logic rd_pending_d, rd_pending_q;
-
-// request side
-assign ifill = (l15_req.l15_rqtype==serpent_cache_pkg::L15_IMISS_RQ);
-
-assign axi_rd_req = l15_req.l15_val && (l15_req.l15_rqtype==serpent_cache_pkg::L15_LOAD_RQ | ifill) && !rd_pending_q;
-assign axi_wr_req = l15_req.l15_val && (l15_req.l15_rqtype==serpent_cache_pkg::L15_STORE_RQ);
-
-assign axi_rd_addr = l15_req.l15_address;
-assign axi_wr_addr = axi_rd_addr;
-
-// the axi interconnect does not correctly handle the ordering of read responses.
-// workaround: only allow for one outstanding TX. need to improve this.
-assign rd_pending_d = (axi_rd_valid ) ? '0 : rd_pending_q | axi_rd_gnt;
-
-assign axi_rd_id_in = {l15_req.l15_threadid, ifill, l15_req.l15_nc};
-assign axi_wr_id_in = axi_rd_id_in;
-
-assign axi_rd_size = (ifill) ? 2'b11 : l15_req.l15_size[1:0];// always request 64bit words in case of ifill
-assign axi_wr_size = l15_req.l15_size[1:0];
-
-assign axi_rd_blen = (l15_req.l15_size[2]) ? ((ifill) ? ariane_pkg::ICACHE_LINE_WIDTH/64-1  :
-                                                        ariane_pkg::DCACHE_LINE_WIDTH/64-1) : '0;
-assign axi_wr_blen = '0;// single word writes
-
-assign axi_wr_data = l15_req.l15_data;
-assign axi_wr_be   = (axi_wr_req) ? serpent_cache_pkg::toByteEnable8(axi_wr_addr[2:0], axi_wr_size) : '0;
-
-
-// return path
-always_comb begin : p_axi_rtrn
-  // default
-  l15_rtrn                   = '0;
-
-  // from request path
-  l15_rtrn.l15_ack           = axi_rd_gnt | axi_wr_gnt;
-  l15_rtrn.l15_header_ack    = axi_rd_gnt | axi_wr_gnt;
-
-  // we are always ready to consume packets unconditionally,
-  // but in case of returning reads, we have to stall the write response
-  axi_rd_rdy                 = 1'b1;
-  axi_wr_rdy                 = ~axi_rd_valid;// this vld signal comes directly from a register
-
-  // unconditionally consume packets
-  l15_rtrn.l15_val           = axi_rd_valid | axi_wr_valid;
-
-  // encode packet type
-  id_tmp                     = (axi_rd_valid) ? axi_rd_id_out : axi_wr_id_out;
-  l15_rtrn.l15_returntype    = (axi_rd_valid && id_tmp[1]) ? L15_IFILL_RET :
-  (axi_rd_valid)              ? L15_LOAD_RET  :
-  L15_ST_ACK;
-
-  // decode id and set flags accordingly
-  l15_rtrn.l15_noncacheable  = id_tmp[0];
-  l15_rtrn.l15_threadid      = id_tmp>>2;
-  // 4B non-cacheable ifill
-  l15_rtrn.l15_f4b           = id_tmp[0] & id_tmp[1] & axi_rd_valid;
-
-  l15_rtrn.l15_data_0        = axi_rd_data[0];
-  l15_rtrn.l15_data_1        = axi_rd_data[1];
-  l15_rtrn.l15_data_2        = axi_rd_data[2];
-  l15_rtrn.l15_data_3        = axi_rd_data[3];
-end
-
-always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
-  if(~rst_ni) begin
-    rd_pending_q <= '0;
-  end else begin
-    rd_pending_q <= rd_pending_d;
-  end
-end
-
-
-axi_adapter2 #(
-  .DATA_WORDS      ( AxiNumWords     ),
-  .AXI_ID_WIDTH    ( AxiIdWidth      )
-) i_axi_adapter (
-  .clk_i           ( clk_i             ),
-  .rst_ni          ( rst_ni            ),
-  .rd_req_i        ( axi_rd_req        ),
-  .rd_gnt_o        ( axi_rd_gnt        ),
-  .rd_addr_i       ( axi_rd_addr       ),
-  .rd_blen_i       ( axi_rd_blen       ),
-  .rd_size_i       ( axi_rd_size       ),
-  .rd_id_i         ( axi_rd_id_in      ),
-  .rd_rdy_i        ( axi_rd_rdy        ),
-  .rd_valid_o      ( axi_rd_valid      ),
-  .rd_data_o       ( axi_rd_data       ),
-  .rd_id_o         ( axi_rd_id_out     ),
-  .rd_word_o       (                   ),
-  .rd_word_valid_o (                   ),
-  .rd_word_cnt_o   (                   ),
-  .wr_req_i        ( axi_wr_req        ),
-  .wr_gnt_o        ( axi_wr_gnt        ),
-  .wr_addr_i       ( axi_wr_addr       ),
-  .wr_data_i       ( axi_wr_data       ),
-  .wr_be_i         ( axi_wr_be         ),
-  .wr_blen_i       ( axi_wr_blen       ),
-  .wr_size_i       ( axi_wr_size       ),
-  .wr_id_i         ( axi_wr_id_in      ),
-  .wr_rdy_i        ( axi_wr_rdy        ),
-  .wr_valid_o      ( axi_wr_valid      ),
-  .wr_id_o         ( axi_wr_id_out     ),
-  .axi_req_o       ( axi_req_o         ),
-  .axi_resp_i      ( axi_resp_i        )
-);
-
+  serpent_axi_adapter #(
+      .AxiIdWidth   ( AxiIdWidth )
+    ) i_adapter (
+      .clk_i              ( clk_i                   ),
+      .rst_ni             ( rst_ni                  ),
+      .icache_data_req_i  ( icache_adapter_data_req ),
+      .icache_data_ack_o  ( adapter_icache_data_ack ),
+      .icache_data_i      ( icache_adapter          ),
+      .icache_rtrn_vld_o  ( adapter_icache_rtrn_vld ),
+      .icache_rtrn_o      ( adapter_icache          ),
+      .dcache_data_req_i  ( dcache_adapter_data_req ),
+      .dcache_data_ack_o  ( adapter_dcache_data_ack ),
+      .dcache_data_i      ( dcache_adapter          ),
+      .dcache_rtrn_vld_o  ( adapter_dcache_rtrn_vld ),
+      .dcache_rtrn_o      ( adapter_dcache          ),
+      .axi_req_o          ( axi_req_o               ),
+      .axi_resp_i         ( axi_resp_i              )
+    );
+`else
+  serpent_l15_adapter #(
+      .SwapEndianess   ( SwapEndianess )
+    ) i_adapter (
+      .clk_i              ( clk_i                   ),
+      .rst_ni             ( rst_ni                  ),
+      .icache_data_req_i  ( icache_adapter_data_req ),
+      .icache_data_ack_o  ( adapter_icache_data_ack ),
+      .icache_data_i      ( icache_adapter          ),
+      .icache_rtrn_vld_o  ( adapter_icache_rtrn_vld ),
+      .icache_rtrn_o      ( adapter_icache          ),
+      .dcache_data_req_i  ( dcache_adapter_data_req ),
+      .dcache_data_ack_o  ( adapter_dcache_data_ack ),
+      .dcache_data_i      ( dcache_adapter          ),
+      .dcache_rtrn_vld_o  ( adapter_dcache_rtrn_vld ),
+      .dcache_rtrn_o      ( adapter_dcache          ),
+      .l15_req_o          ( l15_req_o               ),
+      .l15_rtrn_i         ( l15_rtrn_i              )
+    );
 `endif
 
-
 ///////////////////////////////////////////////////////
 // assertions
 ///////////////////////////////////////////////////////
 
 //pragma translate_off
 `ifndef VERILATOR
-
-`ifdef AXI64_CACHE_PORTS
-  initial begin
-    assert (AxiIdWidth >= $clog2(serpent_cache_pkg::DCACHE_MAX_TX)+2) else
-      $fatal(1,$psprintf("[l1 cache] AXI ID must be at least %01d bit wide", $clog2(serpent_cache_pkg::DCACHE_MAX_TX)+2));
+  a_invalid_instruction_fetch: assert property (
+    @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
+  else $warning("[l1 icache] reading invalid instructions: vaddr=%08X, data=%08X",
+    icache_dreq_o.vaddr, icache_dreq_o.data);
+  // note: unlike $fatal, $warning takes no finish_number, only the message arguments
+  a_invalid_write_data: assert property (
+    @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX)
+  else $warning("[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
+    {dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index}, dcache_req_ports_i[2].data_be, dcache_req_ports_i[2].data_wdata);
+
+  // same X check for the read data returned on dcache ports 0 and 1
+  for(genvar j=0; j<2; j++) begin : g_assertion
+    a_invalid_read_data: assert property (
+      @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
+    else $warning("[l1 dcache] reading invalid data on port %01d: data=%016X",
+      j, dcache_req_ports_o[j].data_rdata);
   end
 `endif
-
-a_invalid_instruction_fetch: assert property (
-  @(posedge clk_i) disable iff (~rst_ni) icache_dreq_o.valid |-> (|icache_dreq_o.data) !== 1'hX)
-else $warning(1,"[l1 dcache] reading invalid instructions: vaddr=%08X, data=%08X",
-  icache_dreq_o.vaddr, icache_dreq_o.data);
-
-a_invalid_write_data: assert property (
-  @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_i[2].data_req |-> |dcache_req_ports_i[2].data_be |-> (|dcache_req_ports_i[2].data_wdata) !== 1'hX)
-else $warning(1,"[l1 dcache] writing invalid data: paddr=%016X, be=%02X, data=%016X",
-  {dcache_req_ports_i[2].address_tag, dcache_req_ports_i[2].address_index}, dcache_req_ports_i[2].data_be, dcache_req_ports_i[2].data_wdata);
-
-
-for(genvar j=0; j<2; j++) begin : g_assertion
-  a_invalid_read_data: assert property (
-    @(posedge clk_i) disable iff (~rst_ni) dcache_req_ports_o[j].data_rvalid |-> (|dcache_req_ports_o[j].data_rdata) !== 1'hX)
-  else $warning(1,"[l1 dcache] reading invalid data on port %01d: data=%016X",
-    j, dcache_req_ports_o[j].data_rdata);
-end
-`endif
 //pragma translate_on
 
 
diff --git a/src/cache_subsystem/serpent_dcache.sv b/src/cache_subsystem/serpent_dcache.sv
index c1c8c40458cc99ac64cdbb1d1a474b0e2045ed3d..5bea217fb927c1bcf3edb0c9e730870e23d5dab8 100644
--- a/src/cache_subsystem/serpent_dcache.sv
+++ b/src/cache_subsystem/serpent_dcache.sv
@@ -16,11 +16,12 @@ import ariane_pkg::*;
 import serpent_cache_pkg::*;
 
 module serpent_dcache #(
+    parameter bit                         Axi64BitCompliant  = 1'b0,             // set this to 1 when using in conjunction with 64bit AXI bus adapter
     // ID to be used for read and AMO transactions.
     // note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions
-    parameter logic [DCACHE_ID_WIDTH-1:0] RdAmoTxId     = 1,
-    parameter logic [63:0]                CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
-    parameter logic [63:0]                CachedAddrEnd = 64'h80_0000_0000  // end of cached region
+    parameter logic [CACHE_ID_WIDTH-1:0]  RdAmoTxId          = 1,
+    parameter logic [63:0]                CachedAddrBeg      = 64'h00_8000_0000, // begin of cached region
+    parameter logic [63:0]                CachedAddrEnd      = 64'h80_0000_0000  // end of cached region
 ) (
     input  logic                           clk_i,       // Clock
     input  logic                           rst_ni,      // Asynchronous reset active low
@@ -79,10 +80,10 @@ module serpent_dcache #(
     logic [NumPorts-1:0][63:0]                    miss_paddr;
     logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]    miss_vld_bits;
     logic [NumPorts-1:0][2:0]                     miss_size;
-    logic [NumPorts-1:0][DCACHE_ID_WIDTH-1:0]     miss_id;
+    logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0]      miss_id;
     logic [NumPorts-1:0]                          miss_replay;
     logic [NumPorts-1:0]                          miss_rtrn_vld;
-    logic [DCACHE_ID_WIDTH-1:0]                   miss_rtrn_id;
+    logic [CACHE_ID_WIDTH-1:0]                    miss_rtrn_id;
 
     // memory <-> read controllers/miss unit
     logic [NumPorts-1:0]                          rd_prio;
@@ -109,8 +110,9 @@ module serpent_dcache #(
 ///////////////////////////////////////////////////////
 
     serpent_dcache_missunit #(
-        .AmoTxId  ( RdAmoTxId ),
-        .NumPorts ( NumPorts  )
+        .Axi64BitCompliant ( Axi64BitCompliant ),
+        .AmoTxId           ( RdAmoTxId         ),
+        .NumPorts          ( NumPorts          )
     ) i_serpent_dcache_missunit (
         .clk_i              ( clk_i              ),
         .rst_ni             ( rst_ni             ),
@@ -170,8 +172,8 @@ module serpent_dcache #(
         serpent_dcache_ctrl #(
                 .RdTxId        ( RdAmoTxId     ),
                 .CachedAddrBeg ( CachedAddrBeg ),
-                .CachedAddrEnd ( CachedAddrEnd ))
-            i_serpent_dcache_ctrl (
+                .CachedAddrEnd ( CachedAddrEnd )
+        ) i_serpent_dcache_ctrl (
                 .clk_i           ( clk_i             ),
                 .rst_ni          ( rst_ni            ),
                 .cache_en_i      ( cache_en          ),
@@ -215,8 +217,8 @@ module serpent_dcache #(
 
     serpent_dcache_wbuffer #(
             .CachedAddrBeg ( CachedAddrBeg ),
-            .CachedAddrEnd ( CachedAddrEnd ))
-        i_serpent_dcache_wbuffer (
+            .CachedAddrEnd ( CachedAddrEnd )
+    ) i_serpent_dcache_wbuffer (
             .clk_i           ( clk_i               ),
             .rst_ni          ( rst_ni              ),
             .empty_o         ( wbuffer_empty_o     ),
@@ -268,9 +270,10 @@ module serpent_dcache #(
 // memory arrays, arbitration and tag comparison
 ///////////////////////////////////////////////////////
 
-   serpent_dcache_mem #(
-            .NumPorts(NumPorts)
-        ) i_serpent_dcache_mem (
+    serpent_dcache_mem #(
+            .Axi64BitCompliant ( Axi64BitCompliant ),
+            .NumPorts          ( NumPorts          )
+    ) i_serpent_dcache_mem (
             .clk_i             ( clk_i              ),
             .rst_ni            ( rst_ni             ),
             // read ports
diff --git a/src/cache_subsystem/serpent_dcache_ctrl.sv b/src/cache_subsystem/serpent_dcache_ctrl.sv
index 5d4cf9a54c332b8f02ac55e9d79cb6732be53f34..c23058e3e0b0dd4ed8315a6df97449f94882fa70 100644
--- a/src/cache_subsystem/serpent_dcache_ctrl.sv
+++ b/src/cache_subsystem/serpent_dcache_ctrl.sv
@@ -17,7 +17,7 @@ import ariane_pkg::*;
 import serpent_cache_pkg::*;
 
 module serpent_dcache_ctrl #(
-    parameter logic [DCACHE_ID_WIDTH-1:0] RdTxId        = 1,                // ID to use for read transactions
+    parameter logic [CACHE_ID_WIDTH-1:0]  RdTxId        = 1,                // ID to use for read transactions
     parameter logic [63:0]                CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
     parameter logic [63:0]                CachedAddrEnd = 64'h80_0000_0000  // end of cached region
 ) (
@@ -36,7 +36,7 @@ module serpent_dcache_ctrl #(
     output logic [63:0]                     miss_paddr_o,
     output logic                            miss_nc_o,       // request to I/O space
     output logic [2:0]                      miss_size_o,     // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline
-    output logic [DCACHE_ID_WIDTH-1:0]      miss_id_o,       // set to constant ID
+    output logic [CACHE_ID_WIDTH-1:0]       miss_id_o,       // set to constant ID
     input  logic                            miss_replay_i,   // request collided with pending miss - have to replay the request
     input  logic                            miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory
     // used to detect readout mux collisions
diff --git a/src/cache_subsystem/serpent_dcache_mem.sv b/src/cache_subsystem/serpent_dcache_mem.sv
index 968617d4698176f3909d6aba63d1e01435a67964..5d11f425c8f2bd369ec45059b0af204916ebceb3 100644
--- a/src/cache_subsystem/serpent_dcache_mem.sv
+++ b/src/cache_subsystem/serpent_dcache_mem.sv
@@ -14,12 +14,12 @@
 //
 //
 // Notes: 1) all ports can trigger a readout of all ways, and the way where the tag hits is selected
-//  
+//
 //        2) only port0 can write full cache lines. higher ports are read only. also, port0 can only read the tag array,
 //           and does not trigger a cache line readout.
 //
 //        3) the single word write port is a separate port without access to the tag memory.
-//           these single word writes can interleave with read operations if they go to different 
+//           these single word writes can interleave with read operations if they go to different
 //           cacheline offsets, since each word offset is placed into a different SRAM bank.
 //
 //        4) Read ports with same priority are RR arbited. but high prio ports (rd_prio_i[port_nr] = '1b1) will stall
@@ -29,45 +29,46 @@ import ariane_pkg::*;
 import serpent_cache_pkg::*;
 
 module serpent_dcache_mem #(
-        parameter int unsigned NumPorts     = 3
-    )(
-        input  logic                                              clk_i,
-        input  logic                                              rst_ni,
-        
-        // ports
-        input  logic  [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0]        rd_tag_i,           // tag in - comes one cycle later
-        input  logic  [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0]     rd_idx_i,     
-        input  logic  [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0]     rd_off_i,     
-        input  logic  [NumPorts-1:0]                              rd_req_i,           // read the word at offset off_i[:3] in all ways
-        input  logic  [NumPorts-1:0]                              rd_tag_only_i,      // only do a tag/valid lookup, no access to data arrays
-        input  logic  [NumPorts-1:0]                              rd_prio_i,          // 0: low prio, 1: high prio
-        output logic  [NumPorts-1:0]                              rd_ack_o,     
-        output logic                [DCACHE_SET_ASSOC-1:0]        rd_vld_bits_o,
-        output logic                [DCACHE_SET_ASSOC-1:0]        rd_hit_oh_o,
-        output logic                [63:0]                        rd_data_o,
-        
-        // only available on port 0, uses address signals of port 0    
-        input  logic                                              wr_cl_vld_i,
-        input  logic                                              wr_cl_nc_i,         // noncacheable access
-        input  logic                [DCACHE_SET_ASSOC-1:0]        wr_cl_we_i,         // writes a full cacheline 
-        input  logic                [DCACHE_TAG_WIDTH-1:0]        wr_cl_tag_i,
-        input  logic                [DCACHE_CL_IDX_WIDTH-1:0]     wr_cl_idx_i,
-        input  logic                [DCACHE_OFFSET_WIDTH-1:0]     wr_cl_off_i,
-        input  logic                [DCACHE_LINE_WIDTH-1:0]       wr_cl_data_i, 
-        input  logic                [DCACHE_LINE_WIDTH/8-1:0]     wr_cl_data_be_i, 
-        input  logic                [DCACHE_SET_ASSOC-1:0]        wr_vld_bits_i,      
-        
-        // separate port for single word write, no tag access
-        input  logic                [DCACHE_SET_ASSOC-1:0]        wr_req_i,           // write a single word to offset off_i[:3] 
-        output logic                                              wr_ack_o,
-        input  logic                [DCACHE_CL_IDX_WIDTH-1:0]     wr_idx_i,     
-        input  logic                [DCACHE_OFFSET_WIDTH-1:0]     wr_off_i,     
-        input  logic                [63:0]                        wr_data_i,      
-        input  logic                [7:0]                         wr_data_be_i,
-
-        // forwarded wbuffer
-        input wbuffer_t             [DCACHE_WBUF_DEPTH-1:0]       wbuffer_data_i
-    );
+    parameter bit          Axi64BitCompliant  = 1'b0, // set this to 1 when using in conjunction with 64bit AXI bus adapter
+    parameter int unsigned NumPorts           = 3
+) (
+    input  logic                                              clk_i,
+    input  logic                                              rst_ni,
+
+    // ports
+    input  logic  [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0]        rd_tag_i,           // tag in - comes one cycle later
+    input  logic  [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0]     rd_idx_i,
+    input  logic  [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0]     rd_off_i,
+    input  logic  [NumPorts-1:0]                              rd_req_i,           // read the word at offset off_i[:3] in all ways
+    input  logic  [NumPorts-1:0]                              rd_tag_only_i,      // only do a tag/valid lookup, no access to data arrays
+    input  logic  [NumPorts-1:0]                              rd_prio_i,          // 0: low prio, 1: high prio
+    output logic  [NumPorts-1:0]                              rd_ack_o,
+    output logic                [DCACHE_SET_ASSOC-1:0]        rd_vld_bits_o,
+    output logic                [DCACHE_SET_ASSOC-1:0]        rd_hit_oh_o,
+    output logic                [63:0]                        rd_data_o,
+
+    // only available on port 0, uses address signals of port 0
+    input  logic                                              wr_cl_vld_i,
+    input  logic                                              wr_cl_nc_i,         // noncacheable access
+    input  logic                [DCACHE_SET_ASSOC-1:0]        wr_cl_we_i,         // writes a full cacheline
+    input  logic                [DCACHE_TAG_WIDTH-1:0]        wr_cl_tag_i,
+    input  logic                [DCACHE_CL_IDX_WIDTH-1:0]     wr_cl_idx_i,
+    input  logic                [DCACHE_OFFSET_WIDTH-1:0]     wr_cl_off_i,
+    input  logic                [DCACHE_LINE_WIDTH-1:0]       wr_cl_data_i,
+    input  logic                [DCACHE_LINE_WIDTH/8-1:0]     wr_cl_data_be_i,
+    input  logic                [DCACHE_SET_ASSOC-1:0]        wr_vld_bits_i,
+
+    // separate port for single word write, no tag access
+    input  logic                [DCACHE_SET_ASSOC-1:0]        wr_req_i,           // write a single word to offset off_i[:3]
+    output logic                                              wr_ack_o,
+    input  logic                [DCACHE_CL_IDX_WIDTH-1:0]     wr_idx_i,
+    input  logic                [DCACHE_OFFSET_WIDTH-1:0]     wr_off_i,
+    input  logic                [63:0]                        wr_data_i,
+    input  logic                [7:0]                         wr_data_be_i,
+
+    // forwarded wbuffer
+    input wbuffer_t             [DCACHE_WBUF_DEPTH-1:0]       wbuffer_data_i
+);
 
 
     logic [DCACHE_NUM_BANKS-1:0]                                  bank_req;
@@ -76,25 +77,25 @@ module serpent_dcache_mem #(
     logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0]         bank_idx;
     logic [DCACHE_CL_IDX_WIDTH-1:0]                               bank_idx_d, bank_idx_q;
     logic [DCACHE_OFFSET_WIDTH-1:0]                               bank_off_d, bank_off_q;
-     
-    logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0]      bank_wdata;                   // 
-    logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0]      bank_rdata;                   // 
+
+    logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0]      bank_wdata;                   //
+    logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][63:0]      bank_rdata;                   //
     logic [DCACHE_SET_ASSOC-1:0][63:0]                            rdata_cl;                     // selected word from each cacheline
- 
+
     logic [DCACHE_TAG_WIDTH-1:0]                                  rd_tag;
     logic [DCACHE_SET_ASSOC-1:0]                                  vld_req;                      // bit enable for valid regs
     logic                                                         vld_we;                       // valid bits write enable
     logic [DCACHE_SET_ASSOC-1:0]                                  vld_wdata;                    // valid bits to write
     logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0]            tag_rdata;                    // these are the tags coming from the tagmem
-    logic                       [DCACHE_CL_IDX_WIDTH-1:0]         vld_addr;                     // valid bit 
-    
+    logic                       [DCACHE_CL_IDX_WIDTH-1:0]         vld_addr;                     // valid bit
+
     logic [$clog2(NumPorts)-1:0]                                  vld_sel_d, vld_sel_q;
-    
+
     logic [DCACHE_WBUF_DEPTH-1:0]                                 wbuffer_hit_oh;
     logic [7:0]                                                   wbuffer_be;
     logic [63:0]                                                  wbuffer_rdata, rdata;
     logic [63:0]                                                  wbuffer_cmp_addr;
-    
+
     logic                                                         cmp_en_d, cmp_en_q;
     logic                                                         rd_acked;
     logic [NumPorts-1:0]                                          bank_collision, rd_req_masked, rd_req_prio;
@@ -107,19 +108,19 @@ module serpent_dcache_mem #(
     //
     // SRAM bank mapping:
     //
-    // Bank 0                   Bank 2  
-    // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] .. 
+    // Bank 0                   Bank 2
+    // [way0, w0] [way1, w0] .. [way0, w1] [way1, w1] ..
 
     // byte enable mapping
-    generate 
+    generate
         for (genvar k=0;k<DCACHE_NUM_BANKS;k++) begin : g_bank
             for (genvar j=0;j<DCACHE_SET_ASSOC;j++) begin : g_bank_way
-                assign bank_be[k][j]   = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*8 +: 8] : 
-                                         (wr_req_i[j]   & wr_ack_o)    ? wr_data_be_i              : 
-                                                                         '0;
-                
-                assign bank_wdata[k][j] = (wr_cl_vld_i) ?  wr_cl_data_i[k*64 +: 64] :
-                                                           wr_data_i;    
+                assign bank_be[k][j]   = (wr_cl_we_i[j] & wr_cl_vld_i)  ? wr_cl_data_be_i[k*8 +: 8] :
+                                         (wr_req_i[j]   & wr_ack_o)     ? wr_data_be_i              :
+                                                                          '0;
+
+                assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ?  wr_cl_data_i[k*64 +: 64] :
+                                                                           wr_data_i;
             end
         end
     endgenerate
@@ -129,14 +130,14 @@ module serpent_dcache_mem #(
     assign rd_tag     = rd_tag_i[vld_sel_q]; //delayed by one cycle
     assign bank_off_d = (wr_cl_vld_i) ? wr_cl_off_i   : rd_off_i[vld_sel_d];
     assign bank_idx_d = (wr_cl_vld_i) ? wr_cl_idx_i   : rd_idx_i[vld_sel_d];
-    assign vld_req    = (wr_cl_vld_i) ? wr_cl_we_i    : (rd_acked) ? '1 : '0;  
+    assign vld_req    = (wr_cl_vld_i) ? wr_cl_we_i    : (rd_acked) ? '1 : '0;
 
 
     // priority masking
     // disable low prio requests when any of the high prio reqs is present
     assign rd_req_prio   = rd_req_i & rd_prio_i;
     assign rd_req_masked = (|rd_req_prio) ? rd_req_prio : rd_req_i;
-    
+
     // read port arbiter
     rrarbiter #(
         .NUM_REQ(NumPorts)
@@ -150,9 +151,9 @@ module serpent_dcache_mem #(
         .vld_o  ( rd_acked      ),
         .idx_o  ( vld_sel_d     )
     );
-    
+
     always_comb begin : p_bank_req
-        
+
         vld_we   = wr_cl_vld_i;
         bank_req = '0;
         wr_ack_o = '0;
@@ -161,7 +162,7 @@ module serpent_dcache_mem #(
 
         for(int k=0; k<NumPorts; k++) begin
             bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:3] == wr_off_i[DCACHE_OFFSET_WIDTH-1:3];
-        end    
+        end
 
         if(wr_cl_vld_i & |wr_cl_we_i) begin
             bank_req = '1;
@@ -171,24 +172,24 @@ module serpent_dcache_mem #(
             if(rd_acked) begin
                 if(~rd_tag_only_i[vld_sel_d]) begin
                     bank_req                                               = dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:3]);
-                    bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:3]] = rd_idx_i[vld_sel_d]; 
+                    bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:3]] = rd_idx_i[vld_sel_d];
                 end
-            end        
-        
+            end
+
             if(|wr_req_i) begin
                 if(rd_tag_only_i[vld_sel_d] | ~(rd_ack_o[vld_sel_d] & bank_collision[vld_sel_d])) begin
                     wr_ack_o = 1'b1;
                     bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:3]);
                     bank_we   = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:3]);
                 end
-            end  
-        end        
+            end
+        end
     end
 
 ///////////////////////////////////////////////////////
 // tag comparison, hit generatio, readoud muxes
 ///////////////////////////////////////////////////////
-    
+
     logic [DCACHE_OFFSET_WIDTH-1:0]       wr_cl_off;
     logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx;
     logic [$clog2(DCACHE_SET_ASSOC)-1:0]  rd_hit_idx;
@@ -196,10 +197,10 @@ module serpent_dcache_mem #(
     assign cmp_en_d = (|vld_req) & ~vld_we;
 
     // word tag comparison in write buffer
-    assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} : 
+    assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} :
                                               {rd_tag, bank_idx_q, bank_off_q};
     // hit generation
-    generate 
+    generate
         for (genvar i=0;i<DCACHE_SET_ASSOC;i++) begin : g_tag_cmpsel
             // tag comparison of ways >0
             assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i]  & cmp_en_q;
@@ -209,7 +210,7 @@ module serpent_dcache_mem #(
 
         for(genvar k=0; k<DCACHE_WBUF_DEPTH; k++) begin : g_wbuffer_hit
             assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & (wbuffer_data_i[k].wtag == (wbuffer_cmp_addr >> 3));
-        end    
+        end
     endgenerate
 
 
@@ -220,7 +221,7 @@ module serpent_dcache_mem #(
         .cnt_o   ( wbuffer_hit_idx  ),
         .empty_o (                  )
     );
-    
+
     lzc #(
         .WIDTH ( DCACHE_SET_ASSOC )
     ) i_lzc_rd_hit (
@@ -229,21 +230,26 @@ module serpent_dcache_mem #(
         .empty_o (              )
     );
 
-    assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data; 
+    assign wbuffer_rdata = wbuffer_data_i[wbuffer_hit_idx].data;
     assign wbuffer_be    = (|wbuffer_hit_oh) ? wbuffer_data_i[wbuffer_hit_idx].valid : '0;
-    
-    assign wr_cl_off     = (wr_cl_nc_i)     ? '0 : wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
-    assign rdata         = (wr_cl_vld_i)    ? wr_cl_data_i[wr_cl_off*64 +: 64] :
-                                              rdata_cl[rd_hit_idx];
+
+    if (Axi64BitCompliant) begin
+        assign wr_cl_off     = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
+    end else begin  
+        assign wr_cl_off     = (wr_cl_nc_i) ? '0 : wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3];
+    end
+      
+    assign rdata         = (wr_cl_vld_i)  ? wr_cl_data_i[wr_cl_off*64 +: 64] :
+                                            rdata_cl[rd_hit_idx];
 
     // overlay bytes that hit in the write buffer
-    generate  
+    generate
         for(genvar k=0; k<8; k++) begin : g_rd_data
             assign rd_data_o[8*k +: 8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k +: 8] : rdata[8*k +: 8];
         end
-    endgenerate    
+    endgenerate
+
 
-    
 
 ///////////////////////////////////////////////////////
 // memory arrays and regs
@@ -272,8 +278,8 @@ module serpent_dcache_mem #(
         for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : g_tag_srams
 
             assign tag_rdata[i]     = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0];
-            assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH];    
-        
+            assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH];
+
             // Tag RAM
             sram #(
                 // tag + valid bit
@@ -290,7 +296,7 @@ module serpent_dcache_mem #(
                 .rdata_o   ( vld_tag_rdata[i]    )
             );
         end
-    endgenerate    
+    endgenerate
 
 
     always_ff @(posedge clk_i or negedge rst_ni) begin : p_regs
@@ -317,20 +323,20 @@ module serpent_dcache_mem #(
 `ifndef VERILATOR
 
     hit_hot1: assert property (
-        @(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(rd_hit_oh_o))     
+        @(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(rd_hit_oh_o))
             else $fatal(1,"[l1 dcache] rd_hit_oh_o signal must be hot1");
 
     word_write_hot1: assert property (
-        @(posedge clk_i) disable iff (~rst_ni) wr_ack_o |-> $onehot0(wr_req_i))     
+        @(posedge clk_i) disable iff (~rst_ni) wr_ack_o |-> $onehot0(wr_req_i))
             else $fatal(1,"[l1 dcache] wr_req_i signal must be hot1");
 
     wbuffer_hit_hot1: assert property (
-        @(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(wbuffer_hit_oh))     
+        @(posedge clk_i) disable iff (~rst_ni) &vld_req |-> ~vld_we |=> $onehot0(wbuffer_hit_oh))
             else $fatal(1,"[l1 dcache] wbuffer_hit_oh signal must be hot1");
 
     // this is only used for verification!
-    logic                                    vld_mirror[serpent_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];        
-    logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[serpent_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];        
+    logic                                    vld_mirror[serpent_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
+    logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[serpent_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0];
     logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
 
     always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
@@ -342,19 +348,19 @@ module serpent_dcache_mem #(
                 if(vld_req[i] & vld_we) begin
                     vld_mirror[vld_addr][i] <= vld_wdata[i];
                     tag_mirror[vld_addr][i] <= wr_cl_tag_i;
-                end 
-            end       
+                end
+            end
         end
     end
 
     generate
         for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin
             assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata);
-        end 
+        end
     endgenerate
 
     tag_write_duplicate: assert property (
-        @(posedge clk_i) disable iff (~rst_ni) |vld_req |-> vld_we |-> ~(|tag_write_duplicate_test))     
+        @(posedge clk_i) disable iff (~rst_ni) |vld_req |-> vld_we |-> ~(|tag_write_duplicate_test))
             else $fatal(1,"[l1 dcache] cannot allocate a CL that is already present in the cache");
 
     // logic tst;
@@ -362,7 +368,7 @@ module serpent_dcache_mem #(
     //     tst = tag == 44'h13;
     //     // for (int k=0; k<DCACHE_SET_ASSOC;k++) begin
     //     //     tst |= tag_rdata[k] == 44'h96;
-    //     // end    
+    //     // end
     //     tst &= bank_idx_d == 64'h0C;
     //     tst &= |wr_cl_we_i;
     // end
diff --git a/src/cache_subsystem/serpent_dcache_missunit.sv b/src/cache_subsystem/serpent_dcache_missunit.sv
index a85d1564f17fdecbe0107aba2a5dd7e692d3756a..a6174bf83449549c5ebc89dee9538d2477816bf4 100644
--- a/src/cache_subsystem/serpent_dcache_missunit.sv
+++ b/src/cache_subsystem/serpent_dcache_missunit.sv
@@ -18,9 +18,10 @@ import ariane_pkg::*;
 import serpent_cache_pkg::*;
 
 module serpent_dcache_missunit #(
-    parameter logic [DCACHE_ID_WIDTH-1:0] AmoTxId  = 1, // TX id to be used for AMOs
-    parameter int unsigned                NumPorts = 3  // number of miss ports
- ) (
+    parameter bit                         Axi64BitCompliant  = 1'b0, // set this to 1 when using in conjunction with 64bit AXI bus adapter
+    parameter logic [CACHE_ID_WIDTH-1:0]  AmoTxId            = 1,    // TX id to be used for AMOs
+    parameter int unsigned                NumPorts           = 3     // number of miss ports
+) (
     input  logic                                       clk_i,       // Clock
     input  logic                                       rst_ni,      // Asynchronous reset active low
     // cache management, signals from/to core
@@ -43,12 +44,12 @@ module serpent_dcache_missunit #(
     input  logic [NumPorts-1:0][63:0]                  miss_paddr_i,
     input  logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]  miss_vld_bits_i,
     input  logic [NumPorts-1:0][2:0]                   miss_size_i,
-    input  logic [NumPorts-1:0][DCACHE_ID_WIDTH-1:0]   miss_id_i,          // used as transaction ID
+    input  logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0]    miss_id_i,          // used as transaction ID
     // signals that the request collided with a pending read
     output logic [NumPorts-1:0]                        miss_replay_o,
     // signals response from memory
     output logic [NumPorts-1:0]                        miss_rtrn_vld_o,
-    output logic [DCACHE_ID_WIDTH-1:0]                 miss_rtrn_id_o,     // only used for writes, set to zero fro reads
+    output logic [CACHE_ID_WIDTH-1:0]                  miss_rtrn_id_o,     // only used for writes, set to zero for reads
     // from writebuffer
     input  logic [DCACHE_MAX_TX-1:0][63:0]             tx_paddr_i,         // used to check for address collisions with read operations
     input  logic [DCACHE_MAX_TX-1:0]                   tx_vld_i,           // used to check for address collisions with read operations
@@ -79,7 +80,7 @@ module serpent_dcache_missunit #(
         logic [63:0]                         paddr   ;
         logic [2:0]                          size    ;
         logic [DCACHE_SET_ASSOC-1:0]         vld_bits;
-        logic [DCACHE_ID_WIDTH-1:0]          id      ;
+        logic [CACHE_ID_WIDTH-1:0]          id      ;
         logic                                nc      ;
         logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way;
         logic [$clog2(NumPorts)-1:0]        miss_port_idx;
@@ -208,7 +209,14 @@ module serpent_dcache_missunit #(
                                                          amo_req_i.operand_b;
 
     // note: openpiton returns a full cacheline!
-    assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64 +: 64];
+    generate
+      if (Axi64BitCompliant) begin
+          assign amo_rtrn_mux = mem_rtrn_i.data[0 +: 64];
+      end else begin
+          assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64 +: 64];
+      end
+    endgenerate
+    
     // always sign extend 32bit values
     assign amo_resp_o.result = (amo_req_i.size==2'b10) ? {{32{amo_rtrn_mux[amo_req_i.operand_a[2]*32 + 31]}},
                                                               amo_rtrn_mux[amo_req_i.operand_a[2]*32 +: 32]} :
@@ -482,10 +490,6 @@ end
 //pragma translate_off
 `ifndef VERILATOR
 
-    nc_response : assert property (
-        @(posedge clk_i) disable iff (~rst_ni) mshr_vld_q |-> mshr_q.nc |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.nc)
-            else $fatal(1,"[l1 dcache missunit] NC load response implies NC load response");
-
     read_tid : assert property (
         @(posedge clk_i) disable iff (~rst_ni) mshr_vld_q |-> mem_rtrn_vld_i |-> load_ack |-> mem_rtrn_i.tid == mshr_q.id)
             else $fatal(1,"[l1 dcache missunit] TID of load response doesn't match");
diff --git a/src/cache_subsystem/serpent_dcache_wbuffer.sv b/src/cache_subsystem/serpent_dcache_wbuffer.sv
index c7e1d952e781eb2bc0f0013e28a05a30ccac547f..215b43df06372f17fa65faa640fbb673c4f40861 100644
--- a/src/cache_subsystem/serpent_dcache_wbuffer.sv
+++ b/src/cache_subsystem/serpent_dcache_wbuffer.sv
@@ -72,10 +72,10 @@ module serpent_dcache_wbuffer #(
     output logic [DCACHE_SET_ASSOC-1:0]        miss_vld_bits_o, // unused here (set to 0)
     output logic                               miss_nc_o,       // request to I/O space
     output logic [2:0]                         miss_size_o,     //
-    output logic [DCACHE_ID_WIDTH-1:0]         miss_id_o,       // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
+    output logic [CACHE_ID_WIDTH-1:0]          miss_id_o,       // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1)
     // write responses from memory
     input  logic                               miss_rtrn_vld_i,
-    input  logic [DCACHE_ID_WIDTH-1:0]         miss_rtrn_id_i,  // transaction ID to clear
+    input  logic [CACHE_ID_WIDTH-1:0]          miss_rtrn_id_i,  // transaction ID to clear
     // cache read interface
     output logic [DCACHE_TAG_WIDTH-1:0]        rd_tag_o,        // tag in - comes one cycle later
     output logic [DCACHE_CL_IDX_WIDTH-1:0]     rd_idx_o,
@@ -111,7 +111,7 @@ logic     [DCACHE_WBUF_DEPTH-1:0]         wbuffer_hit_oh, inval_hit;
 logic     [DCACHE_WBUF_DEPTH-1:0][7:0]    bdirty;
 
 logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr;
-logic [DCACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
+logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id;
 
 logic [2:0] bdirty_off;
 logic [7:0] tx_be;
@@ -292,6 +292,8 @@ assign wr_data_o    = wbuffer_q[rtrn_ptr].data;
 // readout of status bits, index calculation
 ///////////////////////////////////////////////////////
 
+logic [DCACHE_WBUF_DEPTH-1:0][DCACHE_CL_IDX_WIDTH-1:0] wtag_comp;
+
 assign wr_cl_vld_d = wr_cl_vld_i;
 assign wr_cl_idx_d = wr_cl_idx_i;
 
@@ -313,8 +315,9 @@ generate
         // checks if an invalidation/cache refill hits a particular word
         // note: an invalidation can hit multiple words!
         // need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal...
-        assign inval_hit[k]  = (wr_cl_vld_d & valid[k] & (wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-1:0]<<3 == wr_cl_idx_d<<DCACHE_OFFSET_WIDTH)) |
-                               (wr_cl_vld_q & valid[k] & (wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-1:0]<<3 == wr_cl_idx_q<<DCACHE_OFFSET_WIDTH));
+        assign wtag_comp[k] = wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-4:DCACHE_OFFSET_WIDTH-3];
+        assign inval_hit[k]  = (wr_cl_vld_d & valid[k] & (wtag_comp[k] == wr_cl_idx_d)) |
+                               (wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q));
 
         // these word have to be looked up in the cache
         assign tocheck[k]       = (~wbuffer_q[k].checked) & valid[k];
diff --git a/src/cache_subsystem/serpent_icache.sv b/src/cache_subsystem/serpent_icache.sv
index 925fa1c0f8aeffb7153adbe8457e6bdd403d2567..b2b1b11496fcfed3ea9c621c6db9d2f6cb310dc7 100644
--- a/src/cache_subsystem/serpent_icache.sv
+++ b/src/cache_subsystem/serpent_icache.sv
@@ -28,7 +28,7 @@ import ariane_pkg::*;
 import serpent_cache_pkg::*;
 
 module serpent_icache  #(
-    parameter logic [DCACHE_ID_WIDTH-1:0] RdTxId             = 0,                // ID to be used for read transactions
+    parameter logic [CACHE_ID_WIDTH-1:0]  RdTxId             = 0,                // ID to be used for read transactions
     parameter bit                         Axi64BitCompliant  = 1'b0,             // set this to 1 when using in conjunction with 64bit AXI bus adapter
     parameter logic [63:0]                CachedAddrBeg      = 64'h00_8000_0000, // begin of cached region
     parameter logic [63:0]                CachedAddrEnd      = 64'h80_0000_0000  // end of cached region
@@ -513,14 +513,6 @@ module serpent_icache  #(
 
 //pragma translate_off
 `ifndef VERILATOR
-  noncacheable0: assert property (
-      @(posedge clk_i) disable iff (~rst_ni) paddr_is_nc |-> mem_rtrn_vld_i |-> state_q != KILL_MISS |-> mem_rtrn_i.rtype == ICACHE_IFILL_ACK |-> mem_rtrn_i.nc)
-         else $fatal(1,"[l1 icache] NC paddr implies nc ifill");
-
-  noncacheable1: assert property (
-      @(posedge clk_i) disable iff (~rst_ni) mem_rtrn_vld_i |-> state_q != KILL_MISS |-> mem_rtrn_i.f4b |-> mem_rtrn_i.nc)
-         else $fatal(1,"[l1 icache] 4b ifill implies NC");
-
   repl_inval0: assert property (
       @(posedge clk_i) disable iff (~rst_ni) cache_wren |-> ~(mem_rtrn_i.inv.all | mem_rtrn_i.inv.vld))
          else $fatal(1,"[l1 icache] cannot replace cacheline and invalidate cacheline simultaneously");
@@ -534,9 +526,39 @@ module serpent_icache  #(
          else $fatal(1,"[l1 icache] fsm reached an invalid state");
 
   hot1: assert property (
-      @(posedge clk_i) disable iff (~rst_ni) (~inv_en) |=> cmp_en_q |-> $onehot0(cl_hit))
+      @(posedge clk_i) disable iff (~rst_ni) (~inv_en) |-> cache_rden |=> cmp_en_q |-> $onehot0(cl_hit))
          else $fatal(1,"[l1 icache] cl_hit signal must be hot1");
 
+    // this is only used for verification!
+    logic                                    vld_mirror[serpent_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];        
+    logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[serpent_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0];        
+    logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test;
+
+    always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror
+        if(~rst_ni) begin
+            vld_mirror <= '{default:'0};
+            tag_mirror <= '{default:'0};
+        end else begin
+            for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin
+                if(vld_req[i] & vld_we) begin
+                    vld_mirror[vld_addr][i] <= vld_wdata[i];
+                    tag_mirror[vld_addr][i] <= cl_tag_q;
+                end 
+            end       
+        end
+    end
+
+    generate
+        for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin
+            assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata);
+        end 
+    endgenerate
+
+    tag_write_duplicate: assert property (
+        @(posedge clk_i) disable iff (~rst_ni) |vld_req |-> vld_we |-> ~(|tag_write_duplicate_test))     
+            else $fatal(1,"[l1 icache] cannot allocate a CL that is already present in the cache");
+
+
    initial begin
       // assert wrong parameterizations
       assert (ICACHE_INDEX_WIDTH<=12)
diff --git a/src/cache_subsystem/serpent_l15_adapter.sv b/src/cache_subsystem/serpent_l15_adapter.sv
index 7f8f77c6f93119a00919adbaa706ac2ef8d1e025..41e605a7fc01de26f13eb4757df5d40295a04035 100644
--- a/src/cache_subsystem/serpent_l15_adapter.sv
+++ b/src/cache_subsystem/serpent_l15_adapter.sv
@@ -52,10 +52,7 @@ import ariane_pkg::*;
 import serpent_cache_pkg::*;
 
 module serpent_l15_adapter #(
-  parameter logic [63:0] CachedAddrBeg = 64'h00_8000_0000, // begin of cached region
-  parameter logic [63:0] CachedAddrEnd = 64'h80_0000_0000, // end of cached region
-  parameter bit          SwapEndianess = 1               ,
-  parameter bit          PitonRemapIO  = 1                 // for OpenPiton
+  parameter bit          SwapEndianess = 1               
 ) (
    input logic                  clk_i,
    input logic                  rst_ni,
@@ -68,7 +65,6 @@ module serpent_l15_adapter #(
    output logic                 icache_rtrn_vld_o,
    output icache_rtrn_t         icache_rtrn_o,
 
-
    // dcache
    input  logic                 dcache_data_req_i,
    output logic                 dcache_data_ack_o,
@@ -77,8 +73,6 @@ module serpent_l15_adapter #(
    output logic                 dcache_rtrn_vld_o,
    output dcache_rtrn_t         dcache_rtrn_o,
 
-   // TODO: interrupt interface
-
    // L15
    output l15_req_t             l15_req_o,
    input  l15_rtrn_t            l15_rtrn_i
@@ -115,7 +109,6 @@ l15_rtrn_t rtrn_fifo_data;
 // logic [63:0]                       l15_req_o.l15_data_next_entry;       // unused in Ariane (only used for CAS atomic requests)
 // logic [L15_TLB_CSM_WIDTH-1:0]      l15_req_o.l15_csm_data;
 
-logic [63:0] tmp_paddr;
 
 assign icache_data_ack_o  = icache_data_req_i & ~icache_data_full;
 assign dcache_data_ack_o  = dcache_data_req_i & ~dcache_data_full;
@@ -131,10 +124,7 @@ assign l15_req_o.l15_invalidate_cacheline = '0; // unused by Ariane as L1 has no
 assign l15_req_o.l15_blockstore           = '0; // unused in openpiton
 assign l15_req_o.l15_blockinitstore       = '0; // unused in openpiton
 assign l15_req_o.l15_l1rplway             = (arb_idx) ? dcache_data.way   : icache_data.way;
-// assign tmp_paddr                          = (arb_idx) ? dcache_data.paddr :
-//                                                         icache_data.paddr;
 
-// assign l15_req_o.l15_address              = ((tmp_paddr < CachedAddrBeg) && PitonRemapIO) ? {25'b1, tmp_paddr[38:0]} : tmp_paddr;
 assign l15_req_o.l15_address              = (arb_idx) ? dcache_data.paddr :
                                                         icache_data.paddr;
 
@@ -324,10 +314,7 @@ endgenerate
 
 // fifo signals
 assign icache_rtrn_o.tid      = rtrn_fifo_data.l15_threadid;
-assign icache_rtrn_o.nc       = rtrn_fifo_data.l15_noncacheable;
-assign icache_rtrn_o.f4b      = rtrn_fifo_data.l15_f4b;
 assign dcache_rtrn_o.tid      = rtrn_fifo_data.l15_threadid;
-assign dcache_rtrn_o.nc       = rtrn_fifo_data.l15_noncacheable;
 
 // invalidation signal mapping
 assign icache_rtrn_o.inv.idx  = {rtrn_fifo_data.l15_inval_address_15_4, 4'b0000};
diff --git a/src/clint/clint.sv b/src/clint/clint.sv
index 5e84ebaf9c48308f8b404815b6964cbcd641ed33..39cba6b63541d46ee461a60adc9642c681aac6ed 100644
--- a/src/clint/clint.sv
+++ b/src/clint/clint.sv
@@ -89,8 +89,8 @@ module clint #(
         // written from APB bus - gets priority
         if (en && we) begin
             case (register_address) inside
-                [MSIP_BASE:MSIP_BASE+8*NR_CORES]: begin
-                    msip_n[$unsigned(address[AddrSelWidth-1+3:3])] = wdata[0];
+                [MSIP_BASE:MSIP_BASE+4*NR_CORES]: begin
+                    msip_n[$unsigned(address[AddrSelWidth-1+2:2])] = wdata[32*address[2]];
                 end
 
                 [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin
@@ -111,8 +111,8 @@ module clint #(
 
         if (en && !we) begin
             case (register_address) inside
-                [MSIP_BASE:MSIP_BASE+8*NR_CORES]: begin
-                    rdata = msip_q[$unsigned(address[AddrSelWidth-1+3:3])];
+                [MSIP_BASE:MSIP_BASE+4*NR_CORES]: begin
+                    rdata = msip_q[$unsigned(address[AddrSelWidth-1+2:2])];
                 end
 
                 [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin
diff --git a/src/controller.sv b/src/controller.sv
index bbd340bdd944fe28736ac34e192e44b7ae032f4a..5bbd0971de341e6c8b9085e7d6f25e16136b8bba 100644
--- a/src/controller.sv
+++ b/src/controller.sv
@@ -22,6 +22,7 @@ module controller (
     output logic            flush_unissued_instr_o, // Flush un-issued instructions of the scoreboard
     output logic            flush_id_o,             // Flush ID stage
     output logic            flush_ex_o,             // Flush EX stage
+    output logic            flush_bp_o,             // Flush branch predictors
     output logic            flush_icache_o,         // Flush ICache
     output logic            flush_dcache_o,         // Flush DCache
     input  logic            flush_dcache_ack_i,     // Acknowledge the whole DCache Flush
@@ -57,6 +58,7 @@ module controller (
         flush_dcache           = 1'b0;
         flush_icache_o         = 1'b0;
         flush_tlb_o            = 1'b0;
+        flush_bp_o             = 1'b0;
         // ------------
         // Mis-predict
         // ------------
@@ -78,9 +80,12 @@ module controller (
             flush_unissued_instr_o = 1'b1;
             flush_id_o             = 1'b1;
             flush_ex_o             = 1'b1;
-
+// the dcache flush below is not needed for PITON_ARIANE, since
+// that configuration uses a write-through cache
+`ifndef PITON_ARIANE
             flush_dcache           = 1'b1;
             fence_active_d         = 1'b1;
+`endif            
         end
 
         // ---------------------------------
@@ -93,11 +98,17 @@ module controller (
             flush_id_o             = 1'b1;
             flush_ex_o             = 1'b1;
             flush_icache_o         = 1'b1;
-
+// the dcache flush below is not needed for PITON_ARIANE, since
+// that configuration uses a write-through cache
+`ifndef PITON_ARIANE
             flush_dcache           = 1'b1;
             fence_active_d         = 1'b1;
+`endif
         end
 
+// waiting for the dcache flush acknowledge is not needed for
+// PITON_ARIANE, since that configuration uses a write-through cache
+`ifndef PITON_ARIANE
         // wait for the acknowledge here
         if (flush_dcache_ack_i && fence_active_q) begin
             fence_active_d = 1'b0;
@@ -105,7 +116,7 @@ module controller (
         end else if (fence_active_q) begin
             flush_dcache = 1'b1;
         end
-
+`endif
         // ---------------------------------
         // SFENCE.VMA
         // ---------------------------------
@@ -140,6 +151,11 @@ module controller (
             flush_unissued_instr_o = 1'b1;
             flush_id_o             = 1'b1;
             flush_ex_o             = 1'b1;
+            // this potentially reduces performance, but is needed
+            // to suppress speculative fetches to virtual memory from
+            // machine mode. TODO: remove when PMA checkers have been
+            // added to the system
+            flush_bp_o             = 1'b1;
         end
     end
 
diff --git a/src/csr_regfile.sv b/src/csr_regfile.sv
index 9fec4542aa05e476d9cb1dc5f54818bd324f8887..7d94ea71b1127baf3b30b3a2c9c4d6c2d783c71c 100644
--- a/src/csr_regfile.sv
+++ b/src/csr_regfile.sv
@@ -967,8 +967,13 @@ module csr_regfile #(
     assign tw_o             = mstatus_q.tw;
     assign tsr_o            = mstatus_q.tsr;
     assign halt_csr_o       = wfi_q;
+`ifdef PITON_ARIANE
+    assign icache_en_o      = icache_q[0];
+`else
     assign icache_en_o      = icache_q[0] & (~debug_mode_q);
+    `endif
     assign dcache_en_o      = dcache_q[0];
+
     // determine if mprv needs to be considered if in debug mode
     assign mprv             = (debug_mode_q && !dcsr_q.mprven) ? 1'b0 : mstatus_q.mprv;
     assign debug_mode_o     = debug_mode_q;
diff --git a/src/debug/ariane.cfg b/src/debug/ariane.cfg
deleted file mode 100644
index 00ab350811519ba47980d2a5c827a6e2784b0040..0000000000000000000000000000000000000000
--- a/src/debug/ariane.cfg
+++ /dev/null
@@ -1,22 +0,0 @@
-adapter_khz     10000
-
-interface remote_bitbang
-remote_bitbang_host localhost
-
-remote_bitbang_port $::env(JTAG_VPI_PORT)
-
-set _CHIPNAME riscv
-jtag newtap $_CHIPNAME cpu -irlen 5
-
-set _TARGETNAME $_CHIPNAME.cpu
-target create $_TARGETNAME riscv -chain-position $_TARGETNAME
-
-riscv set_reset_timeout_sec 120
-riscv set_command_timeout_sec 120
-
-# prefer to use sba for system bus access
-riscv set_prefer_sba on
-
-init
-halt
-echo "Ready for Remote Connections"
diff --git a/src/debug/debug_rom/.gitignore b/src/debug/debug_rom/.gitignore
deleted file mode 100644
index e04ab732392f1e15caac9c1ac85e439a67dad030..0000000000000000000000000000000000000000
--- a/src/debug/debug_rom/.gitignore
+++ /dev/null
@@ -1,3 +0,0 @@
-*.bin
-*.elf
-debug_rom.img
diff --git a/src/debug/debug_rom/Makefile b/src/debug/debug_rom/Makefile
deleted file mode 100644
index f05a0e888ee80f73d270a9cd05456e70de45bc8f..0000000000000000000000000000000000000000
--- a/src/debug/debug_rom/Makefile
+++ /dev/null
@@ -1,28 +0,0 @@
-# See LICENSE.SiFive for license details
-
-debug_rom = debug_rom.sv
-
-GCC=riscv64-unknown-elf-gcc
-OBJCOPY=riscv64-unknown-elf-objcopy
-OBJDUMP=riscv64-unknown-elf-objdump
-
-all: $(debug_rom)
-
-%.sv: %.img 
-	python gen_rom.py $< 
-
-%.img: %.bin
-	dd if=$< of=$@ bs=256 count=1
-
-%.bin: %.elf
-	$(OBJCOPY) -O binary $< $@
-
-%.elf: %.S link.ld
-	$(GCC) -I$(RISCV)/include -Tlink.ld $< -nostdlib -fPIC -static -Wl,--no-gc-sections -o $@ 
-
-%.dump: %.elf
-	$(OBJDUMP) -d $< --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.text.init --section=.data  > $@
-
-clean:
-	rm -f *.img *.dump *.bin *.sv
-
diff --git a/src/debug/debug_rom/debug_rom.S b/src/debug/debug_rom/debug_rom.S
deleted file mode 100644
index 76c81ca99503c0aef86e3106e98ec04528b462bd..0000000000000000000000000000000000000000
--- a/src/debug/debug_rom/debug_rom.S
+++ /dev/null
@@ -1,93 +0,0 @@
-// See LICENSE.SiFive for license details.
-
-#include "encoding.h"
-
-// These are implementation-specific addresses in the Debug Module
-#define HALTED    0x100
-#define GOING     0x104
-#define RESUMING  0x108
-#define EXCEPTION 0x10C
-
-// Region of memory where each hart has 1
-// byte to read.
-#define FLAGS 0x400
-#define FLAG_GO     0
-#define FLAG_RESUME 1
-
-        .option norvc
-        .global entry
-        .global exception
-
-        // Entry location on ebreak, Halt, or Breakpoint
-        // It is the same for all harts. They branch when
-        // their GO or RESUME bit is set.
-
-entry:
-       jal zero, _entry
-resume:
-       jal zero, _resume
-exception:
-       jal zero, _exception
-
-
-
-_entry:
-        // This fence is required because the execution may have written something
-        // into the Abstract Data or Program Buffer registers.
-        fence
-        csrw CSR_DSCRATCH0, s0       // Save s0 to allow signaling MHARTID
-        csrw CSR_DSCRATCH1, a0       // Save a0 to allow loading arbitrary DM base
-        auipc a0, 0                  // Get PC
-        srli a0, a0, 12              // And throw away lower 12 bits to get the DM base
-        slli a0, a0, 12
-
-        // We continue to let the hart know that we are halted in order that
-        // a DM which was reset is still made aware that a hart is halted.
-        // We keep checking both whether there is something the debugger wants
-        // us to do, or whether we should resume.
-entry_loop:
-        csrr s0, CSR_MHARTID
-        sw   s0, HALTED(a0)
-        add  s0, s0, a0
-        lbu  s0, FLAGS(s0) // 1 byte flag per hart. Only one hart advances here.
-        andi s0, s0, (1 << FLAG_GO)
-        bnez s0, going
-        csrr s0, CSR_MHARTID
-        add  s0, s0, a0
-        lbu  s0, FLAGS(s0) // multiple harts can resume  here
-        andi s0, s0, (1 << FLAG_RESUME)
-        bnez s0, resume
-        jal  zero, entry_loop
-
-_exception:
-        csrw CSR_DSCRATCH1, a0       // Save a0 to allow loading arbitrary DM offsets
-        auipc a0, 0                  // Get POC
-        srli a0, a0, 12              // And throw away lower 12 bits to get the DM base
-        slli a0, a0, 12
-        sw   zero, EXCEPTION(a0)     // Let debug module know you got an exception.
-        csrr a0, CSR_DSCRATCH1       // Restore a0 here
-        ebreak
-
-going:
-        sw zero, GOING(a0)          // When debug module sees this write, the GO flag is reset.
-        csrr s0, CSR_DSCRATCH0      // Restore s0 here
-        csrr a0, CSR_DSCRATCH1      // Restore a0 here
-        jal zero, whereto
-_resume:
-        csrr s0, CSR_MHARTID
-        sw   s0, RESUMING(a0)   // When Debug Module sees this write, the RESUME flag is reset.
-        csrr s0, CSR_DSCRATCH0  // Restore s0 here
-        csrr a0, CSR_DSCRATCH1  // Restore a0 here
-        dret
-
-        // END OF ACTUAL "ROM" CONTENTS. BELOW IS JUST FOR LINKER SCRIPT.
-
-.section .whereto
-whereto:
-        nop
-        // Variable "ROM" This is : jal x0 abstract, jal x0 program_buffer,
-        //                or jal x0 resume, as desired.
-        //                Debug Module state machine tracks what is 'desired'.
-        //                We don't need/want to use jalr here because all of the
-        //                Variable ROM contents are set by
-        //                Debug Module before setting the OK_GO byte.
diff --git a/src/debug/debug_rom/debug_rom.sv b/src/debug/debug_rom/debug_rom.sv
deleted file mode 100644
index 1c6727c399f8b5c7efa5f820c3dd2506f36fea78..0000000000000000000000000000000000000000
--- a/src/debug/debug_rom/debug_rom.sv
+++ /dev/null
@@ -1,58 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the "License"); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File: $filename.v
- *
- * Description: Auto-generated bootrom
- */
-
-// Auto-generated code
-module debug_rom (
-   input  logic         clk_i,
-   input  logic         req_i,
-   input  logic [63:0]  addr_i,
-   output logic [63:0]  rdata_o
-);
-    localparam int RomSize = 19;
-
-    const logic [RomSize-1:0][63:0] mem = {
-        64'h00000000_7b200073,
-        64'h7b302573_7b202473,
-        64'h10852423_f1402473,
-        64'ha85ff06f_7b302573,
-        64'h7b202473_10052223,
-        64'h00100073_7b302573,
-        64'h10052623_00c51513,
-        64'h00c55513_00000517,
-        64'h7b351073_fd5ff06f,
-        64'hfa041ce3_00247413,
-        64'h40044403_00a40433,
-        64'hf1402473_02041c63,
-        64'h00147413_40044403,
-        64'h00a40433_10852023,
-        64'hf1402473_00c51513,
-        64'h00c55513_00000517,
-        64'h7b351073_7b241073,
-        64'h0ff0000f_04c0006f,
-        64'h07c0006f_00c0006f
-    };
-
-    logic [$clog2(RomSize)-1:0] addr_q;
-
-    always_ff @(posedge clk_i) begin
-        if (req_i) begin
-            addr_q <= addr_i[$clog2(RomSize)-1+3:3];
-        end
-    end
-
-    // this prevents spurious Xes from propagating into
-    // the speculative fetch stage of the core
-    assign rdata_o = (addr_q < RomSize) ? mem[addr_q] : '0;
-endmodule
diff --git a/src/debug/debug_rom/encoding.h b/src/debug/debug_rom/encoding.h
deleted file mode 120000
index 6807325341e49f9e187a7da2cef33d8fb018b148..0000000000000000000000000000000000000000
--- a/src/debug/debug_rom/encoding.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../bootrom/encoding.h
\ No newline at end of file
diff --git a/src/debug/debug_rom/gen_rom.py b/src/debug/debug_rom/gen_rom.py
deleted file mode 120000
index 7f92eadd062f1162ad122e9e4036d7fb40da0660..0000000000000000000000000000000000000000
--- a/src/debug/debug_rom/gen_rom.py
+++ /dev/null
@@ -1 +0,0 @@
-../../../bootrom/gen_rom.py
\ No newline at end of file
diff --git a/src/debug/debug_rom/link.ld b/src/debug/debug_rom/link.ld
deleted file mode 100644
index 053bb43c1fc967b88a7399484780c7d95fe43f96..0000000000000000000000000000000000000000
--- a/src/debug/debug_rom/link.ld
+++ /dev/null
@@ -1,16 +0,0 @@
-/* See LICENSE.SiFive for license details. */
-OUTPUT_ARCH( "riscv" )
-ENTRY( entry )
-SECTIONS
-{
-    .whereto 0x300 :
-    {
-        *(.whereto)
-    }   
-    . = 0x800;
-    .text :
-    {
-        *(.text)
-    }
-    _end = .;
-}
diff --git a/src/debug/dm_csrs.sv b/src/debug/dm_csrs.sv
deleted file mode 100644
index cbf25e295263098ea679f9c1c9451247ca0e1421..0000000000000000000000000000000000000000
--- a/src/debug/dm_csrs.sv
+++ /dev/null
@@ -1,569 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the “License”); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File:  dm_csrs.sv
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
- * Date:   30.6.2018
- *
- * Description: Debug CSRs. Communication over Debug Transport Module (DTM)
- */
-
-module dm_csrs #(
-    parameter int NrHarts = -1
-) (
-    input  logic                              clk_i,              // Clock
-    input  logic                              rst_ni,             // Asynchronous reset active low
-    input  logic                              testmode_i,
-    input  logic                              dmi_rst_ni,         // Debug Module Interface reset, active-low
-    input  logic                              dmi_req_valid_i,
-    output logic                              dmi_req_ready_o,
-    input  dm::dmi_req_t                      dmi_req_i,
-    // every request needs a response one cycle later
-    output logic                              dmi_resp_valid_o,
-    input  logic                              dmi_resp_ready_i,
-    output dm::dmi_resp_t                     dmi_resp_o,
-    // global ctrl
-    output logic                              ndmreset_o,      // non-debug module reset, active-high
-    output logic                              dmactive_o,      // 1 -> debug-module is active, 0 -> synchronous re-set
-    // hart status
-    input  dm::hartinfo_t [NrHarts-1:0]       hartinfo_i,      // static hartinfo
-    input  logic [NrHarts-1:0]                halted_i,        // hart is halted
-    input  logic [NrHarts-1:0]                unavailable_i,   // e.g.: powered down
-    input  logic [NrHarts-1:0]                resumeack_i,     // hart acknowledged resume request
-    // hart control
-    output logic [19:0]                       hartsel_o,       // hartselect to ctrl module
-    output logic [NrHarts-1:0]                haltreq_o,       // request to halt a hart
-    output logic [NrHarts-1:0]                resumereq_o,     // request hart to resume
-
-    output logic                              cmd_valid_o,       // debugger is writing to the command field
-    output dm::command_t                      cmd_o,             // abstract command
-    input  logic                              cmderror_valid_i,  // an error occured
-    input  dm::cmderr_t                       cmderror_i,        // this error occured
-    input  logic                              cmdbusy_i,         // cmd is currently busy executing
-
-    output logic [dm::ProgBufSize-1:0][31:0]  progbuf_o, // to system bus
-    output logic [dm::DataCount-1:0][31:0]    data_o,
-
-    input  logic [dm::DataCount-1:0][31:0]    data_i,
-    input  logic                              data_valid_i,
-    // system bus access module (SBA)
-    output logic [63:0]                       sbaddress_o,
-    input  logic [63:0]                       sbaddress_i,
-    output logic                              sbaddress_write_valid_o,
-    // control signals in
-    output logic                              sbreadonaddr_o,
-    output logic                              sbautoincrement_o,
-    output logic [2:0]                        sbaccess_o,
-    // data out
-    output logic                              sbreadondata_o,
-    output logic [63:0]                       sbdata_o,
-    output logic                              sbdata_read_valid_o,
-    output logic                              sbdata_write_valid_o,
-    // read data in
-    input  logic [63:0]                       sbdata_i,
-    input  logic                              sbdata_valid_i,
-    // control signals
-    input  logic                              sbbusy_i,
-    input  logic                              sberror_valid_i, // bus error occurred
-    input  logic [2:0]                        sberror_i // bus error occurred
-);
-    // the amount of bits we need to represent all harts
-    localparam HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts);
-    dm::dtm_op_t dtm_op;
-    assign dtm_op = dm::dtm_op_t'(dmi_req_i.op);
-
-    logic        resp_queue_full;
-    logic        resp_queue_empty;
-    logic        resp_queue_push;
-    logic        resp_queue_pop;
-    logic [31:0] resp_queue_data;
-
-    localparam dm::dm_csr_t DataEnd = dm::dm_csr_t'((dm::Data0 + {4'b0, dm::DataCount}));
-    localparam dm::dm_csr_t ProgBufEnd = dm::dm_csr_t'((dm::ProgBuf0 + {4'b0, dm::ProgBufSize}));
-
-    logic [31:0] haltsum0, haltsum1, haltsum2, haltsum3;
-    logic [NrHarts/2**5 :0][31:0] halted_reshaped0;
-    logic [NrHarts/2**10:0][31:0] halted_reshaped1;
-    logic [NrHarts/2**15:0][31:0] halted_reshaped2;
-    logic [(NrHarts/2**10+1)*32-1:0] halted_flat1;
-    logic [(NrHarts/2**15+1)*32-1:0] halted_flat2;
-    logic [32-1:0] halted_flat3;
-
-    // haltsum0
-    assign halted_reshaped0 = halted_i;
-    assign haltsum0         = halted_reshaped0[hartsel_o[19:5]];
-    // haltsum1
-    always_comb begin : p_reduction1
-      halted_flat1 = '0;
-      for (int k=0; k<NrHarts/2**5; k++) begin
-        halted_flat1[k] = &halted_reshaped0[k];
-      end
-      halted_reshaped1 = halted_flat1;
-      haltsum1         = halted_reshaped1[hartsel_o[19:10]];
-    end
-    // haltsum2
-    always_comb begin : p_reduction2
-      halted_flat2 = '0;
-      for (int k=0; k<NrHarts/2**10; k++) begin
-        halted_flat2[k] = &halted_reshaped1[k];
-      end
-      halted_reshaped2 = halted_flat2;
-      haltsum2         = halted_reshaped2[hartsel_o[19:15]];
-    end
-    // haltsum3
-    always_comb begin : p_reduction3
-      halted_flat3 = '0;
-      for (int k=0; k<NrHarts/2**15; k++) begin
-        halted_flat3[k] = &halted_reshaped2[k];
-      end
-      haltsum3 = halted_flat3;
-    end
-
-
-    dm::dmstatus_t      dmstatus;
-    dm::dmcontrol_t     dmcontrol_d, dmcontrol_q;
-    dm::abstractcs_t    abstractcs;
-    dm::cmderr_t        cmderr_d, cmderr_q;
-    dm::command_t       command_d, command_q;
-    dm::abstractauto_t  abstractauto_d, abstractauto_q;
-    dm::sbcs_t          sbcs_d, sbcs_q;
-    logic [63:0]        sbaddr_d, sbaddr_q;
-    logic [63:0]        sbdata_d, sbdata_q;
-
-    logic [NrHarts-1:0] havereset_d, havereset_q;
-    // program buffer
-    logic [dm::ProgBufSize-1:0][31:0] progbuf_d, progbuf_q;
-    // because first data address starts at 0x04
-    logic [({3'b0, dm::DataCount} + dm::Data0 - 1):(dm::Data0)][31:0] data_d, data_q;
-
-    logic [HartSelLen-1:0] selected_hart;
-
-    // a successful response returns zero
-    assign dmi_resp_o.resp = dm::DTM_SUCCESS;
-    assign dmi_resp_valid_o     = ~resp_queue_empty;
-    assign dmi_req_ready_o      = ~resp_queue_full;
-    assign resp_queue_push      = dmi_req_valid_i & dmi_req_ready_o;
-    // SBA
-    assign sbautoincrement_o = sbcs_q.sbautoincrement;
-    assign sbreadonaddr_o    = sbcs_q.sbreadonaddr;
-    assign sbreadondata_o    = sbcs_q.sbreadondata;
-    assign sbaccess_o        = sbcs_q.sbaccess;
-    assign sbdata_o          = sbdata_q;
-    assign sbaddress_o       = sbaddr_q;
-
-    assign hartsel_o         = {dmcontrol_q.hartselhi, dmcontrol_q.hartsello};
-
-    always_comb begin : csr_read_write
-        // --------------------
-        // Static Values (R/O)
-        // --------------------
-        // dmstatus
-        dmstatus    = '0;
-        dmstatus.version = dm::DbgVersion013;
-        // no authentication implemented
-        dmstatus.authenticated = 1'b1;
-        // we do not support halt-on-reset sequence
-        dmstatus.hasresethaltreq = 1'b0;
-        // TODO(zarubaf) things need to change here if we implement the array mask
-        dmstatus.allhavereset = havereset_q[selected_hart];
-        dmstatus.anyhavereset = havereset_q[selected_hart];
-
-        dmstatus.allresumeack = resumeack_i[selected_hart];
-        dmstatus.anyresumeack = resumeack_i[selected_hart];
-
-        dmstatus.allunavail   = unavailable_i[selected_hart];
-        dmstatus.anyunavail   = unavailable_i[selected_hart];
-
-        // as soon as we are out of the legal Hart region tell the debugger
-        // that there are only non-existent harts
-        dmstatus.allnonexistent = (hartsel_o > NrHarts[19:0] - 1) ? 1'b1 : 1'b0;
-        dmstatus.anynonexistent = (hartsel_o > NrHarts[19:0] - 1) ? 1'b1 : 1'b0;
-
-        dmstatus.allhalted    = halted_i[selected_hart];
-        dmstatus.anyhalted    = halted_i[selected_hart];
-
-        dmstatus.allrunning   = ~halted_i[selected_hart];
-        dmstatus.anyrunning   = ~halted_i[selected_hart];
-
-        // abstractcs
-        abstractcs = '0;
-        abstractcs.datacount = dm::DataCount;
-        abstractcs.progbufsize = dm::ProgBufSize;
-        abstractcs.busy = cmdbusy_i;
-        abstractcs.cmderr = cmderr_q;
-
-        // abstractautoexec
-        abstractauto_d = abstractauto_q;
-        abstractauto_d.zero0 = '0;
-
-        // default assignments
-        havereset_d = havereset_q;
-        dmcontrol_d = dmcontrol_q;
-        cmderr_d    = cmderr_q;
-        command_d   = command_q;
-        progbuf_d   = progbuf_q;
-        data_d      = data_q;
-        sbcs_d      = sbcs_q;
-        sbaddr_d    = sbaddress_i;
-        sbdata_d    = sbdata_q;
-
-        resp_queue_data         = 32'b0;
-        cmd_valid_o             = 1'b0;
-        sbaddress_write_valid_o = 1'b0;
-        sbdata_read_valid_o     = 1'b0;
-        sbdata_write_valid_o    = 1'b0;
-
-        // reads
-        if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) begin
-            unique case ({1'b0, dmi_req_i.addr}) inside
-                [(dm::Data0):DataEnd]: begin
-                    if (dm::DataCount > 0) begin
-                        resp_queue_data = data_q[dmi_req_i.addr[4:0]];
-                    end
-                    if (!cmdbusy_i) begin
-                        // check whether we need to re-execute the command (just give a cmd_valid)
-                        cmd_valid_o = abstractauto_q.autoexecdata[dmi_req_i.addr[3:0] - int'(dm::Data0)];
-                    end
-                end
-                dm::DMControl:    resp_queue_data = dmcontrol_q;
-                dm::DMStatus:     resp_queue_data = dmstatus;
-                dm::Hartinfo:     resp_queue_data = hartinfo_i[selected_hart];
-                dm::AbstractCS:   resp_queue_data = abstractcs;
-                dm::AbstractAuto: resp_queue_data = abstractauto_q;
-                // command is write-only: reads always return zero
-                dm::Command:    resp_queue_data = '0;
-                [(dm::ProgBuf0):ProgBufEnd]: begin
-                    resp_queue_data = progbuf_q[dmi_req_i.addr[4:0]];
-                    if (!cmdbusy_i) begin
-                        // check whether we need to re-execute the command (just give a cmd_valid)
-                        // TODO(zarubaf): check if offset is correct - without it this may assign Xes
-                        cmd_valid_o = abstractauto_q.autoexecprogbuf[dmi_req_i.addr[3:0]+16];
-                    end
-                end
-                dm::HaltSum0: resp_queue_data = haltsum0;
-                dm::HaltSum1: resp_queue_data = haltsum1;
-                dm::HaltSum2: resp_queue_data = haltsum2;
-                dm::HaltSum3: resp_queue_data = haltsum3;
-                dm::SBCS: begin
-                    if (sbbusy_i) begin
-                        sbcs_d.sbbusyerror = 1'b1;
-                    end
-                end
-                dm::SBAddress0: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                       sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        resp_queue_data = sbaddr_q[31:0];
-                    end
-                end
-                dm::SBAddress1: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                       sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        resp_queue_data = sbaddr_q[63:32];
-                    end
-                end
-                dm::SBData0: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                       sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        sbdata_read_valid_o = (sbcs_q.sberror == '0);
-                        resp_queue_data = sbdata_q[31:0];
-                    end
-                end
-                dm::SBData1: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                       sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        resp_queue_data = sbdata_q[63:32];
-                    end
-                end
-                default:;
-            endcase
-        end
-
-        // write
-        if (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_WRITE) begin
-            unique case (dm::dm_csr_t'({1'b0, dmi_req_i.addr})) inside
-                [(dm::Data0):DataEnd]: begin
-                    // attempts to write them while busy is set does not change their value
-                    if (!cmdbusy_i && dm::DataCount > 0) begin
-                        data_d[dmi_req_i.addr[4:0]] = dmi_req_i.data;
-                        // check whether we need to re-execute the command (just give a cmd_valid)
-                        cmd_valid_o = abstractauto_q.autoexecdata[dmi_req_i.addr[3:0] - int'(dm::Data0)];
-                    end
-                end
-                dm::DMControl: begin
-                    automatic dm::dmcontrol_t dmcontrol;
-                    dmcontrol = dm::dmcontrol_t'(dmi_req_i.data);
-                    // clear the havereset flag of the selected hart
-                    if (dmcontrol.ackhavereset) begin
-                        havereset_d[selected_hart] = 1'b0;
-                    end
-                    dmcontrol_d = dmi_req_i.data;
-                end
-                dm::DMStatus:; // write are ignored to R/O register
-                dm::Hartinfo:; // hartinfo is R/O
-                // only command error is write-able
-                dm::AbstractCS: begin // W1C
-                    // Gets set if an abstract command fails. The bits in this
-                    // field remain set until they are cleared by writing 1 to
-                    // them. No abstract command is started until the value is
-                    // reset to 0.
-                    automatic dm::abstractcs_t a_abstractcs;
-                    a_abstractcs = dm::abstractcs_t'(dmi_req_i.data);
-                    // writes during abstract command execution are not allowed
-                    if (!cmdbusy_i) begin
-                        cmderr_d = dm::cmderr_t'(~a_abstractcs.cmderr & cmderr_q);
-                    end else if (cmderr_q == dm::CmdErrNone) begin
-                        cmderr_d = dm::CmdErrBusy;
-                    end
-
-                end
-                dm::Command: begin
-                    // writes are ignored if a command is already busy
-                    if (!cmdbusy_i) begin
-                        cmd_valid_o = 1'b1;
-                        command_d = dm::command_t'(dmi_req_i.data);
-                    // if there was an attempt to write during a busy execution
-                    // and the cmderror field is zero set the busy error
-                    end else if (cmderr_q == dm::CmdErrNone) begin
-                        cmderr_d = dm::CmdErrBusy;
-                    end
-                end
-                dm::AbstractAuto: begin
-                    // this field can only be written legally when there is no command executing
-                    if (!cmdbusy_i) begin
-                        abstractauto_d = {dmi_req_i.data[31:16], 4'b0, dmi_req_i.data[11:0]};
-                    end else if (cmderr_q == dm::CmdErrNone) begin
-                        cmderr_d = dm::CmdErrBusy;
-                    end
-                end
-                [(dm::ProgBuf0):ProgBufEnd]: begin
-                    // attempts to write them while busy is set does not change their value
-                    if (!cmdbusy_i) begin
-                        progbuf_d[dmi_req_i.addr[4:0]] = dmi_req_i.data;
-                        // check whether we need to re-execute the command (just give a cmd_valid)
-                        // this should probably throw an error if executed during another command was busy
-                        // TODO(zarubaf): check if offset is correct - without it this may assign Xes
-                        cmd_valid_o = abstractauto_q.autoexecprogbuf[dmi_req_i.addr[3:0]+16];
-                    end
-                end
-                dm::SBCS: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                        sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        automatic dm::sbcs_t sbcs = dm::sbcs_t'(dmi_req_i.data);
-                        sbcs_d = sbcs;
-                        // R/W1C
-                        sbcs_d.sbbusyerror = sbcs_q.sbbusyerror & (~sbcs.sbbusyerror);
-                        sbcs_d.sberror     = sbcs_q.sberror     & (~sbcs.sberror);
-                    end
-                end
-                dm::SBAddress0: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                       sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        sbaddr_d[31:0] = dmi_req_i.data;
-                        sbaddress_write_valid_o = (sbcs_q.sberror == '0);
-                    end
-                end
-                dm::SBAddress1: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                       sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        sbaddr_d[63:32] = dmi_req_i.data;
-                    end
-                end
-                dm::SBData0: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                       sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        sbdata_d[31:0] = dmi_req_i.data;
-                        sbdata_write_valid_o = (sbcs_q.sberror == '0);
-                    end
-                end
-                dm::SBData1: begin
-                    // access while the SBA was busy
-                    if (sbbusy_i) begin
-                       sbcs_d.sbbusyerror = 1'b1;
-                    end begin
-                        sbdata_d[63:32] = dmi_req_i.data;
-                    end
-                end
-                default:;
-            endcase
-        end
-        // hart threw a command error and has precedence over bus writes
-        if (cmderror_valid_i) begin
-            cmderr_d = cmderror_i;
-        end
-
-        // update data registers
-        if (data_valid_i)
-            data_d = data_i;
-
-        // set the havereset flag when we did a ndmreset
-        if (ndmreset_o) begin
-            havereset_d = '1;
-        end
-        // -------------
-        // System Bus
-        // -------------
-        // set bus error
-        if (sberror_valid_i) begin
-            sbcs_d.sberror = sberror_i;
-        end
-        // update read data
-        if (sbdata_valid_i) begin
-            sbdata_d = sbdata_i;
-        end
-
-        // dmcontrol
-        // TODO(zarubaf) we currently do not implement the hartarray mask
-        dmcontrol_d.hasel           = 1'b0;
-        // we do not support resetting an individual hart
-        dmcontrol_d.hartreset       = 1'b0;
-        dmcontrol_d.setresethaltreq = 1'b0;
-        dmcontrol_d.clrresethaltreq = 1'b0;
-        dmcontrol_d.zero1           = '0;
-        dmcontrol_d.zero0           = '0;
-        // Non-writeable, clear only
-        dmcontrol_d.ackhavereset    = 1'b0;
-        // static values for sbcs
-        sbcs_d.sbversion            = 3'b1;
-        sbcs_d.sbbusy               = sbbusy_i;
-        sbcs_d.sbasize              = 7'd64; // bus is 64 bit wide
-        sbcs_d.sbaccess128          = 1'b0;
-        sbcs_d.sbaccess64           = 1'b0;
-        sbcs_d.sbaccess32           = 1'b0;
-        sbcs_d.sbaccess16           = 1'b0;
-        sbcs_d.sbaccess8            = 1'b0;
-        sbcs_d.sbaccess             = 1'b0;
-    end
-
-    // output multiplexer
-    always_comb begin
-        selected_hart = hartsel_o[HartSelLen-1:0];
-        // default assignment
-        haltreq_o = '0;
-        resumereq_o = '0;
-        haltreq_o[selected_hart] = dmcontrol_q.haltreq;
-        resumereq_o[selected_hart] = dmcontrol_q.resumereq;
-    end
-
-    assign dmactive_o = dmcontrol_q.dmactive;
-    assign cmd_o      = command_q;
-    assign progbuf_o  = progbuf_q;
-    assign data_o     = data_q;
-
-    assign resp_queue_pop = dmi_resp_ready_i & ~resp_queue_empty;
-
-    logic ndmreset_n;
-
-    assign ndmreset_o = dmcontrol_q.ndmreset;
-
-    // response FIFO
-    fifo_v2 #(
-        .dtype            ( logic [31:0]         ),
-        .DEPTH            ( 2                    )
-    ) i_fifo (
-        .clk_i            ( clk_i                ),
-        .rst_ni           ( dmi_rst_ni           ), // reset only when system is re-set
-        .flush_i          ( 1'b0                 ), // we do not need to flush this queue
-        .testmode_i       ( testmode_i           ),
-        .full_o           ( resp_queue_full      ),
-        .empty_o          ( resp_queue_empty     ),
-        .alm_full_o       (                      ),
-        .alm_empty_o      (                      ),
-        .data_i           ( resp_queue_data      ),
-        .push_i           ( resp_queue_push      ),
-        .data_o           ( dmi_resp_o.data      ),
-        .pop_i            ( resp_queue_pop       )
-    );
-
-    always_ff @(posedge clk_i or negedge rst_ni) begin
-        // PoR
-        if (~rst_ni) begin
-            dmcontrol_q    <= '0;
-            havereset_q    <= '1;
-            // this is the only write-able bit during reset
-            cmderr_q       <= dm::CmdErrNone;
-            command_q      <= '0;
-            abstractauto_q <= '0;
-            progbuf_q      <= '0;
-            data_q         <= '0;
-            sbcs_q         <= '0;
-            sbaddr_q       <= '0;
-            sbdata_q       <= '0;
-        end else begin
-            havereset_q    <= havereset_d;
-            // synchronous re-set of debug module, active-low, except for dmactive
-            if (!dmcontrol_q.dmactive) begin
-                dmcontrol_q.haltreq          <= '0;
-                dmcontrol_q.resumereq        <= '0;
-                dmcontrol_q.hartreset        <= '0;
-                dmcontrol_q.zero1            <= '0;
-                dmcontrol_q.hasel            <= '0;
-                dmcontrol_q.hartsello        <= '0;
-                dmcontrol_q.hartselhi        <= '0;
-                dmcontrol_q.zero0            <= '0;
-                dmcontrol_q.setresethaltreq  <= '0;
-                dmcontrol_q.clrresethaltreq  <= '0;
-                dmcontrol_q.ndmreset         <= '0;
-                // this is the only write-able bit during reset
-                dmcontrol_q.dmactive         <= dmcontrol_d.dmactive;
-                cmderr_q                     <= dm::CmdErrNone;
-                command_q                    <= '0;
-                abstractauto_q               <= '0;
-                progbuf_q                    <= '0;
-                data_q                       <= '0;
-                sbcs_q                       <= '0;
-                sbaddr_q                     <= '0;
-                sbdata_q                     <= '0;
-            end else begin
-                dmcontrol_q                  <= dmcontrol_d;
-                cmderr_q                     <= cmderr_d;
-                command_q                    <= command_d;
-                abstractauto_q               <= abstractauto_d;
-                progbuf_q                    <= progbuf_d;
-                data_q                       <= data_d;
-                sbcs_q                       <= sbcs_d;
-                sbaddr_q                     <= sbaddr_d;
-                sbdata_q                     <= sbdata_d;
-            end
-        end
-    end
-
-
-///////////////////////////////////////////////////////
-// assertions
-///////////////////////////////////////////////////////
-
-
-//pragma translate_off
-`ifndef VERILATOR
-    haltsum: assert property (
-        @(posedge clk_i) disable iff (~rst_ni) (dmi_req_ready_o && dmi_req_valid_i && dtm_op == dm::DTM_READ) |->
-            !({1'b0, dmi_req_i.addr} inside {dm::HaltSum0, dm::HaltSum1, dm::HaltSum2, dm::HaltSum3}))
-                else $warning("Haltsums are not implemented yet and always return 0.");
-`endif
-//pragma translate_on
-
-
-endmodule
diff --git a/src/debug/dm_mem.sv b/src/debug/dm_mem.sv
deleted file mode 100644
index 689b5954458802a37f7e4c92e923c25905f07da3..0000000000000000000000000000000000000000
--- a/src/debug/dm_mem.sv
+++ /dev/null
@@ -1,413 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the “License”); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File:   dm_mem.sv
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
- * Date:   11.7.2018
- *
- * Description: Memory module for execution-based debug clients
- *
- */
-
-// dm_mem: memory window of the debug module that the hart(s) execute from
-// and poll while in debug mode.
-//
-// Contents of this module:
-//   * a four-state FSM (Idle/Go/Resume/CmdExecuting) that turns cmd_valid_i
-//     and resumereq_i into the `go` / `resume` flags and drives cmdbusy_o
-//   * the decoder for the SRAM-style port (req_i/we_i/addr_i): writes to the
-//     Halted/Going/Resuming/Exception addresses update the hart state, writes
-//     to the data window are forwarded on data_o; reads return the WhereTo
-//     jump, the data registers, the program buffer, the generated abstract
-//     command or the per-hart flags
-//   * the abstract command "ROM", generated combinationally from cmd_i
-//   * the debug ROM instance, whose read data is forwarded for addresses at
-//     or above dm::HaltAddress
-//
-// Parameters:
-//   NrHarts - number of harts served; sizes all per-hart vectors
-module dm_mem #(
-    parameter int NrHarts     = -1
-)(
-    input  logic                             clk_i,       // Clock
-    input  logic                             rst_ni,      // debug module reset
-
-    output logic [NrHarts-1:0]               debug_req_o,
-    input  logic [19:0]                      hartsel_i,
-    // from Ctrl and Status register
-    input  logic [NrHarts-1:0]               haltreq_i,
-    input  logic [NrHarts-1:0]               resumereq_i,
-
-    // state bits
-    output logic [NrHarts-1:0]               halted_o,    // hart acknowledge halt
-    output logic [NrHarts-1:0]               resuming_o,  // hart is resuming
-
-    input  logic [dm::ProgBufSize-1:0][31:0] progbuf_i,    // program buffer to expose
-
-    input  logic [dm::DataCount-1:0][31:0]   data_i,       // data in
-    output logic [dm::DataCount-1:0][31:0]   data_o,       // data out
-    output logic                             data_valid_o, // data out is valid
-    // abstract command interface
-    input  logic                             cmd_valid_i,
-    input  dm::command_t                     cmd_i,
-    output logic                             cmderror_valid_o,
-    output dm::cmderr_t                      cmderror_o,
-    output logic                             cmdbusy_o,
-    // data interface
-
-    // SRAM interface
-    input  logic                             req_i,
-    input  logic                             we_i,
-    input  logic [63:0]                      addr_i,
-    input  logic [63:0]                      wdata_i,
-    input  logic [7:0]                       be_i,
-    output logic [63:0]                      rdata_o
-);
-
-    localparam int HartSelLen = (NrHarts == 1) ? 1 : $clog2(NrHarts);
-    localparam DbgAddressBits  = 12;
-    // All *End parameters below are the LAST address that still belongs to the
-    // region, because they are used as upper bounds of inclusive `inside` ranges.
-    localparam logic [DbgAddressBits-1:0] DataBase = (dm::DataAddr);
-    // last byte of the data registers (was one past the end, which made the
-    // data window also match the first byte after the data registers)
-    localparam logic [DbgAddressBits-1:0] DataEnd = (dm::DataAddr + 4*dm::DataCount - 1);
-    localparam logic [DbgAddressBits-1:0] ProgBufBase = (dm::DataAddr - 4*dm::ProgBufSize);
-    localparam logic [DbgAddressBits-1:0] ProgBufEnd = (dm::DataAddr - 1);
-    // the abstract command occupies 10 32-bit words directly below the program buffer
-    localparam logic [DbgAddressBits-1:0] AbstractCmdBase = (ProgBufBase - 4*10);
-    localparam logic [DbgAddressBits-1:0] AbstractCmdEnd = (ProgBufBase - 1);
-    localparam logic [DbgAddressBits-1:0] WhereTo   = 'h300;
-    localparam logic [DbgAddressBits-1:0] FlagsBase = 'h400;
-    localparam logic [DbgAddressBits-1:0] FlagsEnd  = 'h7FF;
-
-
-    // addresses the hart writes to in order to report its state
-    localparam logic [DbgAddressBits-1:0] Halted    = 'h100;
-    localparam logic [DbgAddressBits-1:0] Going     = 'h104;
-    localparam logic [DbgAddressBits-1:0] Resuming  = 'h108;
-    localparam logic [DbgAddressBits-1:0] Exception = 'h10C;
-
-    logic [dm::ProgBufSize/2-1:0][63:0]   progbuf;
-    logic [4:0][63:0]   abstract_cmd;
-    logic [NrHarts-1:0] halted_d, halted_q;
-    logic [NrHarts-1:0] resuming_d, resuming_q;
-    logic               resume, go, going;
-
-    logic [HartSelLen-1:0] hart_sel;
-    logic exception, halted;
-    logic unsupported_command;
-
-    logic [63:0] rom_rdata;
-    logic [63:0] rdata_d, rdata_q;
-    // distinguish whether we need to forward data from the ROM or the FSM
-    // latch the address for this
-    logic fwd_rom_d, fwd_rom_q;
-    dm::ac_ar_cmd_t ac_ar;
-
-    // Abstract Command Access Register
-    assign ac_ar       = dm::ac_ar_cmd_t'(cmd_i.control);
-    // the hart identifies itself through the low bits of the data it writes
-    assign hart_sel    = wdata_i[HartSelLen-1:0];
-    assign debug_req_o = haltreq_i;
-    assign halted_o    = halted_q;
-    assign resuming_o  = resuming_q;
-
-    // reshape progbuf
-    assign progbuf = progbuf_i;
-
-    enum logic [1:0] { Idle, Go, Resume, CmdExecuting } state_d, state_q;
-
-    // hart ctrl queue
-    always_comb begin
-        cmderror_valid_o = 1'b0;
-        cmderror_o       = dm::CmdErrNone;
-        state_d          = state_q;
-        go               = 1'b0;
-        resume           = 1'b0;
-        cmdbusy_o        = 1'b1;
-
-        case (state_q)
-            Idle: begin
-                cmdbusy_o = 1'b0;
-                // NOTE(review): halted_q, resuming_q, haltreq_i and resumereq_i
-                // are NrHarts wide, so the conditions below test "any bit set"
-                // rather than the hart selected by hartsel_i. This is only
-                // equivalent for NrHarts == 1 - confirm before using more harts.
-                if (cmd_valid_i && halted_q) begin
-                    // give the go signal
-                    state_d = Go;
-                end else if (cmd_valid_i) begin
-                    // hart must be halted for all requests
-                    cmderror_valid_o = 1'b1;
-                    cmderror_o = dm::CmdErrorHaltResume;
-                end
-                // CSRs want to resume, the request is ignored when the hart is
-                // requested to halt or it didn't clear the resuming_q bit before
-                if (resumereq_i && !resuming_q && !haltreq_i && halted_q) begin
-                    state_d = Resume;
-                end
-            end
-
-            Go: begin
-                // we are already busy here since we scheduled the execution of a program
-                cmdbusy_o = 1'b1;
-                go        = 1'b1;
-                // the thread is now executing the command, track its state
-                if (going)
-                    state_d = CmdExecuting;
-            end
-
-            Resume: begin
-                cmdbusy_o = 1'b1;
-                resume = 1'b1;
-                if (resuming_o)
-                    state_d = Idle;
-            end
-
-            CmdExecuting: begin
-                cmdbusy_o = 1'b1;
-                go        = 1'b0;
-                // wait until the hart has halted again
-                if (halted) begin
-                    state_d = Idle;
-                end
-            end
-        endcase
-
-        // errors are reported independently of the FSM state; an unsupported
-        // command is checked last and therefore wins over an exception
-        if (exception) begin
-            cmderror_valid_o = 1'b1;
-            cmderror_o = dm::CmdErrorException;
-        end
-
-        if (unsupported_command) begin
-            cmderror_valid_o = 1'b1;
-            cmderror_o = dm::CmdErrNotSupported;
-        end
-    end
-
-    // read/write logic
-    always_comb begin
-        automatic logic [63:0] data_bits;
-
-        halted_d     = halted_q;
-        resuming_d   = resuming_q;
-        // read data is registered: either the ROM output or the value decoded
-        // in the previous cycle, selected by the latched address comparison
-        rdata_o      = fwd_rom_q ? rom_rdata : rdata_q;
-        rdata_d      = rdata_q;
-        // convert the data in bits representation
-        data_bits    = data_i;
-        // write data in csr register
-        data_valid_o = 1'b0;
-        exception    = 1'b0;
-        halted       = 1'b0;
-        going        = 1'b0;
-        // The resume ack signal is lowered when the resume request is deasserted
-        if (resumereq_i == 1'b0) begin
-            resuming_d[hart_sel] = 1'b0;
-        end
-        // we've got a new request
-        if (req_i) begin
-            // this is a write
-            if (we_i) begin
-                unique case (addr_i[DbgAddressBits-1:0]) inside
-                    Halted: begin
-                        halted = 1'b1;
-                        halted_d[hart_sel] = 1'b1;
-                    end
-                    Going: begin
-                        going = 1'b1;
-                    end
-                    Resuming: begin
-                        // clear the halted flag as the hart resumed execution
-                        halted_d[hart_sel] = 1'b0;
-                        // set the resuming flag which needs to be cleared by the debugger
-                        resuming_d[hart_sel] = 1'b1;
-                    end
-                    // an exception occurred during execution
-                    Exception: exception = 1'b1;
-                    // core can write data registers
-                    [DataBase:DataEnd]: begin
-                        data_valid_o = 1'b1;
-                        // merge only the enabled bytes into the current data value
-                        for (int i = 0; i < $bits(be_i); i++) begin
-                            if (be_i[i]) begin
-                                data_bits[i*8+:8] = wdata_i[i*8+:8];
-                            end
-                        end
-                    end
-                    // writes to any other address are ignored; the explicit
-                    // default keeps `unique case` from reporting a no-match
-                    // violation for them
-                    default: ;
-                endcase
-
-            // this is a read
-            end else begin
-                unique case (addr_i[DbgAddressBits-1:0]) inside
-                    // variable ROM content
-                    WhereTo: begin
-                        // variable jump to abstract cmd, program_buffer or resume
-                        if (resumereq_i) begin
-                            rdata_d = {32'b0, riscv::jal(0, dm::ResumeAddress[11:0]-WhereTo)};
-                        end
-
-                        // there is a command active so jump there
-                        if (cmdbusy_o) begin
-                            // transfer not set is a shortcut to the program buffer
-                            if (!ac_ar.transfer) begin
-                                rdata_d = {32'b0, riscv::jal(0, ProgBufBase-WhereTo)};
-                            // this is a legit abstract cmd -> execute it
-                            end else begin
-                                rdata_d = {32'b0, riscv::jal(0, AbstractCmdBase-WhereTo)};
-                            end
-                        end
-                    end
-
-                    // TODO(zarubaf) change hard-coded values
-                    [DataBase:DataEnd]: begin
-                        rdata_d = {data_i[1], data_i[0]};
-                    end
-
-                    [ProgBufBase:ProgBufEnd]: begin
-                        rdata_d = progbuf[(addr_i[DbgAddressBits-1:3] - ProgBufBase[DbgAddressBits-1:3])];
-                    end
-
-                    // five 64-bit slots (ten instructions) for the abstract command
-                    [AbstractCmdBase:AbstractCmdEnd]: begin
-                        // return the correct address index
-                        rdata_d = abstract_cmd[(addr_i[DbgAddressBits-1:3] - AbstractCmdBase[DbgAddressBits-1:3])];
-                    end
-                    // harts are polling for flags here
-                    [FlagsBase:FlagsEnd]: begin
-                        automatic logic [7:0][7:0] rdata;
-                        rdata = '0;
-                        // release the corresponding hart
-                        if (({addr_i[DbgAddressBits-1:3], 3'b0} - FlagsBase[DbgAddressBits-1:0]) == {hartsel_i[DbgAddressBits-1:3], 3'b0}) begin
-                            rdata[hartsel_i[2:0]] = {6'b0, resume, go};
-                        end
-                        rdata_d = rdata;
-                    end
-                    default: ;
-                endcase
-            end
-        end
-
-        data_o = data_bits;
-    end
-
-    always_comb begin : abstract_cmd_rom
-        // this abstract command is currently unsupported
-        unsupported_command = 1'b0;
-        // default memory
-        // if ac_ar.transfer is not set then we can take a shortcut to the program buffer
-        abstract_cmd[0][31:0]  = riscv::illegal();
-        // load debug module base address into a0, this is shared among all commands
-        abstract_cmd[0][63:32] = riscv::auipc(10, 0);
-        abstract_cmd[1][31:0]  = riscv::srli(10, 10, 12); // clear lowest 12bit to get base offset of DM
-        abstract_cmd[1][63:32] = riscv::slli(10, 10, 12);
-        abstract_cmd[2][31:0]  = riscv::nop();
-        abstract_cmd[2][63:32] = riscv::nop();
-        abstract_cmd[3][31:0]  = riscv::nop();
-        abstract_cmd[3][63:32] = riscv::nop();
-        // restore a0 from dscratch1 and return control to the debug ROM
-        abstract_cmd[4][31:0]  = riscv::csrr(riscv::CSR_DSCRATCH1, 10);
-        abstract_cmd[4][63:32] = riscv::ebreak();
-
-        // this depends on the command being executed
-        unique case (cmd_i.cmdtype)
-            // --------------------
-            // Access Register
-            // --------------------
-            dm::AccessRegister: begin
-                // debugger -> register (write)
-                if (ac_ar.aarsize < 4 && ac_ar.transfer && ac_ar.write) begin
-                    // store a0 in dscratch1
-                    abstract_cmd[0][31:0] = riscv::csrw(riscv::CSR_DSCRATCH1, 10);
-                    // this range is reserved
-                    if (ac_ar.regno[15:14] != '0) begin
-                        abstract_cmd[0][31:0] = riscv::illegal();
-                    // A0 access needs to be handled separately, as we use A0 to load the DM address offset
-                    // need to access DSCRATCH1 in this case
-                    end else if (ac_ar.regno[12] && (!ac_ar.regno[5]) && (ac_ar.regno[4:0] == 10)) begin
-                        // store s0 in dscratch
-                        abstract_cmd[2][31:0]  = riscv::csrw(riscv::CSR_DSCRATCH0, 8);
-                        // load from data register
-                        abstract_cmd[2][63:32] = riscv::load(ac_ar.aarsize, 8, 10, dm::DataAddr);
-                        // and store it in the corresponding CSR
-                        abstract_cmd[3][31:0]  = riscv::csrw(riscv::CSR_DSCRATCH1, 8);
-                        // restore s0 again from dscratch
-                        abstract_cmd[3][63:32] = riscv::csrr(riscv::CSR_DSCRATCH0, 8);
-                    // GPR/FPR access
-                    end else if (ac_ar.regno[12]) begin
-                        // determine whether we want to access the floating point register or not
-                        if (ac_ar.regno[5]) begin
-                            abstract_cmd[2][31:0] = riscv::float_load(ac_ar.aarsize, ac_ar.regno[4:0], 10, dm::DataAddr);
-                        end else begin
-                            abstract_cmd[2][31:0] = riscv::load(ac_ar.aarsize, ac_ar.regno[4:0], 10, dm::DataAddr);
-                        end
-                    // CSR access
-                    end else begin
-                        // data register to CSR
-                        // store s0 in dscratch
-                        abstract_cmd[2][31:0]  = riscv::csrw(riscv::CSR_DSCRATCH0, 8);
-                        // load from data register
-                        abstract_cmd[2][63:32] = riscv::load(ac_ar.aarsize, 8, 10, dm::DataAddr);
-                        // and store it in the corresponding CSR
-                        abstract_cmd[3][31:0]  = riscv::csrw(riscv::csr_reg_t'(ac_ar.regno[11:0]), 8);
-                        // restore s0 again from dscratch
-                        abstract_cmd[3][63:32] = riscv::csrr(riscv::CSR_DSCRATCH0, 8);
-                    end
-                // register -> debugger (read)
-                end else if (ac_ar.aarsize < 4 && ac_ar.transfer && !ac_ar.write) begin
-                    // store a0 in dscratch1
-                    abstract_cmd[0][31:0]  = riscv::csrw(riscv::CSR_DSCRATCH1, 10);
-                    // this range is reserved
-                    if (ac_ar.regno[15:14] != '0) begin
-                        abstract_cmd[0][31:0] = riscv::illegal();
-                    // A0 access needs to be handled separately, as we use A0 to load the DM address offset
-                    // need to access DSCRATCH1 in this case
-                    end else if (ac_ar.regno[12] && (!ac_ar.regno[5]) && (ac_ar.regno[4:0] == 10)) begin
-                        // store s0 in dscratch
-                        abstract_cmd[2][31:0]  = riscv::csrw(riscv::CSR_DSCRATCH0, 8);
-                        // read value from CSR into s0
-                        abstract_cmd[2][63:32] = riscv::csrr(riscv::CSR_DSCRATCH1, 8);
-                        // and store s0 into data section
-                        abstract_cmd[3][31:0]  = riscv::store(ac_ar.aarsize, 8, 10, dm::DataAddr);
-                        // restore s0 again from dscratch
-                        abstract_cmd[3][63:32] = riscv::csrr(riscv::CSR_DSCRATCH0, 8);
-                    // GPR/FPR access
-                    end else if (ac_ar.regno[12]) begin
-                        // determine whether we want to access the floating point register or not
-                        if (ac_ar.regno[5]) begin
-                            abstract_cmd[2][31:0] = riscv::float_store(ac_ar.aarsize, ac_ar.regno[4:0], 10, dm::DataAddr);
-                        end else begin
-                            abstract_cmd[2][31:0] = riscv::store(ac_ar.aarsize, ac_ar.regno[4:0], 10, dm::DataAddr);
-                        end
-                    // CSR access
-                    end else begin
-                        // CSR register to data
-                        // store s0 in dscratch
-                        abstract_cmd[2][31:0]  = riscv::csrw(riscv::CSR_DSCRATCH0, 8);
-                        // read value from CSR into s0
-                        abstract_cmd[2][63:32] = riscv::csrr(riscv::csr_reg_t'(ac_ar.regno[11:0]), 8);
-                        // and store s0 into data section
-                        abstract_cmd[3][31:0]  = riscv::store(ac_ar.aarsize, 8, 10, dm::DataAddr);
-                        // restore s0 again from dscratch
-                        abstract_cmd[3][63:32] = riscv::csrr(riscv::CSR_DSCRATCH0, 8);
-                    end
-                end
-
-                // check whether we need to execute the program buffer
-                if (ac_ar.postexec) begin
-                    // issue a nop, we will automatically run into the program buffer
-                    abstract_cmd[4][63:32] = riscv::nop();
-                end
-            end
-            // not supported at the moment
-            // dm::QuickAccess:;
-            // dm::AccessMemory:;
-            default: begin
-                unsupported_command = 1'b1;
-            end
-        endcase
-    end
-
-    debug_rom i_debug_rom (
-        .clk_i,
-        .req_i,
-        .addr_i,
-        .rdata_o (rom_rdata)
-    );
-
-    // ROM starts at the HaltAddress of the core e.g.: it immediately jumps to
-    // the ROM base address
-    assign fwd_rom_d = (addr_i[DbgAddressBits-1:0] >= dm::HaltAddress[DbgAddressBits-1:0]) ? 1'b1 : 1'b0;
-
-    always_ff @(posedge clk_i or negedge rst_ni) begin
-        if (~rst_ni) begin
-            fwd_rom_q  <= 1'b0;
-            rdata_q    <= '0;
-            // per-hart vectors: '0 clears every bit regardless of NrHarts
-            halted_q   <= '0;
-            resuming_q <= '0;
-            state_q    <= Idle;
-        end else begin
-            fwd_rom_q  <= fwd_rom_d;
-            rdata_q    <= rdata_d;
-            halted_q   <= halted_d;
-            resuming_q <= resuming_d;
-            state_q    <= state_d;
-        end
-    end
-
-endmodule
diff --git a/src/debug/dm_pkg.sv b/src/debug/dm_pkg.sv
deleted file mode 100644
index c123e5fd7182abb4aea432a9edf065899ff7bbc1..0000000000000000000000000000000000000000
--- a/src/debug/dm_pkg.sv
+++ /dev/null
@@ -1,217 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the “License”); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File:   dm_pkg.sv
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
- * Date:   30.6.2018
- *
- * Description: Debug-module package, contains common system definitions.
- *
- */
-
-package dm;
-    localparam logic [3:0] DbgVersion013 = 4'h2;
-    // size of program buffer in junks of 32-bit words
-    localparam logic [4:0] ProgBufSize   = 5'h8;
-
-    // TODO(zarubaf) This is hard-coded to two at the moment
-    // amount of data count registers implemented
-    localparam logic [3:0] DataCount     = 4'h2;
-
-    // address to which a hart should jump when it was requested to halt
-    localparam logic [63:0] HaltAddress = 64'h800;
-    localparam logic [63:0] ResumeAddress = HaltAddress + 4;
-    localparam logic [63:0] ExceptionAddress = HaltAddress + 8;
-
-    // address where data0-15 is shadowed or if shadowed in a CSR
-    // address of the first CSR used for shadowing the data
-    localparam logic [11:0] DataAddr = 12'h380; // we are aligned with Rocket here
-
-    // debug registers
-    typedef enum logic [7:0] {
-        Data0        = 8'h04,
-        Data1        = 8'h05,
-        Data2        = 8'h06,
-        Data3        = 8'h07,
-        Data4        = 8'h08,
-        Data5        = 8'h09,
-        Data6        = 8'h0A,
-        Data7        = 8'h0B,
-        Data8        = 8'h0C,
-        Data9        = 8'h0D,
-        Data10       = 8'h0E,
-        Data11       = 8'h0F,
-        DMControl    = 8'h10,
-        DMStatus     = 8'h11, // r/o
-        Hartinfo     = 8'h12,
-        HaltSum1     = 8'h13,
-        HAWindowSel  = 8'h14,
-        HAWindow     = 8'h15,
-        AbstractCS   = 8'h16,
-        Command      = 8'h17,
-        AbstractAuto = 8'h18,
-        DevTreeAddr0 = 8'h19,
-        DevTreeAddr1 = 8'h1A,
-        DevTreeAddr2 = 8'h1B,
-        DevTreeAddr3 = 8'h1C,
-        NextDM       = 8'h1D,
-        ProgBuf0     = 8'h20,
-        ProgBuf15    = 8'h2F,
-        AuthData     = 8'h30,
-        HaltSum2     = 8'h34,
-        HaltSum3     = 8'h35,
-        SBAddress3   = 8'h37,
-        SBCS         = 8'h38,
-        SBAddress0   = 8'h39,
-        SBAddress1   = 8'h3A,
-        SBAddress2   = 8'h3B,
-        SBData0      = 8'h3C,
-        SBData1      = 8'h3D,
-        SBData2      = 8'h3E,
-        SBData3      = 8'h3F,
-        HaltSum0     = 8'h40
-    } dm_csr_t;
-
-    // debug causes
-    localparam logic [2:0] CauseBreakpoint = 3'h1;
-    localparam logic [2:0] CauseTrigger    = 3'h2;
-    localparam logic [2:0] CauseRequest    = 3'h3;
-    localparam logic [2:0] CauseSingleStep = 3'h4;
-
-    typedef struct packed {
-        logic [31:23] zero1;
-        logic         impebreak;
-        logic [21:20] zero0;
-        logic         allhavereset;
-        logic         anyhavereset;
-        logic         allresumeack;
-        logic         anyresumeack;
-        logic         allnonexistent;
-        logic         anynonexistent;
-        logic         allunavail;
-        logic         anyunavail;
-        logic         allrunning;
-        logic         anyrunning;
-        logic         allhalted;
-        logic         anyhalted;
-        logic         authenticated;
-        logic         authbusy;
-        logic         hasresethaltreq;
-        logic         devtreevalid;
-        logic [3:0]   version;
-    } dmstatus_t;
-
-    typedef struct packed {
-        logic         haltreq;
-        logic         resumereq;
-        logic         hartreset;
-        logic         ackhavereset;
-        logic         zero1;
-        logic         hasel;
-        logic [25:16] hartsello;
-        logic [15:6]  hartselhi;
-        logic [5:4]   zero0;
-        logic         setresethaltreq;
-        logic         clrresethaltreq;
-        logic         ndmreset;
-        logic         dmactive;
-    } dmcontrol_t;
-
-    typedef struct packed {
-        logic [31:24] zero1;
-        logic [23:20] nscratch;
-        logic [19:17] zero0;
-        logic         dataaccess;
-        logic [15:12] datasize;
-        logic [11:0]  dataaddr;
-    } hartinfo_t;
-
-    typedef enum logic [2:0] {  CmdErrNone, CmdErrBusy, CmdErrNotSupported,
-                                CmdErrorException, CmdErrorHaltResume,
-                                CmdErrorBus, CmdErrorOther = 7
-                             } cmderr_t;
-
-    typedef struct packed {
-        logic [31:29] zero3;
-        logic [28:24] progbufsize;
-        logic [23:13] zero2;
-        logic         busy;
-        logic         zero1;
-        cmderr_t      cmderr;
-        logic [7:4]   zero0;
-        logic [3:0]   datacount;
-    } abstractcs_t;
-
-    typedef enum logic [7:0] {
-                                 AccessRegister = 8'h0,
-                                 QuickAccess    = 8'h1,
-                                 AccessMemory   = 8'h2
-                             } cmd_t;
-
-    typedef struct packed {
-        cmd_t        cmdtype;
-        logic [23:0] control;
-    } command_t;
-
-    typedef struct packed {
-        logic [31:16] autoexecprogbuf;
-        logic [15:12] zero0;
-        logic [11:0]  autoexecdata;
-    } abstractauto_t;
-
-    typedef struct packed {
-        logic         zero1;
-        logic [22:20] aarsize;
-        logic         zero0;
-        logic         postexec;
-        logic         transfer;
-        logic         write;
-        logic [15:0]  regno;
-    } ac_ar_cmd_t;
-
-    // DTM
-    typedef enum logic [1:0] {
-        DTM_NOP   = 2'h0,
-        DTM_READ  = 2'h1,
-        DTM_WRITE = 2'h2
-    } dtm_op_t;
-
-    typedef struct packed {
-        logic [31:29] sbversion;
-        logic [28:23] zero0;
-        logic         sbbusyerror;
-        logic         sbbusy;
-        logic         sbreadonaddr;
-        logic [19:17] sbaccess;
-        logic         sbautoincrement;
-        logic         sbreadondata;
-        logic [14:12] sberror;
-        logic [11:5]  sbasize;
-        logic         sbaccess128;
-        logic         sbaccess64;
-        logic         sbaccess32;
-        logic         sbaccess16;
-        logic         sbaccess8;
-    } sbcs_t;
-
-    localparam logic[1:0] DTM_SUCCESS = 2'h0;
-
-    typedef struct packed {
-        logic [6:0]  addr;
-        dtm_op_t     op;
-        logic [31:0] data;
-    } dmi_req_t;
-
-    typedef struct packed  {
-        logic [31:0] data;
-        logic [1:0]  resp;
-    } dmi_resp_t;
-
-endpackage
diff --git a/src/debug/dm_sba.sv b/src/debug/dm_sba.sv
deleted file mode 100644
index 218ae19678fd9ecf5dcebfa3b322bc5833b94355..0000000000000000000000000000000000000000
--- a/src/debug/dm_sba.sv
+++ /dev/null
@@ -1,166 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the “License”); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File:   dm_sba.sv
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
- * Date:   1.8.2018
- *
- * Description: System Bus Access Module
- *
- */
-
-module dm_sba (
-    input  logic          clk_i,       // Clock
-    input  logic          rst_ni,
-    input  logic          dmactive_i,  // synchronous reset active low
-    // AXI port
-    output ariane_axi::req_t  axi_req_o,
-    input  ariane_axi::resp_t axi_resp_i,
-
-    input  logic [63:0]   sbaddress_i,
-    input  logic          sbaddress_write_valid_i,
-    // control signals in
-    input  logic          sbreadonaddr_i,
-    output logic [63:0]   sbaddress_o,
-    input  logic          sbautoincrement_i,
-    input  logic [2:0]    sbaccess_i,
-    // data in
-    input  logic          sbreadondata_i,
-    input  logic [63:0]   sbdata_i,
-    input  logic          sbdata_read_valid_i,
-    input  logic          sbdata_write_valid_i,
-    // read data out
-    output logic [63:0]   sbdata_o,
-    output logic          sbdata_valid_o,
-    // control signals
-    output logic          sbbusy_o,
-    output logic          sberror_valid_o, // bus error occurred
-    output logic [2:0]    sberror_o // bus error occurred
-);
-
-    enum logic [2:0] { Idle, Read, Write, WaitRead, WaitWrite } state_d, state_q;
-
-    logic [63:0]      address;
-    logic             req;
-    logic             gnt;
-    logic             we;
-    logic [7:0]       be;
-
-    assign sbbusy_o = (state_q != Idle) ? 1'b1 : 1'b0;
-
-    always_comb begin
-        req     = 1'b0;
-        address = sbaddress_i;
-        we      = 1'b0;
-        be      = '0;
-
-        sberror_o       = '0;
-        sberror_valid_o = 1'b0;
-        sbaddress_o     = sbaddress_i;
-
-        state_d = state_q;
-
-        case (state_q)
-            Idle: begin
-                // debugger requested a read
-                if (sbaddress_write_valid_i && sbreadonaddr_i)  state_d = Read;
-                // debugger requested a write
-                if (sbdata_write_valid_i) state_d = Write;
-                // perform another read
-                if (sbdata_read_valid_i && sbreadondata_i) state_d = Read;
-            end
-
-            Read: begin
-                req = 1'b1;
-                if (gnt) state_d = WaitRead;
-            end
-
-            Write: begin
-                req = 1'b1;
-                we  = 1'b1;
-                // generate byte enable mask
-                case (sbaccess_i)
-                    3'b000: be[ sbaddress_i[2:0]] = '1;
-                    3'b001: be[{sbaddress_i[2:1], 1'b0} +: 2] = '1;
-                    3'b010: be[{sbaddress_i[2:2], 2'b0} +: 4] = '1;
-                    3'b011: be = '1;
-                    default:;
-                endcase
-                if (gnt) state_d = WaitWrite;
-            end
-
-            WaitRead: begin
-                if (sbdata_valid_o) begin
-                    state_d = Idle;
-                    // auto-increment address
-                    if (sbautoincrement_i) sbaddress_o = sbaddress_i + (1 << sbaccess_i);
-                end
-            end
-
-            WaitWrite: begin
-                if (sbdata_valid_o) begin
-                    state_d = Idle;
-                    // auto-increment address
-                    if (sbautoincrement_i) sbaddress_o = sbaddress_i + (1 << sbaccess_i);
-                end
-            end
-        endcase
-        // handle error case
-        if (sbaccess_i > 3 && state_q != Idle) begin
-            req             = 1'b0;
-            state_d         = Idle;
-            sberror_valid_o = 1'b1;
-            sberror_o       = 'd3;
-        end
-        // further error handling should go here ...
-    end
-
-    always_ff @(posedge clk_i or negedge rst_ni) begin
-        if (~rst_ni) begin
-            state_q <= Idle;
-        end else begin
-            state_q <= state_d;
-        end
-    end
-
-
-    axi_adapter #(
-        .DATA_WIDTH            ( 64                        )
-    ) i_axi_master (
-        .clk_i                 ( clk_i                     ),
-        .rst_ni                ( rst_ni                    ),
-        .req_i                 ( req                       ),
-        .type_i                ( ariane_axi::SINGLE_REQ    ),
-        .gnt_o                 ( gnt                       ),
-        .gnt_id_o              (                           ),
-        .addr_i                ( address                   ),
-        .we_i                  ( we                        ),
-        .wdata_i               ( sbdata_i                  ),
-        .be_i                  ( be                        ),
-        .size_i                ( sbaccess_i[1:0]           ),
-        .id_i                  ( '0                        ),
-        .valid_o               ( sbdata_valid_o            ),
-        .rdata_o               ( sbdata_o                  ),
-        .id_o                  (                           ),
-        .critical_word_o       (                           ), // not needed here
-        .critical_word_valid_o (                           ), // not needed here
-        .axi_req_o,
-        .axi_resp_i
-    );
-
-
-    //pragma translate_off
-    `ifndef VERILATOR
-        // maybe bump severity to $error if not handled at runtime
-        dm_sba_access_size: assert property(@(posedge  clk_i) disable iff (dmactive_i !== 1'b0) (state_d != Idle) |-> (sbaccess_i < 4)) else $warning ("accesses > 8 byte not supported at the moment");
-    `endif
-    //pragma translate_on
-
-endmodule
diff --git a/src/debug/dm_top.sv b/src/debug/dm_top.sv
deleted file mode 100644
index b8026124f539415735e5511e3eb5f07bb04eed2a..0000000000000000000000000000000000000000
--- a/src/debug/dm_top.sv
+++ /dev/null
@@ -1,230 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the “License”); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File:   dm_top.sv
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
- * Date:   30.6.2018
- *
- * Description: Top-level of debug module (DM). This is an AXI-Slave.
- *              DTM protocol is equal to SiFives debug protocol to leverage
- *              SW infrastructure re-use. As of version 0.13
- */
-
-module dm_top #(
-    parameter int NrHarts      = -1,
-    parameter int AxiIdWidth   = -1,
-    parameter int AxiAddrWidth = -1,
-    parameter int AxiDataWidth = -1,
-    parameter int AxiUserWidth = -1
-) (
-    input  logic               clk_i,       // clock
-    input  logic               rst_ni,      // asynchronous reset active low, connect PoR here, not the system reset
-    input  logic               testmode_i,
-    output logic               ndmreset_o,  // non-debug module reset
-    output logic               dmactive_o,  // debug module is active
-    output logic [NrHarts-1:0] debug_req_o, // async debug request
-    input  logic [NrHarts-1:0] unavailable_i, // communicate whether the hart is unavailable (e.g.: power down)
-
-    // bus slave, for an execution based technique
-    input  ariane_axi::req_t   axi_s_req_i,
-    output ariane_axi::resp_t  axi_s_resp_o,
-
-    // bus master, for system bus accesses
-    output ariane_axi::req_t   axi_m_req_o,
-    input  ariane_axi::resp_t  axi_m_resp_i,
-
-    // Connection to DTM - compatible to RocketChip Debug Module
-    input  logic               dmi_rst_ni,
-    input  logic               dmi_req_valid_i,
-    output logic               dmi_req_ready_o,
-    input  dm::dmi_req_t       dmi_req_i,
-
-    output logic               dmi_resp_valid_o,
-    input  logic               dmi_resp_ready_i,
-    output dm::dmi_resp_t      dmi_resp_o
-);
-
-    // Debug CSRs
-    dm::hartinfo_t [NrHarts-1:0]      hartinfo;
-    logic [NrHarts-1:0]               halted;
-    // logic [NrHarts-1:0]               running;
-    logic [NrHarts-1:0]               resumeack;
-    logic [NrHarts-1:0]               haltreq;
-    logic [NrHarts-1:0]               resumereq;
-    logic                             cmd_valid;
-    dm::command_t                     cmd;
-
-    logic                             req;
-    logic                             we;
-    logic [63:0]                      addr;
-    logic [7:0]                       be;
-    logic [63:0]                      wdata;
-    logic [63:0]                      rdata;
-
-    logic                             cmderror_valid;
-    dm::cmderr_t                      cmderror;
-    logic                             cmdbusy;
-    logic [dm::ProgBufSize-1:0][31:0] progbuf;
-    logic [dm::DataCount-1:0][31:0]   data_csrs_mem;
-    logic [dm::DataCount-1:0][31:0]   data_mem_csrs;
-    logic                             data_valid;
-    logic [19:0]                      hartsel;
-    // System Bus Access Module
-    logic [63:0]                      sbaddress_csrs_sba;
-    logic [63:0]                      sbaddress_sba_csrs;
-    logic                             sbaddress_write_valid;
-    logic                             sbreadonaddr;
-    logic                             sbautoincrement;
-    logic [2:0]                       sbaccess;
-    logic                             sbreadondata;
-    logic [63:0]                      sbdata_write;
-    logic                             sbdata_read_valid;
-    logic                             sbdata_write_valid;
-    logic [63:0]                      sbdata_read;
-    logic                             sbdata_valid;
-    logic                             sbbusy;
-    logic                             sberror_valid;
-    logic [2:0]                       sberror;
-
-    // Debug Ctrl for each hart -> I haven't found a better way to
-    // parameterize this
-    for (genvar i = 0; i < NrHarts; i++) begin : dm_hart_ctrl
-        assign hartinfo[i] = ariane_pkg::DebugHartInfo;
-    end
-
-    dm_csrs #(
-        .NrHarts(NrHarts)
-    ) i_dm_csrs (
-        .clk_i                   ( clk_i                 ),
-        .rst_ni                  ( rst_ni                ),
-        .testmode_i              ( testmode_i            ),
-        .dmi_rst_ni,
-        .dmi_req_valid_i,
-        .dmi_req_ready_o,
-        .dmi_req_i,
-        .dmi_resp_valid_o,
-        .dmi_resp_ready_i,
-        .dmi_resp_o,
-        .ndmreset_o              ( ndmreset_o            ),
-        .dmactive_o              ( dmactive_o            ),
-        .hartsel_o               ( hartsel               ),
-        .hartinfo_i              ( hartinfo              ),
-        .halted_i                ( halted                ),
-        .unavailable_i,
-        .resumeack_i             ( resumeack             ),
-        .haltreq_o               ( haltreq               ),
-        .resumereq_o             ( resumereq             ),
-        .cmd_valid_o             ( cmd_valid             ),
-        .cmd_o                   ( cmd                   ),
-        .cmderror_valid_i        ( cmderror_valid        ),
-        .cmderror_i              ( cmderror              ),
-        .cmdbusy_i               ( cmdbusy               ),
-        .progbuf_o               ( progbuf               ),
-        .data_i                  ( data_mem_csrs         ),
-        .data_valid_i            ( data_valid            ),
-        .data_o                  ( data_csrs_mem         ),
-        .sbaddress_o             ( sbaddress_csrs_sba    ),
-        .sbaddress_i             ( sbaddress_sba_csrs    ),
-        .sbaddress_write_valid_o ( sbaddress_write_valid ),
-        .sbreadonaddr_o          ( sbreadonaddr          ),
-        .sbautoincrement_o       ( sbautoincrement       ),
-        .sbaccess_o              ( sbaccess              ),
-        .sbreadondata_o          ( sbreadondata          ),
-        .sbdata_o                ( sbdata_write          ),
-        .sbdata_read_valid_o     ( sbdata_read_valid     ),
-        .sbdata_write_valid_o    ( sbdata_write_valid    ),
-        .sbdata_i                ( sbdata_read           ),
-        .sbdata_valid_i          ( sbdata_valid          ),
-        .sbbusy_i                ( sbbusy                ),
-        .sberror_valid_i         ( sberror_valid         ),
-        .sberror_i               ( sberror               )
-    );
-
-    dm_sba i_dm_sba (
-        .clk_i                   ( clk_i                 ),
-        .rst_ni                  ( rst_ni                ),
-        .dmactive_i              ( dmactive_o            ),
-        .axi_req_o               ( axi_m_req_o           ),
-        .axi_resp_i              ( axi_m_resp_i          ),
-        .sbaddress_i             ( sbaddress_csrs_sba    ),
-        .sbaddress_o             ( sbaddress_sba_csrs    ),
-        .sbaddress_write_valid_i ( sbaddress_write_valid ),
-        .sbreadonaddr_i          ( sbreadonaddr          ),
-        .sbautoincrement_i       ( sbautoincrement       ),
-        .sbaccess_i              ( sbaccess              ),
-        .sbreadondata_i          ( sbreadondata          ),
-        .sbdata_i                ( sbdata_write          ),
-        .sbdata_read_valid_i     ( sbdata_read_valid     ),
-        .sbdata_write_valid_i    ( sbdata_write_valid    ),
-        .sbdata_o                ( sbdata_read           ),
-        .sbdata_valid_o          ( sbdata_valid          ),
-        .sbbusy_o                ( sbbusy                ),
-        .sberror_valid_o         ( sberror_valid         ),
-        .sberror_o               ( sberror               )
-    );
-
-    dm_mem #(
-        .NrHarts (NrHarts)
-    ) i_dm_mem (
-        .clk_i                   ( clk_i                 ),
-        .rst_ni                  ( rst_ni                ),
-        .debug_req_o             ( debug_req_o           ),
-        .hartsel_i               ( hartsel               ),
-        .haltreq_i               ( haltreq               ),
-        .resumereq_i             ( resumereq             ),
-        .halted_o                ( halted                ),
-        .resuming_o              ( resumeack             ),
-        .cmd_valid_i             ( cmd_valid             ),
-        .cmd_i                   ( cmd                   ),
-        .cmderror_valid_o        ( cmderror_valid        ),
-        .cmderror_o              ( cmderror              ),
-        .cmdbusy_o               ( cmdbusy               ),
-        .progbuf_i               ( progbuf               ),
-        .data_i                  ( data_csrs_mem         ),
-        .data_o                  ( data_mem_csrs         ),
-        .data_valid_o            ( data_valid            ),
-        .req_i                   ( req                   ),
-        .we_i                    ( we                    ),
-        .addr_i                  ( addr                  ),
-        .wdata_i                 ( wdata                 ),
-        .be_i                    ( be                    ),
-        .rdata_o                 ( rdata                 )
-    );
-
-    AXI_BUS #(
-        .AXI_ID_WIDTH   ( AxiIdWidth   ),
-        .AXI_ADDR_WIDTH ( AxiAddrWidth ),
-        .AXI_DATA_WIDTH ( AxiDataWidth ),
-        .AXI_USER_WIDTH ( AxiUserWidth )
-    ) slave();
-
-    axi_slave_connect_rev i_axi_slave_connect_rev (
-      .axi_req_i (axi_s_req_i),
-      .axi_resp_o(axi_s_resp_o),
-      .slave(slave));
-
-    axi2mem #(
-        .AXI_ID_WIDTH   ( AxiIdWidth   ),
-        .AXI_ADDR_WIDTH ( AxiAddrWidth ),
-        .AXI_DATA_WIDTH ( AxiDataWidth ),
-        .AXI_USER_WIDTH ( AxiUserWidth )
-    ) i_axi2mem (
-        .clk_i      ( clk_i    ),
-        .rst_ni     ( rst_ni   ),
-        .slave      ( slave    ),
-        .req_o      ( req      ),
-        .we_o       ( we       ),
-        .addr_o     ( addr     ),
-        .be_o       ( be       ),
-        .data_o     ( wdata    ),
-        .data_i     ( rdata    )
-    );
-
-endmodule
diff --git a/src/debug/dmi_cdc.sv b/src/debug/dmi_cdc.sv
deleted file mode 100644
index 98b15a8604bd5f14e881fa8e09be46007b0a3ba9..0000000000000000000000000000000000000000
--- a/src/debug/dmi_cdc.sv
+++ /dev/null
@@ -1,72 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the “License”); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File:   axi_riscv_debug_module.sv
- * Author: Andreas Traber <atraber@iis.ee.ethz.ch>
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
- *
- * Description: Clock domain crossings for JTAG to DMI very heavily based
- *              on previous work by Andreas Traber for the PULP project.
- *              This is mainly a wrapper around the existing CDCs.
- */
-module dmi_cdc (
-    // JTAG side (master side)
-    input  logic             tck_i,
-    input  logic             trst_ni,
-
-    input  dm::dmi_req_t     jtag_dmi_req_i,
-    output logic             jtag_dmi_ready_o,
-    input  logic             jtag_dmi_valid_i,
-
-    output dm::dmi_resp_t    jtag_dmi_resp_o,
-    output logic             jtag_dmi_valid_o,
-    input  logic             jtag_dmi_ready_i,
-
-    // core side (slave side)
-    input  logic             clk_i,
-    input  logic             rst_ni,
-
-    output dm::dmi_req_t     core_dmi_req_o,
-    output logic             core_dmi_valid_o,
-    input  logic             core_dmi_ready_i,
-
-    input dm::dmi_resp_t     core_dmi_resp_i,
-    output logic             core_dmi_ready_o,
-    input  logic             core_dmi_valid_i
-  );
-
-  cdc_2phase #(.T(dm::dmi_req_t)) i_cdc_req (
-    .src_rst_ni  ( trst_ni          ),
-    .src_clk_i   ( tck_i            ),
-    .src_data_i  ( jtag_dmi_req_i   ),
-    .src_valid_i ( jtag_dmi_valid_i ),
-    .src_ready_o ( jtag_dmi_ready_o ),
-
-    .dst_rst_ni  ( rst_ni           ),
-    .dst_clk_i   ( clk_i            ),
-    .dst_data_o  ( core_dmi_req_o   ),
-    .dst_valid_o ( core_dmi_valid_o ),
-    .dst_ready_i ( core_dmi_ready_i )
-  );
-
-  cdc_2phase #(.T(dm::dmi_resp_t)) i_cdc_resp (
-    .src_rst_ni  ( rst_ni           ),
-    .src_clk_i   ( clk_i            ),
-    .src_data_i  ( core_dmi_resp_i  ),
-    .src_valid_i ( core_dmi_valid_i ),
-    .src_ready_o ( core_dmi_ready_o ),
-
-    .dst_rst_ni  ( trst_ni          ),
-    .dst_clk_i   ( tck_i            ),
-    .dst_data_o  ( jtag_dmi_resp_o  ),
-    .dst_valid_o ( jtag_dmi_valid_o ),
-    .dst_ready_i ( jtag_dmi_ready_i )
-  );
-endmodule
diff --git a/src/debug/dmi_jtag.sv b/src/debug/dmi_jtag.sv
deleted file mode 100644
index 49df7ce5b0429a5357507d6b51e48e26bd66603f..0000000000000000000000000000000000000000
--- a/src/debug/dmi_jtag.sv
+++ /dev/null
@@ -1,258 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the “License”); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File:   axi_riscv_debug_module.sv
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
- * Date:   19.7.2018
- *
- * Description: JTAG DMI (debug module interface)
- *
- */
-
-module dmi_jtag (
-    input  logic         clk_i,      // DMI Clock
-    input  logic         rst_ni,     // Asynchronous reset active low
-    input  logic         testmode_i,
-
-    output logic         dmi_rst_no, // hard reset
-    output dm::dmi_req_t dmi_req_o,
-    output logic         dmi_req_valid_o,
-    input  logic         dmi_req_ready_i,
-
-    input dm::dmi_resp_t dmi_resp_i,
-    output logic         dmi_resp_ready_o,
-    input  logic         dmi_resp_valid_i,
-
-    input  logic         tck_i,    // JTAG test clock pad
-    input  logic         tms_i,    // JTAG test mode select pad
-    input  logic         trst_ni,  // JTAG test reset pad
-    input  logic         td_i,     // JTAG test data input pad
-    output logic         td_o,     // JTAG test data output pad
-    output logic         tdo_oe_o  // Data out output enable
-);
-    assign       dmi_rst_no = rst_ni;
-
-    logic        test_logic_reset;
-    logic        shift_dr;
-    logic        update_dr;
-    logic        capture_dr;
-    logic        dmi_access;
-    logic        dtmcs_select;
-    logic        dmi_reset;
-    logic        dmi_tdi;
-    logic        dmi_tdo;
-
-    dm::dmi_req_t  dmi_req;
-    logic          dmi_req_ready;
-    logic          dmi_req_valid;
-
-    dm::dmi_resp_t dmi_resp;
-    logic          dmi_resp_valid;
-    logic          dmi_resp_ready;
-
-    typedef struct packed {
-        logic [6:0]  address;
-        logic [31:0] data;
-        logic [1:0]  op;
-    } dmi_t;
-
-    typedef enum logic [1:0] {
-                                DMINoError = 2'h0, DMIReservedError = 2'h1,
-                                DMIOPFailed = 2'h2, DMIBusy = 2'h3
-                             } dmi_error_t;
-
-    enum logic [2:0] { Idle, Read, WaitReadValid, Write, WaitWriteValid } state_d, state_q;
-
-    logic [$bits(dmi_t)-1:0] dr_d, dr_q;
-    logic [6:0] address_d, address_q;
-    logic [31:0] data_d, data_q;
-
-    dmi_t  dmi;
-    assign dmi          = dmi_t'(dr_q);
-    assign dmi_req.addr = address_q;
-    assign dmi_req.data = data_q;
-    assign dmi_req.op   = (state_q == Write) ? dm::DTM_WRITE : dm::DTM_READ;
-    // we'will always be ready to accept the data we requested
-    assign dmi_resp_ready = 1'b1;
-
-    logic error_dmi_busy;
-    dmi_error_t error_d, error_q;
-
-    always_comb begin
-        error_dmi_busy = 1'b0;
-        // default assignments
-        state_d   = state_q;
-        address_d = address_q;
-        data_d    = data_q;
-        error_d   = error_q;
-
-        dmi_req_valid = 1'b0;
-
-        case (state_q)
-            Idle: begin
-                // make sure that no error is sticky
-                if (dmi_access && update_dr && (error_q == DMINoError)) begin
-                    // save address and value
-                    address_d = dmi.address;
-                    data_d = dmi.data;
-                    if (dm::dtm_op_t'(dmi.op) == dm::DTM_READ) begin
-                        state_d = Read;
-                    end else if (dm::dtm_op_t'(dmi.op) == dm::DTM_WRITE) begin
-                        state_d = Write;
-                    end
-                    // else this is a nop and we can stay here
-                end
-            end
-
-            Read: begin
-                dmi_req_valid = 1'b1;
-                if (dmi_req_ready) begin
-                    state_d = WaitReadValid;
-                end
-            end
-
-            WaitReadValid: begin
-                // load data into register and shift out
-                if (dmi_resp_valid) begin
-                    data_d = dmi_resp.data;
-                    state_d = Idle;
-                end
-            end
-
-            Write: begin
-                dmi_req_valid = 1'b1;
-                // got a valid answer go back to idle
-                if (dmi_req_ready) begin
-                    state_d = Idle;
-                end
-            end
-
-            WaitWriteValid: begin
-                // just wait for idle here
-                if (dmi_resp_valid) begin
-                    state_d = Idle;
-                end
-            end
-        endcase
-
-        // update_dr means we got another request but we didn't finish
-        // the one in progress, this state is sticky
-        if (update_dr && state_q != Idle) begin
-            error_dmi_busy = 1'b1;
-        end
-
-        // if capture_dr goes high while we are in the read state
-        // or in the corresponding wait state we are not giving back a valid word
-        // -> throw an error
-        if (capture_dr && state_q inside {Read, WaitReadValid}) begin
-            error_dmi_busy = 1'b1;
-        end
-
-        if (error_dmi_busy) begin
-            error_d = DMIBusy;
-        end
-        // clear sticky error flag
-        if (dmi_reset && dtmcs_select) begin
-            error_d = DMINoError;
-        end
-    end
-
-    // shift register
-    assign dmi_tdo = dr_q[0];
-
-    always_comb begin
-        dr_d    = dr_q;
-
-        if (capture_dr) begin
-            if (dmi_access) begin
-                if (error_q == DMINoError && !error_dmi_busy) begin
-                    dr_d = {address_q, data_q, DMINoError};
-                // DMI was busy, report an error
-                end else if (error_q == DMIBusy || error_dmi_busy) begin
-                    dr_d = {address_q, data_q, DMIBusy};
-                end
-            end
-        end
-
-        if (shift_dr) begin
-            if (dmi_access) dr_d = {dmi_tdi, dr_q[$bits(dr_q)-1:1]};
-        end
-
-        if (test_logic_reset) begin
-            dr_d = '0;
-        end
-    end
-
-    always_ff @(posedge tck_i or negedge trst_ni) begin
-        if (~trst_ni) begin
-            dr_q      <= '0;
-            state_q   <= Idle;
-            address_q <= '0;
-            data_q    <= '0;
-            error_q   <= DMINoError;
-        end else begin
-            dr_q      <= dr_d;
-            state_q   <= state_d;
-            address_q <= address_d;
-            data_q    <= data_d;
-            error_q   <= error_d;
-        end
-    end
-
-    // ---------
-    // TAP
-    // ---------
-    dmi_jtag_tap #(
-        .IrLength (5)
-    ) i_dmi_jtag_tap (
-        .tck_i,
-        .tms_i,
-        .trst_ni,
-        .td_i,
-        .td_o,
-        .tdo_oe_o,
-        .testmode_i         ( testmode_i       ),
-        .test_logic_reset_o ( test_logic_reset ),
-        .shift_dr_o         ( shift_dr         ),
-        .update_dr_o        ( update_dr        ),
-        .capture_dr_o       ( capture_dr       ),
-        .dmi_access_o       ( dmi_access       ),
-        .dtmcs_select_o     ( dtmcs_select     ),
-        .dmi_reset_o        ( dmi_reset        ),
-        .dmi_error_i        ( error_q          ),
-        .dmi_tdi_o          ( dmi_tdi          ),
-        .dmi_tdo_i          ( dmi_tdo          )
-    );
-
-    // ---------
-    // CDC
-    // ---------
-    dmi_cdc i_dmi_cdc (
-        // JTAG side (master side)
-        .tck_i,
-        .trst_ni,
-        .jtag_dmi_req_i    ( dmi_req          ),
-        .jtag_dmi_ready_o  ( dmi_req_ready    ),
-        .jtag_dmi_valid_i  ( dmi_req_valid    ),
-        .jtag_dmi_resp_o   ( dmi_resp         ),
-        .jtag_dmi_valid_o  ( dmi_resp_valid   ),
-        .jtag_dmi_ready_i  ( dmi_resp_ready   ),
-        // core side
-        .clk_i,
-        .rst_ni,
-        .core_dmi_req_o    ( dmi_req_o        ),
-        .core_dmi_valid_o  ( dmi_req_valid_o  ),
-        .core_dmi_ready_i  ( dmi_req_ready_i  ),
-        .core_dmi_resp_i   ( dmi_resp_i       ),
-        .core_dmi_ready_o  ( dmi_resp_ready_o ),
-        .core_dmi_valid_i  ( dmi_resp_valid_i )
-    );
-
-endmodule
diff --git a/src/debug/dmi_jtag_tap.sv b/src/debug/dmi_jtag_tap.sv
deleted file mode 100644
index ae4b2fcfb59e03c20914da5c9e3913a680c0dece..0000000000000000000000000000000000000000
--- a/src/debug/dmi_jtag_tap.sv
+++ /dev/null
@@ -1,341 +0,0 @@
-/* Copyright 2018 ETH Zurich and University of Bologna.
- * Copyright and related rights are licensed under the Solderpad Hardware
- * License, Version 0.51 (the “License”); you may not use this file except in
- * compliance with the License.  You may obtain a copy of the License at
- * http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
- * or agreed to in writing, software, hardware and materials distributed under
- * this License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR
- * CONDITIONS OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- *
- * File:   dmi_jtag_tap.sv
- * Author: Florian Zaruba <zarubaf@iis.ee.ethz.ch>
- * Date:   19.7.2018
- *
- * Description: JTAG TAP for DMI (according to debug spec 0.13)
- *
- */
-
-module dmi_jtag_tap #(
-    parameter int IrLength = 5
-)(
-    input  logic        tck_i,    // JTAG test clock pad
-    input  logic        tms_i,    // JTAG test mode select pad
-    input  logic        trst_ni,  // JTAG test reset pad
-    input  logic        td_i,     // JTAG test data input pad
-    output logic        td_o,     // JTAG test data output pad
-    output logic        tdo_oe_o, // Data out output enable
-    input  logic        testmode_i,
-    output logic        test_logic_reset_o,
-    output logic        shift_dr_o,
-    output logic        update_dr_o,
-    output logic        capture_dr_o,
-
-    // we want to access DMI register
-    output logic        dmi_access_o,
-    // JTAG is interested in writing the DTM CSR register
-    output logic        dtmcs_select_o,
-    // clear error state
-    output logic        dmi_reset_o,
-    input  logic [1:0]  dmi_error_i,
-    // test data to submodule
-    output logic        dmi_tdi_o,
-    // test data in from submodule
-    input  logic        dmi_tdo_i
-
-);
-
-    // to submodule
-    assign dmi_tdi_o = td_i;
-
-    enum logic [3:0] { TestLogicReset, RunTestIdle, SelectDrScan,
-                     CaptureDr, ShiftDr, Exit1Dr, PauseDr, Exit2Dr,
-                     UpdateDr, SelectIrScan, CaptureIr, ShiftIr,
-                     Exit1Ir, PauseIr, Exit2Ir, UpdateIr } tap_state_q, tap_state_d;
-
-    typedef enum logic [IrLength-1:0] {
-        BYPASS0   = 'h0,
-        IDCODE    = 'h1,
-        DTMCSR    = 'h10,
-        DMIACCESS = 'h11,
-        BYPASS1   = 'h1f
-    } ir_reg_t;
-
-    typedef struct packed {
-        logic [31:18] zero1;
-        logic         dmihardreset;
-        logic         dmireset;
-        logic         zero0;
-        logic [14:12] idle;
-        logic [11:10] dmistat;
-        logic [9:4]   abits;
-        logic [3:0]   version;
-    } dtmcs_t;
-
-    // ----------------
-    // IR logic
-    // ----------------
-    logic [IrLength-1:0]  jtag_ir_shift_d, jtag_ir_shift_q; // shift register
-    ir_reg_t              jtag_ir_d, jtag_ir_q; // IR register -> this gets captured from shift register upon update_ir
-    logic capture_ir, shift_ir, pause_ir, update_ir;
-
-    always_comb begin
-        jtag_ir_shift_d = jtag_ir_shift_q;
-        jtag_ir_d       = jtag_ir_q;
-
-        // IR shift register
-        if (shift_ir) begin
-            jtag_ir_shift_d = {td_i, jtag_ir_shift_q[IrLength-1:1]};
-        end
-
-        // capture IR register
-        if (capture_ir) begin
-            jtag_ir_shift_d =  'b0101;
-        end
-
-        // update IR register
-        if (update_ir) begin
-            jtag_ir_d = ir_reg_t'(jtag_ir_shift_q);
-        end
-
-        // synchronous test-logic reset
-        if (test_logic_reset_o) begin
-            jtag_ir_shift_d = '0;
-            jtag_ir_d       = IDCODE;
-        end
-    end
-
-    always_ff @(posedge tck_i, negedge trst_ni) begin
-        if (~trst_ni) begin
-            jtag_ir_shift_q <= '0;
-            jtag_ir_q       <= IDCODE;
-        end else begin
-            jtag_ir_shift_q <= jtag_ir_shift_d;
-            jtag_ir_q       <= jtag_ir_d;
-        end
-    end
-
-    // ----------------
-    // TAP DR Regs
-    // ----------------
-    // - Bypass
-    // - IDCODE
-    // - DTM CS
-    // Define IDCODE Value
-    localparam IDCODE_VALUE = 32'h249511C3;
-    // 0001             version
-    // 0100100101010001 part number (IQ)
-    // 00011100001      manufacturer id (flextronics)
-    // 1                required by standard
-    logic [31:0] idcode_d, idcode_q;
-    logic        idcode_select;
-    logic        bypass_select;
-    dtmcs_t      dtmcs_d, dtmcs_q;
-    logic        bypass_d, bypass_q;  // this is a 1-bit register
-
-    assign dmi_reset_o = dtmcs_q.dmireset;
-
-    always_comb begin
-        idcode_d = idcode_q;
-        bypass_d = bypass_q;
-        dtmcs_d  = dtmcs_q;
-
-        if (capture_dr_o) begin
-            if (idcode_select) idcode_d = IDCODE_VALUE;
-            if (bypass_select) bypass_d = 1'b0;
-            if (dtmcs_select_o) begin
-                dtmcs_d  = '{
-                                zero1        : '0,
-                                dmihardreset : 1'b0,
-                                dmireset     : 1'b0,
-                                zero0        : '0,
-                                idle         : 'd1,         // 1: Enter Run-Test/Idle and leave it immediately
-                                dmistat      : dmi_error_i, // 0: No error, 1: Op failed, 2: too fast
-                                abits        : 'd7, // The size of address in dmi
-                                version      : 'd1  // Version described in spec version 0.13 (and later?)
-                            };
-            end
-        end
-
-        if (shift_dr_o) begin
-            if (idcode_select)  idcode_d = {td_i, idcode_q[31:1]};
-            if (bypass_select)  bypass_d = td_i;
-            if (dtmcs_select_o) dtmcs_d  = {td_i, dtmcs_q[31:1]};
-        end
-
-        if (test_logic_reset_o) begin
-            idcode_d = IDCODE_VALUE;
-            bypass_d = 1'b0;
-        end
-    end
-
-    // ----------------
-    // Data reg select
-    // ----------------
-    always_comb begin
-        dmi_access_o   = 1'b0;
-        dtmcs_select_o = 1'b0;
-        idcode_select  = 1'b0;
-        bypass_select  = 1'b0;
-        case (jtag_ir_q)
-            BYPASS0:   bypass_select  = 1'b1;
-            IDCODE:    idcode_select  = 1'b1;
-            DTMCSR:    dtmcs_select_o = 1'b1;
-            DMIACCESS: dmi_access_o   = 1'b1;
-            BYPASS1:   bypass_select  = 1'b1;
-            default:   bypass_select  = 1'b1;
-        endcase
-    end
-
-    // ----------------
-    // Output select
-    // ----------------
-    logic tdo_mux;
-
-    always_comb begin
-        // we are shifting out the IR register
-        if (shift_ir) begin
-            tdo_mux = jtag_ir_shift_q[0];
-        // here we are shifting the DR register
-        end else begin
-          case (jtag_ir_q)    // synthesis parallel_case
-            IDCODE:         tdo_mux = idcode_q[0];     // Reading ID code
-            DTMCSR:         tdo_mux = dtmcs_q[0];
-            DMIACCESS:      tdo_mux = dmi_tdo_i;       // Read from DMI TDO
-            default:        tdo_mux = bypass_q;      // BYPASS instruction
-          endcase
-        end
-
-    end
-
-    // DFT
-    logic tck_n, tck_ni;
-
-    cluster_clock_inverter i_tck_inv (
-        .clk_i ( tck_i  ),
-        .clk_o ( tck_ni )
-    );
-
-    pulp_clock_mux2 i_dft_tck_mux (
-        .clk0_i    ( tck_ni     ),
-        .clk1_i    ( tck_i      ), // bypass the inverted clock for testing
-        .clk_sel_i ( testmode_i ),
-        .clk_o     ( tck_n      )
-    );
-
-    // TDO changes state at negative edge of TCK
-    always_ff @(posedge tck_n, negedge trst_ni) begin
-        if (~trst_ni) begin
-            td_o     <= 1'b0;
-            tdo_oe_o <= 1'b0;
-        end else begin
-            td_o     <= tdo_mux;
-            tdo_oe_o <= (shift_ir | shift_dr_o);
-        end
-    end
-    // ----------------
-    // TAP FSM
-    // ----------------
-    // Determination of next state; purely combinatorial
-    always_comb begin
-        test_logic_reset_o = 1'b0;
-
-        capture_dr_o       = 1'b0;
-        shift_dr_o         = 1'b0;
-        update_dr_o        = 1'b0;
-
-        capture_ir         = 1'b0;
-        shift_ir           = 1'b0;
-        pause_ir           = 1'b0;
-        update_ir          = 1'b0;
-
-        case (tap_state_q)
-            TestLogicReset: begin
-                tap_state_d = (tms_i) ? TestLogicReset : RunTestIdle;
-            end
-            RunTestIdle: begin
-                tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
-            end
-            // DR Path
-            SelectDrScan: begin
-                tap_state_d = (tms_i) ? SelectIrScan : CaptureDr;
-            end
-            CaptureDr: begin
-                capture_dr_o = 1'b1;
-                tap_state_d = (tms_i) ? Exit1Dr : ShiftDr;
-            end
-            ShiftDr: begin
-                shift_dr_o = 1'b1;
-                tap_state_d = (tms_i) ? Exit1Dr : ShiftDr;
-            end
-            Exit1Dr: begin
-                tap_state_d = (tms_i) ? UpdateDr : PauseDr;
-            end
-            PauseDr: begin
-                tap_state_d = (tms_i) ? Exit2Dr : PauseDr;
-            end
-            Exit2Dr: begin
-                tap_state_d = (tms_i) ? UpdateDr : ShiftDr;
-            end
-            UpdateDr: begin
-                update_dr_o = 1'b1;
-                tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
-            end
-            // IR Path
-            SelectIrScan: begin
-                tap_state_d = (tms_i) ? TestLogicReset : CaptureIr;
-            end
-            // In this controller state, the shift register bank in the
-            // Instruction Register parallel loads a pattern of fixed values on
-            // the rising edge of TCK. The last two significant bits must always
-            // be "01".
-            CaptureIr: begin
-                capture_ir = 1'b1;
-                tap_state_d = (tms_i) ? Exit1Ir : ShiftIr;
-            end
-            // In this controller state, the instruction register gets connected
-            // between TDI and TDO, and the captured pattern gets shifted on
-            // each rising edge of TCK. The instruction available on the TDI
-            // pin is also shifted in to the instruction register.
-            ShiftIr: begin
-                shift_ir = 1'b1;
-                tap_state_d = (tms_i) ? Exit1Ir : ShiftIr;
-            end
-            Exit1Ir: begin
-                tap_state_d = (tms_i) ? UpdateIr : PauseIr;
-            end
-            PauseIr: begin
-                pause_ir = 1'b1;
-                tap_state_d = (tms_i) ? Exit2Ir : PauseIr;
-            end
-            Exit2Ir: begin
-                tap_state_d = (tms_i) ? UpdateIr : ShiftIr;
-            end
-            // In this controller state, the instruction in the instruction
-            // shift register is latched to the latch bank of the Instruction
-            // Register on every falling edge of TCK. This instruction becomes
-            // the current instruction once it is latched.
-            UpdateIr: begin
-                update_ir = 1'b1;
-                tap_state_d = (tms_i) ? SelectDrScan : RunTestIdle;
-            end
-            default: tap_state_d = TestLogicReset;  // can't actually happen
-      endcase
-    end
-
-    always_ff @(posedge tck_i or negedge trst_ni) begin
-        if (~trst_ni) begin
-            tap_state_q <= RunTestIdle;
-            idcode_q    <= IDCODE_VALUE;
-            bypass_q    <= 1'b0;
-            dtmcs_q     <= '0;
-        end else begin
-            tap_state_q <= tap_state_d;
-            idcode_q    <= idcode_d;
-            bypass_q    <= bypass_d;
-            dtmcs_q     <= dtmcs_d;
-        end
-    end
-
-
-endmodule
diff --git a/src/fpu_div_sqrt_mvp b/src/fpu_div_sqrt_mvp
deleted file mode 160000
index d94bf84ff56fca7e3f8e0a719c8a493ec6c39153..0000000000000000000000000000000000000000
--- a/src/fpu_div_sqrt_mvp
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit d94bf84ff56fca7e3f8e0a719c8a493ec6c39153
diff --git a/src/frontend/frontend.sv b/src/frontend/frontend.sv
index 267bb6dcfcd022aea151fd1f97b0cf7140b84348..8d661cd20b8c16946a51a18419bfbb9e63953b70 100644
--- a/src/frontend/frontend.sv
+++ b/src/frontend/frontend.sv
@@ -431,11 +431,13 @@ module frontend #(
     ras #(
         .DEPTH  ( RAS_DEPTH   )
     ) i_ras (
+        .clk_i,
+        .rst_ni,
+        .flush_i( flush_bp_i  ),
         .push_i ( ras_push    ),
         .pop_i  ( ras_pop     ),
         .data_i ( ras_update  ),
-        .data_o ( ras_predict ),
-        .*
+        .data_o ( ras_predict )
     );
 
     btb #(
diff --git a/src/frontend/ras.sv b/src/frontend/ras.sv
index 3fef9b803e2b82b69b5e32adb4fbf764b956504e..9d3a065616543b80489d4e5e20186bb1db0d4c74 100644
--- a/src/frontend/ras.sv
+++ b/src/frontend/ras.sv
@@ -19,6 +19,7 @@ module ras #(
 )(
     input  logic             clk_i,
     input  logic             rst_ni,
+    input  logic             flush_i,
     input  logic             push_i,
     input  logic             pop_i,
     input  logic [63:0]      data_i,
@@ -46,6 +47,10 @@ module ras #(
             stack_d[DEPTH-1].valid = 1'b0;
             stack_d[DEPTH-1].ra = 'b0;
         end
+
+        if (flush_i) begin // evaluated last, so it overrides the stack_d updates above
+            stack_d = '0;
+        end
     end
 
     always_ff @(posedge clk_i or negedge rst_ni) begin
diff --git a/src/plic/plic_claim_complete_tracker.sv b/src/plic/plic_claim_complete_tracker.sv
index 0de0b212ca8b5bab8bb7592f20e165d1e1f18457..f7ab51d1d116000c20bde67c17a8a11e5f07586f 100644
--- a/src/plic/plic_claim_complete_tracker.sv
+++ b/src/plic/plic_claim_complete_tracker.sv
@@ -57,13 +57,9 @@ module plic_claim_complete_tracker #(
     logic                   save_claims_array_q [NUM_GATEWAYS+1][NUM_TARGETS];
     logic                   complete_array      [NUM_GATEWAYS+1][NUM_TARGETS];
 
-    logic [ID_BITWIDTH-1:0] complete_id;
-
-
     // for handling claims
     for (genvar counter = 0; counter < NUM_TARGETS; counter++) begin
         always_ff @(posedge clk_i or negedge rst_ni) begin : proc_target
-            integer id;
 
             if (~rst_ni) begin
                 claimed_gateways_q[counter]         <= '0;
@@ -82,23 +78,18 @@ module plic_claim_complete_tracker #(
 
                 // if a claim is issued, forward it to gateway with highest priority for the claiming target
                 if (target_irq_claims_i[counter]) begin
-                    id                                = identifier_of_largest_priority_per_target[counter];
-                    claim_array[id][counter]         <= 1;
-
+                    claim_array[identifier_of_largest_priority_per_target[counter]][counter]         <= 1;
                     // save claim for later when the complete-notification arrives
-                    save_claims_array_q[id][counter] <= 1;
+                    save_claims_array_q[identifier_of_largest_priority_per_target[counter]][counter] <= 1;
 
                 end else begin
                     // if a complete is issued, check if that gateway has previously been claimed by
                     // this target and forward the
                     // complete message to that gateway. if no claim has previously been issued, the
                     // complete message is ignored
-                    // integer complete_id = target_irq_completes_identifier_i[counter];
-                    complete_id = target_irq_completes_identifier_i[counter];
-
-                    if (target_irq_completes_i[counter] && (save_claims_array_q[complete_id][counter] > 0)) begin
-                        complete_array[complete_id][counter]      <= 1;
-                        save_claims_array_q[complete_id][counter] <= 0;
+                    if (target_irq_completes_i[counter] && (save_claims_array_q[target_irq_completes_identifier_i[counter]][counter] > 0)) begin
+                        complete_array[target_irq_completes_identifier_i[counter]][counter]      <= 1;
+                        save_claims_array_q[target_irq_completes_identifier_i[counter]][counter] <= 0;
                     end
                 end
             end
diff --git a/src/riscv-dbg b/src/riscv-dbg
new file mode 160000
index 0000000000000000000000000000000000000000..90f901ac91761aae8e04c25f4ede953047eddff3
--- /dev/null
+++ b/src/riscv-dbg
@@ -0,0 +1 @@
+Subproject commit 90f901ac91761aae8e04c25f4ede953047eddff3
diff --git a/tb/ariane_testharness.sv b/tb/ariane_testharness.sv
index aadb03557004f004e5fe571134bfc90c7727c14f..b02621591e26850e0495a832a6dedb7406e74422 100644
--- a/tb/ariane_testharness.sv
+++ b/tb/ariane_testharness.sv
@@ -174,17 +174,30 @@ module ariane_testharness #(
         assign dmi_exit = 1'b0;
     end
 
-    ariane_axi::req_t    dm_axi_m_req,  dm_axi_s_req;
-    ariane_axi::resp_t   dm_axi_m_resp, dm_axi_s_resp;
+    ariane_axi::req_t    dm_axi_m_req;
+    ariane_axi::resp_t   dm_axi_m_resp;
+
+    logic                dm_slave_req;
+    logic                dm_slave_we;
+    logic [64-1:0]       dm_slave_addr;
+    logic [64/8-1:0]     dm_slave_be;
+    logic [64-1:0]       dm_slave_wdata;
+    logic [64-1:0]       dm_slave_rdata;
+
+    logic                dm_master_req;
+    logic [64-1:0]       dm_master_add;
+    logic                dm_master_we;
+    logic [64-1:0]       dm_master_wdata;
+    logic [64/8-1:0]     dm_master_be;
+    logic                dm_master_gnt;
+    logic                dm_master_r_valid;
+    logic [64-1:0]       dm_master_r_rdata;
 
     // debug module
     dm_top #(
-        // current implementation only supports 1 hart
-        .NrHarts              ( 1                         ),
-        .AxiIdWidth           ( AXI_ID_WIDTH_SLAVES       ),
-        .AxiAddrWidth         ( AXI_ADDRESS_WIDTH         ),
-        .AxiDataWidth         ( AXI_DATA_WIDTH            ),
-        .AxiUserWidth         ( AXI_USER_WIDTH            )
+        .NrHarts              ( 1                    ),
+        .BusWidth             ( AXI_DATA_WIDTH       ),
+        .Selectable_Harts     ( 1'b1                 )
     ) i_dm_top (
 
         .clk_i                ( clk_i                ),
@@ -194,10 +207,20 @@ module ariane_testharness #(
         .dmactive_o           (                      ), // active debug session
         .debug_req_o          ( debug_req_core       ),
         .unavailable_i        ( '0                   ),
-        .axi_s_req_i          ( dm_axi_s_req         ),
-        .axi_s_resp_o         ( dm_axi_s_resp        ),
-        .axi_m_req_o          ( dm_axi_m_req         ),
-        .axi_m_resp_i         ( dm_axi_m_resp        ),
+        .slave_req_i          ( dm_slave_req         ),
+        .slave_we_i           ( dm_slave_we          ),
+        .slave_addr_i         ( dm_slave_addr        ),
+        .slave_be_i           ( dm_slave_be          ),
+        .slave_wdata_i        ( dm_slave_wdata       ),
+        .slave_rdata_o        ( dm_slave_rdata       ),
+        .master_req_o         ( dm_master_req        ),
+        .master_add_o         ( dm_master_add        ),
+        .master_we_o          ( dm_master_we         ),
+        .master_wdata_o       ( dm_master_wdata      ),
+        .master_be_o          ( dm_master_be         ),
+        .master_gnt_i         ( dm_master_gnt        ),
+        .master_r_valid_i     ( dm_master_r_valid    ),
+        .master_r_rdata_i     ( dm_master_r_rdata    ),
         .dmi_rst_ni           ( rst_ni               ),
         .dmi_req_valid_i      ( debug_req_valid      ),
         .dmi_req_ready_o      ( debug_req_ready      ),
@@ -207,8 +230,52 @@ module ariane_testharness #(
         .dmi_resp_o           ( debug_resp           )
     );
 
-    axi_master_connect i_axi_master_dm (.axi_req_i(dm_axi_m_req), .axi_resp_o(dm_axi_m_resp), .master(slave[1]));
-    axi_slave_connect  i_axi_slave_dm  (.axi_req_o(dm_axi_s_req), .axi_resp_i(dm_axi_s_resp), .slave(master[ariane_soc::Debug]));
+    axi2mem #( // AXI Debug port of the crossbar -> dm_slave_* request signals fed to dm_top
+        .AXI_ID_WIDTH   ( AXI_ID_WIDTH_SLAVES   ),
+        .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH     ),
+        .AXI_DATA_WIDTH ( AXI_DATA_WIDTH        ),
+        .AXI_USER_WIDTH ( AXI_USER_WIDTH        )
+    ) i_dm_axi2mem (
+        .clk_i      ( clk_i                     ),
+        .rst_ni     ( rst_ni                    ),
+        .slave      ( master[ariane_soc::Debug] ),
+        .req_o      ( dm_slave_req              ),
+        .we_o       ( dm_slave_we               ),
+        .addr_o     ( dm_slave_addr             ),
+        .be_o       ( dm_slave_be               ),
+        .data_o     ( dm_slave_wdata            ),
+        .data_i     ( dm_slave_rdata            )
+    );
+
+    axi_master_connect i_dm_axi_master_connect ( // debug module AXI master -> slave[1]
+        .axi_req_i  ( dm_axi_m_req  ),
+        .axi_resp_o ( dm_axi_m_resp ),
+        .master     ( slave[1]      )
+    );
+
+    axi_adapter #( // converts dm_top's master_* requests into dm_axi_m_req / dm_axi_m_resp
+        .DATA_WIDTH            ( AXI_DATA_WIDTH            )
+    ) i_dm_axi_master (
+        .clk_i                 ( clk_i                     ),
+        .rst_ni                ( rst_ni                    ),
+        .req_i                 ( dm_master_req             ),
+        .type_i                ( ariane_axi::SINGLE_REQ    ),
+        .gnt_o                 ( dm_master_gnt             ),
+        .gnt_id_o              (                           ), // left unconnected
+        .addr_i                ( dm_master_add             ),
+        .we_i                  ( dm_master_we              ),
+        .wdata_i               ( dm_master_wdata           ),
+        .be_i                  ( dm_master_be              ),
+        .size_i                ( 2'b11                     ), // always do 64bit here and use byte enables to gate
+        .id_i                  ( '0                        ),
+        .valid_o               ( dm_master_r_valid         ),
+        .rdata_o               ( dm_master_r_rdata         ),
+        .id_o                  (                           ), // left unconnected
+        .critical_word_o       (                           ), // left unconnected
+        .critical_word_valid_o (                           ), // left unconnected
+        .axi_req_o             ( dm_axi_m_req              ),
+        .axi_resp_i            ( dm_axi_m_resp             )
+    );
 
 
     // ---------------
@@ -372,7 +439,11 @@ module ariane_testharness #(
         .ipi_o       ( ipi            )
     );
 
-    axi_slave_connect i_axi_slave_connect_clint (.axi_req_o(axi_clint_req), .axi_resp_i(axi_clint_resp), .slave(master[ariane_soc::CLINT]));
+    axi_slave_connect i_axi_slave_connect_clint ( // crossbar CLINT port -> axi_clint_req/resp
+        .axi_req_o  ( axi_clint_req              ),
+        .axi_resp_i ( axi_clint_resp             ),
+        .slave      ( master[ariane_soc::CLINT]  )
+    );
 
     // ---------------
     // Peripherals
@@ -424,6 +495,7 @@ module ariane_testharness #(
 
     ariane #(
 `ifdef PITON_ARIANE
+        .AxiIdWidth    ( AXI_ID_WIDTH                                    ),
         .SwapEndianess ( 0                                               ),
         .CachedAddrEnd ( (ariane_soc::DRAMBase + ariane_soc::DRAMLength) ),
 `endif
@@ -442,6 +514,10 @@ module ariane_testharness #(
         .axi_resp_i           ( axi_ariane_resp     )
     );
 
-    axi_master_connect i_axi_master_connect_ariane (.axi_req_i(axi_ariane_req), .axi_resp_o(axi_ariane_resp), .master(slave[0]));
+    axi_master_connect i_axi_master_connect_ariane ( // core AXI master -> slave[0]
+        .axi_req_i  ( axi_ariane_req  ),
+        .axi_resp_o ( axi_ariane_resp ),
+        .master     ( slave[0]        )
+    );
 
 endmodule