From edca53dec5860703a13bfb492f24061918b49ef0 Mon Sep 17 00:00:00 2001
From: Alban Gruin <alban.gruin@irit.fr>
Date: Tue, 4 Jul 2023 12:21:55 +0200
Subject: [PATCH] fpga: port Ariane to the Ultrascale ZCU104

Signed-off-by: Alban Gruin <alban.gruin@irit.fr>
---
 Makefile                                      |  41 +-
 fpga/Makefile                                 |  33 +-
 fpga/constraints/cva6_fpga.xdc                |   4 +-
 fpga/constraints/zcu104.xdc                   |  38 +
 fpga/scripts/program_cva6_fpga.tcl            |   6 +-
 fpga/scripts/run_cva6_fpga.tcl                |  76 +-
 fpga/src/ariane_soc_pkg.sv                    |  95 +++
 fpga/src/cva6_ultrascale.sv                   | 704 ++++++++++++++++++
 fpga/src/zybo-z7-20.svh                       |   2 +-
 fpga/xilinx/xlnx_clk_gen/tcl/run.tcl          |   4 +-
 .../xlnx_processing_system7/tcl/run.tcl       | 107 +--
 11 files changed, 904 insertions(+), 206 deletions(-)
 create mode 100644 fpga/constraints/zcu104.xdc
 create mode 100644 fpga/src/ariane_soc_pkg.sv
 create mode 100644 fpga/src/cva6_ultrascale.sv

diff --git a/Makefile b/Makefile
index c2dd9ca7..2864d099 100644
--- a/Makefile
+++ b/Makefile
@@ -62,9 +62,9 @@ app_path := $(root-dir)/sw/app
 
 
 # board name for bitstream generation.
-BOARD          := zybo-z7-20
-XILINX_PART    := xc7z020clg400-1
-XILINX_BOARD   := digilentinc.com:zybo-z7-20:part0:1.0
+BOARD          := zcu104
+XILINX_PART    := xczu7ev-ffvc1156-2-e
+XILINX_BOARD   := xilinx.com:zcu104:part0:1.1
 CLK_PERIOD_NS  := 25
 BATCH_MODE ?= 1
 
@@ -194,9 +194,6 @@ src := $(addprefix $(root-dir), $(src))
 uart_src := $(wildcard fpga/src/apb_uart/src/*.vhd)
 uart_src := $(addprefix $(root-dir), $(uart_src))
 
-fpga_src :=  $(wildcard fpga/src/*.sv) $(wildcard fpga/src/bootrom/*.sv) $(wildcard fpga/src/ariane-ethernet/*.sv)
-fpga_src := $(addprefix $(root-dir), $(fpga_src))
-
 # look for testbenches
 tbs := tb/jtag_pkg.sv tb/ariane_tb.sv tb/ariane_testharness.sv
 # RISCV asm tests and benchmark setup (used for CI)
@@ -273,44 +270,20 @@ check-benchmarks:
 benchmark:
 	cd sw/app && make $(APP).mem
 
-fpga_filter := $(addprefix $(root-dir), bootrom/bootrom.sv)
-fpga_filter += $(addprefix $(root-dir), include/instr_tracer_pkg.sv)
-fpga_filter += $(addprefix $(root-dir), src/util/ex_trace_item.sv)
-fpga_filter += $(addprefix $(root-dir), src/util/instr_trace_item.sv)
-fpga_filter += $(addprefix $(root-dir), src/util/instr_tracer_if.sv)
-fpga_filter += $(addprefix $(root-dir), src/util/instr_tracer.sv)
-
 # target rused to run synthesis and place and route in out of context mode
 # make cva6_ooc CLK_PERIOD_NS=<period of the CVA6 architecture>
-cva6_ooc: $(ariane_pkg) $(util) $(src) $(fpga_src)
-	@echo "Generate sources for synthesis"
-	@echo read_verilog -sv {$(ariane_pkg)} >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(filter-out $(fpga_filter), $(util))}     >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(filter-out $(fpga_filter), $(src))} 	   >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(fpga_src)}   >> fpga/scripts/add_sources.tcl
+cva6_ooc:
 	cd fpga && make cva6_ooc BOARD=$(BOARD) XILINX_PART=$(XILINX_PART) XILINX_BOARD=$(XILINX_BOARD) CLK_PERIOD_NS=$(CLK_PERIOD_NS) BATCH_MODE=$(BATCH_MODE)
 
 .PHONY:  cva6_ooc cva6_fpga program_cva6_fpga
 
-cva6_fpga: $(ariane_pkg) $(util) $(src) $(fpga_src) $(uart_src)
-	@echo "[FPGA] Generate sources"
-	@echo read_vhdl        {$(uart_src)}    > fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(ariane_pkg)} >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(filter-out $(fpga_filter), $(util))}     >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(filter-out $(fpga_filter), $(src))} 	   >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(fpga_src)}   >> fpga/scripts/add_sources.tcl
+cva6_fpga:
 	@echo "[FPGA] Generate Bitstream"
 	cd fpga && make cva6_fpga BRAM=1 PS7_DDR=0 BOARD=$(BOARD) XILINX_PART=$(XILINX_PART) XILINX_BOARD=$(XILINX_BOARD) CLK_PERIOD_NS=$(CLK_PERIOD_NS) BATCH_MODE=$(BATCH_MODE)
 
-cva6_fpga_ddr: $(ariane_pkg) $(util) $(src) $(fpga_src) $(uart_src)
-	@echo "[FPGA] Generate sources"
-	@echo read_vhdl        {$(uart_src)}    > fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(ariane_pkg)} >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(filter-out $(fpga_filter), $(util))}     >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(filter-out $(fpga_filter), $(src))} 	   >> fpga/scripts/add_sources.tcl
-	@echo read_verilog -sv {$(fpga_src)}   >> fpga/scripts/add_sources.tcl
+cva6_fpga_ddr:
 	@echo "[FPGA] Generate Bitstream"
-	cd fpga && make cva6_fpga PS7_DDR=1 BRAM=0 XILINX_PART=$(XILINX_PART) XILINX_BOARD=$(XILINX_BOARD) CLK_PERIOD_NS=$(CLK_PERIOD_NS) BATCH_MODE=$(BATCH_MODE)
+	cd fpga && make cva6_fpga PS7_DDR=1 BRAM=0 BOARD=$(BOARD) XILINX_PART=$(XILINX_PART) XILINX_BOARD=$(XILINX_BOARD) CLK_PERIOD_NS=$(CLK_PERIOD_NS) BATCH_MODE=$(BATCH_MODE)
 
 
 program_cva6_fpga: 
diff --git a/fpga/Makefile b/fpga/Makefile
index ce632410..084da30c 100644
--- a/fpga/Makefile
+++ b/fpga/Makefile
@@ -32,13 +32,15 @@ VIVADO ?= vivado
 work-dir := work-fpga
 bit := $(work-dir)/cva6_fpga.bit
 
+mkfile_path := $(abspath $(lastword $(MAKEFILE_LIST)))
+root-dir := $(dir $(mkfile_path))
+project_dir := $(abspath $(root-dir)/..)
+
 ip-dir := xilinx
-ips := xlnx_blk_mem_gen.xci \
-       xlnx_processing_system7.xci \
-       xlnx_axi_clock_converter.xci \
-       xlnx_axi_dwidth_converter_dm_master.xci \
-       xlnx_axi_dwidth_converter_dm_slave.xci \
-       xlnx_clk_gen.xci             
+ips := $(ip-dir)/xlnx_blk_mem_gen/ip/xlnx_blk_mem_gen.xci \
+       $(ip-dir)/xlnx_clk_gen/ip/xlnx_clk_gen.xci \
+       $(ip-dir)/xlnx_axi_dwidth_converter_dm_master/ip/xlnx_axi_dwidth_converter_dm_master.xci \
+       $(ip-dir)/xlnx_axi_dwidth_converter_dm_slave/ip/xlnx_axi_dwidth_converter_dm_slave.xci
 
 all: $(cva6_ooc)
 
@@ -51,28 +53,20 @@ endif
 
 cva6_fpga: $(ips)
 ifeq ($(BATCH_MODE), 1)
-	mkdir -p $(work-dir)
-	$(VIVADO) -mode batch -source scripts/run_cva6_fpga.tcl
+	@ EXT_IPS="$(ips)" ARIANE_SRC=$(project_dir) FPGA_SRC=$(root-dir)/src $(VIVADO) -mode batch -source scripts/run_cva6_fpga.tcl
 else
-	$(VIVADO) -source scripts/run_cva6_fpga.tcl	
+	EXT_IPS="$(ips)" ARIANE_SRC=$(project_dir) FPGA_SRC=$(root-dir)/src $(VIVADO) -source scripts/run_cva6_fpga.tcl
 endif
 
-
-
 program_cva6_fpga:
 	xsct scripts/program_cva6_fpga.tcl	
 
 get_hs2_sn:
 	$(VIVADO) -mode batch -source scripts/get_hs2_sn.tcl
 
-
-
-
-$(ips): %.xci :
-	mkdir -p $(work-dir)
+%.xci:
 	@echo Generating $(@F)
 	@cd $(ip-dir)/$(basename $(@F)) && make clean && make
-	@cp $(ip-dir)/$(basename $(@F))/ip/$(@F) $@
 
 clean:
 	rm -rf 	*.log \
@@ -88,6 +82,7 @@ clean:
 		cva6_ooc.runs \
 		cva6_ooc.hbs \
 		cva6_fpga.cache \
+		cva6_fpga.srcs \
 		cva6_fpga.hw \
 		cva6_fpga.ip_user_files \
 		cva6_fpga.sim \
@@ -98,8 +93,8 @@ clean:
 		reports_cva6_ooc_impl \
 		reports_cva6_fpga_synth \
 		reports_cva6_fpga_impl \
-		$(work-dir)
-
+		$(work-dir) \
+		$(ips)
 
 .PHONY:
 	clean
diff --git a/fpga/constraints/cva6_fpga.xdc b/fpga/constraints/cva6_fpga.xdc
index c9952ae1..0b8b07d8 100644
--- a/fpga/constraints/cva6_fpga.xdc
+++ b/fpga/constraints/cva6_fpga.xdc
@@ -2,7 +2,7 @@
 
 create_clock -period 100.000 -name tck -waveform {0.000 50.000} [get_ports tck]
 set_input_jitter tck 1.000
-set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets tck_IBUF]
+set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets tck]
 
 # minimize routing delay
 set_input_delay -clock tck -clock_fall 5.000 [get_ports tdi]
@@ -18,5 +18,3 @@ set_max_delay -datapath_only -from [get_pins i_dmi_jtag/i_dmi_cdc/i_cdc_req/i_ds
 # set multicycle path on reset, on the FPGA we do not care about the reset anyway
 set_multicycle_path -from [get_pins {i_rstgen_main/i_rstgen_bypass/synch_regs_q_reg[3]/C}] 4
 set_multicycle_path -hold -from [get_pins {i_rstgen_main/i_rstgen_bypass/synch_regs_q_reg[3]/C}] 3
-
-
diff --git a/fpga/constraints/zcu104.xdc b/fpga/constraints/zcu104.xdc
new file mode 100644
index 00000000..16c0e31c
--- /dev/null
+++ b/fpga/constraints/zcu104.xdc
@@ -0,0 +1,38 @@
+# set_property PACKAGE_PIN F23 [get_ports clk_sys_p]
+# set_property IOSTANDARD LVDS [get_ports clk_sys_p]
+
+# set_property PACKAGE_PIN E23 [get_ports clk_sys_n]
+# set_property IOSTANDARD LVDS [get_ports clk_sys_n]
+
+# set_property PACKAGE_PIN AH18 [get_ports clk_sys_p]
+# set_property IOSTANDARD DIFF_SSTL12_DCI [get_ports clk_sys_p]
+
+# set_property PACKAGE_PIN AH17 [get_ports clk_sys_n]
+# set_property IOSTANDARD DIFF_SSTL12_DCI [get_ports clk_sys_n]
+
+set_property -dict {PACKAGE_PIN F23 IOSTANDARD LVDS} [get_ports clk_sys_p]
+set_property -dict {PACKAGE_PIN E23 IOSTANDARD LVDS} [get_ports clk_sys_n]
+
+set_property -dict {PACKAGE_PIN M11 IOSTANDARD LVCMOS33} [get_ports reset]
+
+## To use FTDI FT2232 JTAG
+set_property -dict {PACKAGE_PIN L10 IOSTANDARD LVCMOS33} [get_ports trst_n]
+set_property -dict {PACKAGE_PIN L8 IOSTANDARD LVCMOS33} [get_ports tck]
+set_property -dict {PACKAGE_PIN K9 IOSTANDARD LVCMOS33} [get_ports tdi]
+set_property -dict {PACKAGE_PIN K8 IOSTANDARD LVCMOS33} [get_ports tdo]
+set_property -dict {PACKAGE_PIN J9 IOSTANDARD LVCMOS33} [get_ports tms]
+
+## UART
+set_property -dict {PACKAGE_PIN H8 IOSTANDARD LVCMOS33} [get_ports tx]
+set_property -dict {PACKAGE_PIN G7 IOSTANDARD LVCMOS33} [get_ports rx]
+
+## JTAG
+# minimize routing delay
+
+set_max_delay -to [get_ports tdo] 20.000
+set_max_delay -from [get_ports tms] 20.000
+set_max_delay -from [get_ports tdi] 20.000
+set_max_delay -from [get_ports trst_n] 20.000
+
+# reset signal
+set_false_path -from [get_ports trst_n]
diff --git a/fpga/scripts/program_cva6_fpga.tcl b/fpga/scripts/program_cva6_fpga.tcl
index 0002d810..0ae3947f 100644
--- a/fpga/scripts/program_cva6_fpga.tcl
+++ b/fpga/scripts/program_cva6_fpga.tcl
@@ -5,12 +5,8 @@ targets -set -nocase -filter {name =~"APU*"}
 rst -system
 after 3000
 #targets -set -filter {jtag_cable_name =~ "Digilent Zybo Z7 210351AD67C0A" && level==0 && jtag_device_ctx=="jsn-Zybo Z7-210351AD67C0A-23727093-0"}
-fpga -file cva6_fpga.runs/impl_1/cva6_zybo_z7_20.bit
+fpga -file cva6_fpga.runs/impl_1/cva6_ultrascale.bit
 #targets -set -nocase -filter {name =~"APU*"}
 #loadhw -hw /home/sjacq/Work_dir/USE_CASE/2020/contest_softcore_cva6/migration2github/test/workspace/design_1_wrapper/export/design_1_wrapper/hw/design_1_wrapper.xsa -mem-ranges [list {0x40000000 0xbfffffff}] -regs
 #configparams force-mem-access 1
 targets -set -nocase -filter {name =~"APU*"}
-source scripts/ps7_init.tcl
-ps7_init
-ps7_post_config
-
diff --git a/fpga/scripts/run_cva6_fpga.tcl b/fpga/scripts/run_cva6_fpga.tcl
index 9a52a7e2..39f27001 100644
--- a/fpga/scripts/run_cva6_fpga.tcl
+++ b/fpga/scripts/run_cva6_fpga.tcl
@@ -27,63 +27,53 @@
 # Date        Version  Author       Description
 # 2020-11-06  0.1      S.Jacq       Created
 # =========================================================================== #
+
+
 set project cva6_fpga
+set src_dir $::env(ARIANE_SRC)/src
+set include_dir $::env(ARIANE_SRC)/include
+set fpga_src_dir $::env(FPGA_SRC)
+set ext_ips $::env(EXT_IPS)
+set top_module cva6_ultrascale
 
 create_project $project . -force -part $::env(XILINX_PART)
 set_property board_part $::env(XILINX_BOARD) [current_project]
 
+# Adding all files recursively from src, include and fpga
+add_files -scan_for_includes $src_dir $include_dir $fpga_src_dir
+read_ip $ext_ips
 
+# Removing duplicate files
+remove_files [get_files -regexp [list ".*/fpu/.*/popcount.sv" ".*/fpu/.*/registers.svh" ".*/pmp/.*/riscv.sv"]]
 
-# set number of threads to 8 (maximum, unfortunately)
-set_param general.maxThreads 8
-
-set_msg_config -id {[Synth 8-5858]} -new_severity "info"
-
-set_msg_config -id {[Synth 8-4480]} -limit 1000
-
-add_files -fileset constrs_1 -norecurse constraints/zybo_z7_20.xdc
-
-read_ip xilinx/xlnx_processing_system7/ip/xlnx_processing_system7.xci
-read_ip xilinx/xlnx_blk_mem_gen/ip/xlnx_blk_mem_gen.xci
-read_ip xilinx/xlnx_axi_clock_converter/ip/xlnx_axi_clock_converter.xci
-read_ip xilinx/xlnx_axi_dwidth_converter_dm_slave/ip/xlnx_axi_dwidth_converter_dm_slave.xci
-read_ip xilinx/xlnx_axi_dwidth_converter_dm_master/ip/xlnx_axi_dwidth_converter_dm_master.xci
-
-read_ip xilinx/xlnx_clk_gen/ip/xlnx_clk_gen.xci
+# Defining registers.svh and zybo-z7-20.svh as global headers
+set global_headers [list ".*/registers.svh" ".*/zybo-z7-20.svh"]
+set_property -dict { file_type {Verilog Header} is_global_include 1} -objects [get_files -regexp $global_headers]
 
 set_property include_dirs { "src/axi_sd_bridge/include" "../src/common_cells/include" } [current_fileset]
 
-source scripts/add_sources.tcl
+# Setting top module
+set_property top $top_module [get_filesets sources_1]
 
-set_property top cva6_zybo_z7_20 [current_fileset]
+read_verilog -sv {../src/common_cells/include/common_cells/registers.svh}
+set registers "../src/common_cells/include/common_cells/registers.svh"
 
-read_verilog -sv {src/zybo-z7-20.svh src/zybo-z7-20-ddr.svh ../src/common_cells/include/common_cells/registers.svh}
-#set file "src/zybo-z7-20.svh"
-if { $::env(PS7_DDR) == 1 } {
-   set file "src/zybo-z7-20-ddr.svh"
-} elseif {$::env(BRAM) == 1} {
-   set file "src/zybo-z7-20.svh"
-} else {
-   puts "None of the values is matching"
-}
+# Disable unused files
+reorder_files -auto -disable_unused -fileset [get_filesets sources_1]
 
-set registers "../src/common_cells/include/common_cells/registers.svh"
+# Remove disabled files
+# remove_files [get_files -filter {!IS_ENABLED}]
 
-set file_obj [get_files -of_objects [get_filesets sources_1] [list "*$file" "$registers"]]
-set_property -dict { file_type {Verilog Header} is_global_include 1} -objects $file_obj
+set_msg_config -id {[Synth 8-5858]} -new_severity "info"
+set_msg_config -id {[Synth 8-4480]} -limit 1000
 
-update_compile_order -fileset sources_1
+add_files -fileset constrs_1 -norecurse constraints/cva6_fpga.xdc
+add_files -fileset constrs_1 -norecurse constraints/zcu104.xdc
 
-add_files -fileset constrs_1 -norecurse constraints/$project.xdc
+set_property STEPS.SYNTH_DESIGN.ARGS.RETIMING true [get_runs synth_1]
+update_compile_order -fileset [get_filesets sources_1]
 
-# synth_design -verilog_define PS7_DDR=$::env(PS7_DDR) -verilog_define BRAM=$::env(BRAM) -rtl -name rtl_1
-if { $::env(PS7_DDR) == 1 } {
-   synth_design -verilog_define PS7_DDR=PS7_DDR -rtl -name rtl_1
-} elseif {$::env(BRAM) == 1} {
-   synth_design -verilog_define BRAM=BRAM -rtl -name rtl_1
-} else {
-   puts "None of the values is matching"
-}
+set_property generate_synth_checkpoint 0 [get_files $ext_ips]
 
 set_property STEPS.SYNTH_DESIGN.ARGS.RETIMING true [get_runs synth_1]
 
@@ -94,7 +84,6 @@ open_run synth_1
 exec mkdir -p reports_cva6_fpga_synth/
 exec rm -rf reports_cva6_fpga_synth/*
 
-
 check_timing -verbose                                                   -file reports_cva6_fpga_synth/$project.check_timing.rpt
 report_timing -max_paths 100 -nworst 100 -delay_type max -sort_by slack -file reports_cva6_fpga_synth/$project.timing_WORST_100.rpt
 report_timing -nworst 1 -delay_type max -sort_by group                  -file reports_cva6_fpga_synth/$project.timing.rpt
@@ -106,11 +95,6 @@ report_clock_interaction                                                -file re
 set_property "steps.place_design.args.directive" "RuntimeOptimized" [get_runs impl_1]
 set_property "steps.route_design.args.directive" "RuntimeOptimized" [get_runs impl_1]
 
-##create_clock -period $::env(CLK_PERIOD_NS) -name clk_i   [get_ports clk_i]
-
-#set_property HD.CLK_SRC BUFGCTRL_X1Y2 [get_ports clk_i]
-
-
 launch_runs impl_1
 wait_on_run impl_1
 launch_runs impl_1 -to_step write_bitstream
diff --git a/fpga/src/ariane_soc_pkg.sv b/fpga/src/ariane_soc_pkg.sv
new file mode 100644
index 00000000..5036162c
--- /dev/null
+++ b/fpga/src/ariane_soc_pkg.sv
@@ -0,0 +1,95 @@
+// Copyright 2018 ETH Zurich and University of Bologna.
+// Copyright and related rights are licensed under the Solderpad Hardware
+// License, Version 0.51 (the "License"); you may not use this file except in
+// compliance with the License.  You may obtain a copy of the License at
+// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law
+// or agreed to in writing, software, hardware and materials distributed under
+// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+// CONDITIONS OF ANY KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations under the License.
+//
+// Author: Florian Zaruba, ETH Zurich
+// Description: Contains SoC information as constants
+package ariane_soc;
+  // M-Mode Hart, S-Mode Hart
+  localparam int unsigned NumTargets = 2;
+  // Uart, SPI, Ethernet, reserved
+  localparam int unsigned NumSources = 30;
+  localparam int unsigned MaxPriority = 7;
+
+  localparam NrSlaves = 2; // actually masters, but slaves on the crossbar
+
+  // 4 is recommended by AXI standard, so let's stick to it, do not change
+  localparam IdWidth   = 4;
+  localparam IdWidthSlave = IdWidth + $clog2(NrSlaves);
+
+  typedef enum int unsigned {
+    DRAM     = 0,
+    GPIO     = 1,
+    Ethernet = 2,
+    SPI      = 3,
+    Timer    = 4,
+    UART     = 5,
+    PLIC     = 6,
+    CLINT    = 7,
+    ROM      = 8,
+    Debug    = 9
+  } axi_slaves_t;
+
+  localparam NB_PERIPHERALS = Debug + 1;
+
+
+  localparam logic[63:0] DebugLength    = 64'h1000;
+  localparam logic[63:0] ROMLength      = 64'h10000;
+  localparam logic[63:0] CLINTLength    = 64'hC0000;
+  localparam logic[63:0] PLICLength     = 64'h3FF_FFFF;
+  localparam logic[63:0] UARTLength     = 64'h1000;
+  localparam logic[63:0] TimerLength    = 64'h1000;
+  localparam logic[63:0] SPILength      = 64'h800000;
+  localparam logic[63:0] EthernetLength = 64'h10000;
+  localparam logic[63:0] GPIOLength     = 64'h1000;
+  localparam logic[63:0] DRAMLength     = 64'h40000000; // 1GByte of DDR (split between two chips on Genesys2)
+  localparam logic[63:0] SRAMLength     = 64'h1800000;  // 24 MByte of SRAM
+  // Instantiate AXI protocol checkers
+  localparam bit GenProtocolChecker = 1'b0;
+
+  typedef enum logic [63:0] {
+    DebugBase    = 64'h0000_0000,
+    ROMBase      = 64'h0001_0000,
+    CLINTBase    = 64'h0200_0000,
+    PLICBase     = 64'h0C00_0000,
+    UARTBase     = 64'h1000_0000,
+    TimerBase    = 64'h1800_0000,
+    SPIBase      = 64'h2000_0000,
+    EthernetBase = 64'h3000_0000,
+    GPIOBase     = 64'h4000_0000,
+    DRAMBase     = 64'h8000_0000
+  } soc_bus_start_t;
+
+  localparam NrRegion = 1;
+  localparam logic [NrRegion-1:0][NB_PERIPHERALS-1:0] ValidRule = {{NrRegion * NB_PERIPHERALS}{1'b1}};
+
+  localparam ariane_pkg::ariane_cfg_t ArianeSocCfg = '{
+    RASDepth: 2,
+    BTBEntries: 32,
+    BHTEntries: 128,
+    // idempotent region
+    NrNonIdempotentRules:  1,
+    NonIdempotentAddrBase: {64'b0},
+    NonIdempotentLength:   {DRAMBase},
+    NrExecuteRegionRules:  3,
+    ExecuteRegionAddrBase: {DRAMBase,   ROMBase,   DebugBase},
+    ExecuteRegionLength:   {DRAMLength, ROMLength, DebugLength},
+    // cached region
+    NrCachedRegionRules:    1,
+    CachedRegionAddrBase:  {DRAMBase},
+    CachedRegionLength:    {DRAMLength},
+    //  cache config
+    Axi64BitCompliant:      1'b1,
+    SwapEndianess:          1'b0,
+    // debug
+    DmBaseAddress:          DebugBase,
+    NrPMPEntries:           8
+  };
+
+endpackage
diff --git a/fpga/src/cva6_ultrascale.sv b/fpga/src/cva6_ultrascale.sv
new file mode 100644
index 00000000..9d9c976c
--- /dev/null
+++ b/fpga/src/cva6_ultrascale.sv
@@ -0,0 +1,704 @@
+module cva6_ultrascale (
+  input logic  clk_sys_n,
+  input logic  clk_sys_p,
+  input logic  reset,
+
+  // JTAG
+  input logic  trst_n,
+  input logic  tck,
+  input logic  tms,
+  input logic  tdi,
+  output wire  tdo,
+
+  // UART
+  input logic  rx,
+  output logic tx
+);
+
+// 24 MByte in 8 byte words
+localparam NumWords = (24 * 1024 * 1024) / 8;
+localparam NBSlave = 2; // debug, ariane
+localparam AxiAddrWidth = 64;
+localparam AxiDataWidth = 64;
+localparam AxiIdWidthMaster = 4;
+localparam AxiIdWidthSlaves = AxiIdWidthMaster + $clog2(NBSlave); // 5
+localparam AxiUserWidth = 1;
+
+AXI_BUS #(
+    .AXI_ADDR_WIDTH ( AxiAddrWidth     ),
+    .AXI_DATA_WIDTH ( AxiDataWidth     ),
+    .AXI_ID_WIDTH   ( AxiIdWidthMaster ),
+    .AXI_USER_WIDTH ( AxiUserWidth     )
+) slave[NBSlave-1:0]();
+
+AXI_BUS #(
+    .AXI_ADDR_WIDTH ( AxiAddrWidth     ),
+    .AXI_DATA_WIDTH ( AxiDataWidth     ),
+    .AXI_ID_WIDTH   ( AxiIdWidthSlaves ),
+    .AXI_USER_WIDTH ( AxiUserWidth     )
+) master[ariane_soc::NB_PERIPHERALS-1:0]();
+
+AXI_BUS #(
+    .AXI_ADDR_WIDTH ( 32     ),
+    .AXI_DATA_WIDTH ( 32     ),
+    .AXI_ID_WIDTH   ( AxiIdWidthSlaves ),
+    .AXI_USER_WIDTH ( AxiUserWidth     )
+) master_to_dm[0:0]();
+
+// disable test-enable
+logic test_en;
+logic ndmreset;
+logic ndmreset_n;
+logic debug_req_irq;
+logic time_irq;
+logic ipi;
+
+logic clk, eth_clk, spi_clk_i, phy_tx_clk, sd_clk_sys;
+
+logic rst_n;
+assign rst_n = ~reset;
+
+logic rtc;
+
+// ROM
+logic                    rom_req;
+logic [AxiAddrWidth-1:0] rom_addr;
+logic [AxiDataWidth-1:0] rom_rdata;
+
+// Debug
+logic          debug_req_valid;
+logic          debug_req_ready;
+dm::dmi_req_t  debug_req;
+logic          debug_resp_valid;
+logic          debug_resp_ready;
+dm::dmi_resp_t debug_resp;
+
+logic dmactive;
+
+// IRQ
+logic [1:0] irq;
+assign test_en    = 1'b0;
+
+logic [NBSlave-1:0] pc_asserted;
+
+logic dmi_trst_n;
+
+rstgen i_rstgen_main (
+    .clk_i        ( clk                      ),
+    .rst_ni       ( pll_locked & (~ndmreset) ),
+    .test_mode_i  ( test_en                  ),
+    .rst_no       ( ndmreset_n               ),
+    .init_no      (                          ) // keep open
+);
+
+// ---------------
+// AXI Xbar
+// ---------------
+axi_node_wrap_with_slices #(
+    // two ports on the slave side: debug module and Ariane (see NBSlave)
+    .NB_SLAVE           ( NBSlave                    ),
+    .NB_MASTER          ( ariane_soc::NB_PERIPHERALS ),
+    .NB_REGION          ( ariane_soc::NrRegion       ),
+    .AXI_ADDR_WIDTH     ( AxiAddrWidth               ),
+    .AXI_DATA_WIDTH     ( AxiDataWidth               ),
+    .AXI_USER_WIDTH     ( AxiUserWidth               ),
+    .AXI_ID_WIDTH       ( AxiIdWidthMaster           ),
+    .MASTER_SLICE_DEPTH ( 2                          ),
+    .SLAVE_SLICE_DEPTH  ( 2                          )
+) i_axi_xbar (
+    .clk          ( clk        ),
+    .rst_n        ( ndmreset_n ),
+    .test_en_i    ( test_en    ),
+    .slave        ( slave      ),
+    .master       ( master     ),
+    .start_addr_i ({
+        ariane_soc::DebugBase,
+        ariane_soc::ROMBase,
+        ariane_soc::CLINTBase,
+        ariane_soc::PLICBase,
+        ariane_soc::UARTBase,
+        ariane_soc::TimerBase,
+        ariane_soc::SPIBase,
+        ariane_soc::EthernetBase,
+        ariane_soc::GPIOBase,
+        ariane_soc::DRAMBase
+    }),
+    .end_addr_i   ({
+        ariane_soc::DebugBase    + ariane_soc::DebugLength - 1,
+        ariane_soc::ROMBase      + ariane_soc::ROMLength - 1,
+        ariane_soc::CLINTBase    + ariane_soc::CLINTLength - 1,
+        ariane_soc::PLICBase     + ariane_soc::PLICLength - 1,
+        ariane_soc::UARTBase     + ariane_soc::UARTLength - 1,
+        ariane_soc::TimerBase    + ariane_soc::TimerLength - 1,
+        ariane_soc::SPIBase      + ariane_soc::SPILength - 1,
+        ariane_soc::EthernetBase + ariane_soc::EthernetLength -1,
+        ariane_soc::GPIOBase     + ariane_soc::GPIOLength - 1,
+        ariane_soc::DRAMBase     + ariane_soc::DRAMLength - 1
+    }),
+    .valid_rule_i (ariane_soc::ValidRule)
+);
+
+`ifdef LAUTERBACH_DEBUG_PROBE
+  assign dmi_trst_n = trst_n;
+`else
+  assign dmi_trst_n = 1'b1;
+`endif
+
+// ---------------
+// Debug Module
+// ---------------
+dmi_jtag  #(
+        .IdcodeValue          ( 32'h249511C3    )
+    )i_dmi_jtag (
+    .clk_i                ( clk                  ),
+    .rst_ni               ( rst_n                ),
+    .dmi_rst_no           (                      ), // keep open
+    .testmode_i           ( test_en              ),
+    .dmi_req_valid_o      ( debug_req_valid      ),
+    .dmi_req_ready_i      ( debug_req_ready      ),
+    .dmi_req_o            ( debug_req            ),
+    .dmi_resp_valid_i     ( debug_resp_valid     ),
+    .dmi_resp_ready_o     ( debug_resp_ready     ),
+    .dmi_resp_i           ( debug_resp           ),
+    .tck_i                ( tck    ),
+    .tms_i                ( tms    ),
+    .trst_ni              ( dmi_trst_n ), // trst_n with LAUTERBACH_DEBUG_PROBE, else tied to 1'b1
+    .td_i                 ( tdi    ),
+    .td_o                 ( tdo    ),
+    .tdo_oe_o             (        )
+);
+
+ariane_axi::req_t    dm_axi_m_req;
+ariane_axi::resp_t   dm_axi_m_resp;
+
+logic                dm_slave_req;
+logic                dm_slave_we;
+logic [32-1:0]       dm_slave_addr;
+logic [32/8-1:0]     dm_slave_be;
+logic [32-1:0]       dm_slave_wdata;
+logic [32-1:0]       dm_slave_rdata;
+
+logic                dm_master_req;
+logic [32-1:0]       dm_master_add;
+logic                dm_master_we;
+logic [32-1:0]       dm_master_wdata;
+logic [32/8-1:0]     dm_master_be;
+logic                dm_master_gnt;
+logic                dm_master_r_valid;
+logic [32-1:0]       dm_master_r_rdata;
+
+// debug module
+dm_top #(
+    .NrHarts          ( 1                 ),
+    .BusWidth         ( 32      ),
+    .SelectableHarts  ( 1'b1              )
+) i_dm_top (
+    .clk_i            ( clk               ),
+    .rst_ni           ( rst_n             ), // PoR
+    .testmode_i       ( test_en           ),
+    .ndmreset_o       ( ndmreset          ),
+    .dmactive_o       ( dmactive          ), // active debug session
+    .debug_req_o      ( debug_req_irq     ),
+    .unavailable_i    ( '0                ),
+    .hartinfo_i       ( {ariane_pkg::DebugHartInfo} ),
+    .slave_req_i      ( dm_slave_req      ),
+    .slave_we_i       ( dm_slave_we       ),
+    .slave_addr_i     ( dm_slave_addr     ),
+    .slave_be_i       ( dm_slave_be       ),
+    .slave_wdata_i    ( dm_slave_wdata    ),
+    .slave_rdata_o    ( dm_slave_rdata    ),
+    .master_req_o     ( dm_master_req     ),
+    .master_add_o     ( dm_master_add     ),
+    .master_we_o      ( dm_master_we      ),
+    .master_wdata_o   ( dm_master_wdata   ),
+    .master_be_o      ( dm_master_be      ),
+    .master_gnt_i     ( dm_master_gnt     ),
+    .master_r_valid_i ( dm_master_r_valid ),
+    .master_r_rdata_i ( dm_master_r_rdata ),
+    .dmi_rst_ni       ( rst_n             ),
+    .dmi_req_valid_i  ( debug_req_valid   ),
+    .dmi_req_ready_o  ( debug_req_ready   ),
+    .dmi_req_i        ( debug_req         ),
+    .dmi_resp_valid_o ( debug_resp_valid  ),
+    .dmi_resp_ready_i ( debug_resp_ready  ),
+    .dmi_resp_o       ( debug_resp        )
+);
+/********************************************************/
+axi2mem #(
+    .AXI_ID_WIDTH   ( AxiIdWidthSlaves    ),
+    .AXI_ADDR_WIDTH ( 32        ),
+    .AXI_DATA_WIDTH ( 32        ),
+    .AXI_USER_WIDTH ( AxiUserWidth        )
+) i_dm_axi2mem (
+    .clk_i      ( clk                       ),
+    .rst_ni     ( rst_n                     ),
+    .slave      ( master_to_dm[0] ),
+    .req_o      ( dm_slave_req              ),
+    .we_o       ( dm_slave_we               ),
+    .addr_o     ( dm_slave_addr             ),
+    .be_o       ( dm_slave_be               ),
+    .data_o     ( dm_slave_wdata            ),
+    .data_i     ( dm_slave_rdata            )
+);
+
+assign master_to_dm[0].aw_user = '0;
+assign master_to_dm[0].w_user = '0;
+assign master_to_dm[0].ar_user = '0;
+
+assign master_to_dm[0].aw_id = dm_axi_m_req.aw.id;
+assign master_to_dm[0].ar_id = dm_axi_m_req.ar.id;
+
+assign master[ariane_soc::Debug].r_user ='0;
+assign master[ariane_soc::Debug].b_user ='0;
+
+xlnx_axi_dwidth_converter_dm_slave  i_axi_dwidth_converter_dm_slave(
+    .s_axi_aclk(clk),
+    .s_axi_aresetn(ndmreset_n),
+    .s_axi_awid(master[ariane_soc::Debug].aw_id),
+    .s_axi_awaddr(master[ariane_soc::Debug].aw_addr[31:0]),
+    .s_axi_awlen(master[ariane_soc::Debug].aw_len),
+    .s_axi_awsize(master[ariane_soc::Debug].aw_size),
+    .s_axi_awburst(master[ariane_soc::Debug].aw_burst),
+    .s_axi_awlock(master[ariane_soc::Debug].aw_lock),
+    .s_axi_awcache(master[ariane_soc::Debug].aw_cache),
+    .s_axi_awprot(master[ariane_soc::Debug].aw_prot),
+    .s_axi_awregion(master[ariane_soc::Debug].aw_region),
+    .s_axi_awqos(master[ariane_soc::Debug].aw_qos),
+    .s_axi_awvalid(master[ariane_soc::Debug].aw_valid),
+    .s_axi_awready(master[ariane_soc::Debug].aw_ready),
+    .s_axi_wdata(master[ariane_soc::Debug].w_data),
+    .s_axi_wstrb(master[ariane_soc::Debug].w_strb),
+    .s_axi_wlast(master[ariane_soc::Debug].w_last),
+    .s_axi_wvalid(master[ariane_soc::Debug].w_valid),
+    .s_axi_wready(master[ariane_soc::Debug].w_ready),
+    .s_axi_bid(master[ariane_soc::Debug].b_id),
+    .s_axi_bresp(master[ariane_soc::Debug].b_resp),
+    .s_axi_bvalid(master[ariane_soc::Debug].b_valid),
+    .s_axi_bready(master[ariane_soc::Debug].b_ready),
+    .s_axi_arid(master[ariane_soc::Debug].ar_id),
+    .s_axi_araddr(master[ariane_soc::Debug].ar_addr[31:0]),
+    .s_axi_arlen(master[ariane_soc::Debug].ar_len),
+    .s_axi_arsize(master[ariane_soc::Debug].ar_size),
+    .s_axi_arburst(master[ariane_soc::Debug].ar_burst),
+    .s_axi_arlock(master[ariane_soc::Debug].ar_lock),
+    .s_axi_arcache(master[ariane_soc::Debug].ar_cache),
+    .s_axi_arprot(master[ariane_soc::Debug].ar_prot),
+    .s_axi_arregion(master[ariane_soc::Debug].ar_region),
+    .s_axi_arqos(master[ariane_soc::Debug].ar_qos),
+    .s_axi_arvalid(master[ariane_soc::Debug].ar_valid),
+    .s_axi_arready(master[ariane_soc::Debug].ar_ready),
+    .s_axi_rid(master[ariane_soc::Debug].r_id),
+    .s_axi_rdata(master[ariane_soc::Debug].r_data),
+    .s_axi_rresp(master[ariane_soc::Debug].r_resp),
+    .s_axi_rlast(master[ariane_soc::Debug].r_last),
+    .s_axi_rvalid(master[ariane_soc::Debug].r_valid),
+    .s_axi_rready(master[ariane_soc::Debug].r_ready),
+    .m_axi_awaddr(master_to_dm[0].aw_addr),
+    .m_axi_awlen(master_to_dm[0].aw_len),
+    .m_axi_awsize(master_to_dm[0].aw_size),
+    .m_axi_awburst(master_to_dm[0].aw_burst),
+    .m_axi_awlock(master_to_dm[0].aw_lock),
+    .m_axi_awcache(master_to_dm[0].aw_cache),
+    .m_axi_awprot(master_to_dm[0].aw_prot),
+    .m_axi_awregion(master_to_dm[0].aw_region),
+    .m_axi_awqos(master_to_dm[0].aw_qos),
+    .m_axi_awvalid(master_to_dm[0].aw_valid),
+    .m_axi_awready(master_to_dm[0].aw_ready),
+    .m_axi_wdata(master_to_dm[0].w_data ),
+    .m_axi_wstrb(master_to_dm[0].w_strb),
+    .m_axi_wlast(master_to_dm[0].w_last),
+    .m_axi_wvalid(master_to_dm[0].w_valid),
+    .m_axi_wready(master_to_dm[0].w_ready),
+    .m_axi_bresp(master_to_dm[0].b_resp),
+    .m_axi_bvalid(master_to_dm[0].b_valid),
+    .m_axi_bready(master_to_dm[0].b_ready),
+    .m_axi_araddr(master_to_dm[0].ar_addr),
+    .m_axi_arlen(master_to_dm[0].ar_len),
+    .m_axi_arsize(master_to_dm[0].ar_size),
+    .m_axi_arburst(master_to_dm[0].ar_burst),
+    .m_axi_arlock(master_to_dm[0].ar_lock),
+    .m_axi_arcache(master_to_dm[0].ar_cache),
+    .m_axi_arprot(master_to_dm[0].ar_prot),
+    .m_axi_arregion(master_to_dm[0].ar_region),
+    .m_axi_arqos(master_to_dm[0].ar_qos),
+    .m_axi_arvalid(master_to_dm[0].ar_valid),
+    .m_axi_arready(master_to_dm[0].ar_ready),
+    .m_axi_rdata(master_to_dm[0].r_data),
+    .m_axi_rresp(master_to_dm[0].r_resp),
+    .m_axi_rlast(master_to_dm[0].r_last),
+    .m_axi_rvalid(master_to_dm[0].r_valid),
+    .m_axi_rready(master_to_dm[0].r_ready)
+  );
+
+/*****************************************************************/ // glue between the debug-module AXI master and bus port slave[1]
+logic [31:0] dm_master_m_awaddr; // write address produced by i_axi_dwidth_converter_dm_master
+logic [31:0] dm_master_m_araddr; // read address produced by i_axi_dwidth_converter_dm_master
+
+assign slave[1].aw_addr = {32'h0000_0000, dm_master_m_awaddr}; // zero-extend the 32-bit address by 32 bits
+assign slave[1].ar_addr = {32'h0000_0000, dm_master_m_araddr}; // zero-extend the 32-bit address by 32 bits
+
+logic [31 : 0] dm_master_s_rdata; // 32-bit read data from the converter's slave side
+
+assign dm_axi_m_resp.r.data = {32'h0000_0000, dm_master_s_rdata}; // upper 32 bits of the response data are always zero
+
+assign slave[1].aw_user = '0; // user sideband signals are tied off
+assign slave[1].w_user = '0;
+assign slave[1].ar_user = '0;
+
+assign slave[1].aw_id = dm_axi_m_req.aw.id; // IDs and ATOP are wired around the converter, whose master side has no such ports
+assign slave[1].ar_id = dm_axi_m_req.ar.id;
+assign slave[1].aw_atop = dm_axi_m_req.aw.atop;
+
+xlnx_axi_dwidth_converter_dm_master  i_axi_dwidth_converter_dm_master( // slave side: dm_axi_m_req/dm_axi_m_resp (32-bit data); master side: slave[1]
+    .s_axi_aclk(clk),
+    .s_axi_aresetn(ndmreset_n), // note: i_dm_axi_master, which drives this port, is reset by rst_n instead
+    .s_axi_awid(dm_axi_m_req.aw.id),
+    .s_axi_awaddr(dm_axi_m_req.aw.addr[31:0]), // only the low 32 address bits are forwarded
+    .s_axi_awlen(dm_axi_m_req.aw.len),
+    .s_axi_awsize(dm_axi_m_req.aw.size),
+    .s_axi_awburst(dm_axi_m_req.aw.burst),
+    .s_axi_awlock(dm_axi_m_req.aw.lock),
+    .s_axi_awcache(dm_axi_m_req.aw.cache),
+    .s_axi_awprot(dm_axi_m_req.aw.prot),
+    .s_axi_awregion(dm_axi_m_req.aw.region),
+    .s_axi_awqos(dm_axi_m_req.aw.qos),
+    .s_axi_awvalid(dm_axi_m_req.aw_valid),
+    .s_axi_awready(dm_axi_m_resp.aw_ready),
+    .s_axi_wdata(dm_axi_m_req.w.data[31:0]), // only the low 32 data bits are forwarded
+    .s_axi_wstrb(dm_axi_m_req.w.strb[3:0]), // matching 4 byte strobes
+    .s_axi_wlast(dm_axi_m_req.w.last),
+    .s_axi_wvalid(dm_axi_m_req.w_valid),
+    .s_axi_wready(dm_axi_m_resp.w_ready),
+    .s_axi_bid(dm_axi_m_resp.b.id),
+    .s_axi_bresp(dm_axi_m_resp.b.resp),
+    .s_axi_bvalid(dm_axi_m_resp.b_valid),
+    .s_axi_bready(dm_axi_m_req.b_ready),
+    .s_axi_arid(dm_axi_m_req.ar.id),
+    .s_axi_araddr(dm_axi_m_req.ar.addr[31:0]), // only the low 32 address bits are forwarded
+    .s_axi_arlen(dm_axi_m_req.ar.len),
+    .s_axi_arsize(dm_axi_m_req.ar.size),
+    .s_axi_arburst(dm_axi_m_req.ar.burst),
+    .s_axi_arlock(dm_axi_m_req.ar.lock),
+    .s_axi_arcache(dm_axi_m_req.ar.cache),
+    .s_axi_arprot(dm_axi_m_req.ar.prot),
+    .s_axi_arregion(dm_axi_m_req.ar.region),
+    .s_axi_arqos(dm_axi_m_req.ar.qos),
+    .s_axi_arvalid(dm_axi_m_req.ar_valid),
+    .s_axi_arready(dm_axi_m_resp.ar_ready),
+    .s_axi_rid(dm_axi_m_resp.r.id),
+    .s_axi_rdata(dm_master_s_rdata), // zero-extended onto dm_axi_m_resp.r.data by a separate assign
+    .s_axi_rresp(dm_axi_m_resp.r.resp),
+    .s_axi_rlast(dm_axi_m_resp.r.last),
+    .s_axi_rvalid(dm_axi_m_resp.r_valid),
+    .s_axi_rready(dm_axi_m_req.r_ready),
+    .m_axi_awaddr(dm_master_m_awaddr), // zero-extended onto slave[1].aw_addr by a separate assign
+    .m_axi_awlen(slave[1].aw_len),
+    .m_axi_awsize(slave[1].aw_size),
+    .m_axi_awburst(slave[1].aw_burst),
+    .m_axi_awlock(slave[1].aw_lock),
+    .m_axi_awcache(slave[1].aw_cache),
+    .m_axi_awprot(slave[1].aw_prot),
+    .m_axi_awregion(slave[1].aw_region),
+    .m_axi_awqos(slave[1].aw_qos),
+    .m_axi_awvalid(slave[1].aw_valid),
+    .m_axi_awready(slave[1].aw_ready),
+    .m_axi_wdata(slave[1].w_data ),
+    .m_axi_wstrb(slave[1].w_strb),
+    .m_axi_wlast(slave[1].w_last),
+    .m_axi_wvalid(slave[1].w_valid),
+    .m_axi_wready(slave[1].w_ready),
+    .m_axi_bresp(slave[1].b_resp),
+    .m_axi_bvalid(slave[1].b_valid),
+    .m_axi_bready(slave[1].b_ready),
+    .m_axi_araddr(dm_master_m_araddr), // zero-extended onto slave[1].ar_addr by a separate assign
+    .m_axi_arlen(slave[1].ar_len),
+    .m_axi_arsize(slave[1].ar_size),
+    .m_axi_arburst(slave[1].ar_burst),
+    .m_axi_arlock(slave[1].ar_lock),
+    .m_axi_arcache(slave[1].ar_cache),
+    .m_axi_arprot(slave[1].ar_prot),
+    .m_axi_arregion(slave[1].ar_region),
+    .m_axi_arqos(slave[1].ar_qos),
+    .m_axi_arvalid(slave[1].ar_valid),
+    .m_axi_arready(slave[1].ar_ready),
+    .m_axi_rdata(slave[1].r_data),
+    .m_axi_rresp(slave[1].r_resp),
+    .m_axi_rlast(slave[1].r_last),
+    .m_axi_rvalid(slave[1].r_valid),
+    .m_axi_rready(slave[1].r_ready)
+  );
+
+axi_adapter_32 #( // turns the dm_master_* request signals into the AXI request dm_axi_m_req
+    .DATA_WIDTH            ( 32              )
+) i_dm_axi_master (
+    .clk_i                 ( clk                       ),
+    .rst_ni                ( rst_n                     ), // uses rst_n, not ndmreset_n like the blocks around it
+    .req_i                 ( dm_master_req             ),
+    .type_i                ( ariane_axi::SINGLE_REQ    ), // request type is fixed to SINGLE_REQ
+    .gnt_o                 ( dm_master_gnt             ),
+    .gnt_id_o              (                           ), // left open
+    .addr_i                (  dm_master_add             ),
+    .we_i                  ( dm_master_we              ),
+    .wdata_i               ( dm_master_wdata           ),
+    .be_i                  ( dm_master_be              ),
+    .size_i                ( 2'b10                     ), // always do 32bit here and use byte enables to gate
+    .id_i                  ( '0                        ), // transaction ID is fixed to zero
+    .valid_o               ( dm_master_r_valid         ),
+    .rdata_o               ( dm_master_r_rdata         ),
+    .id_o                  (                           ), // left open
+    .critical_word_o       (                           ), // left open
+    .critical_word_valid_o (                           ), // left open
+    .axi_req_o             ( dm_axi_m_req              ), // consumed by i_axi_dwidth_converter_dm_master
+    .axi_resp_i            ( dm_axi_m_resp             )
+);
+
+// ---------------
+// Core
+// ---------------
+ariane_axi::req_t    axi_ariane_req;  // AXI request driven by the core
+ariane_axi::resp_t   axi_ariane_resp; // AXI response returned to the core
+
+ariane #(
+    .ArianeCfg ( ariane_soc::ArianeSocCfg )
+) i_ariane (
+    .clk_i        ( clk                 ),
+    .rst_ni       ( ndmreset_n          ),
+    .boot_addr_i  ( ariane_soc::ROMBase[riscv::XLEN-1:0] ), // start fetching from ROM
+    .hart_id_i    ( '0                  ), // hart ID is fixed to zero
+    .irq_i        ( irq                 ), // from i_ariane_peripherals.irq_o
+    .ipi_i        ( ipi                 ), // from i_clint.ipi_o
+    .time_irq_i   ( timer_irq           ), // from i_clint.timer_irq_o
+    .debug_req_i  ( debug_req_irq       ),
+    .axi_req_o    ( axi_ariane_req      ),
+    .axi_resp_i   ( axi_ariane_resp     )
+);
+
+axi_master_connect i_axi_master_connect_ariane (.axi_req_i(axi_ariane_req), .axi_resp_o(axi_ariane_resp), .master(slave[0])); // maps the core's req/resp structs onto interface slave[0]
+
+// ---------------
+// CLINT
+// ---------------
+// rtc toggles on every rising clk edge (half the clk frequency) and is cleared while ndmreset_n is low; it feeds i_clint.rtc_i
+always_ff @(posedge clk or negedge ndmreset_n) begin
+  if (~ndmreset_n) begin
+    rtc <= 0;
+  end else begin
+    rtc <= rtc ^ 1'b1;
+  end
+end
+
+ariane_axi::req_t    axi_clint_req;  // AXI request delivered to the CLINT
+ariane_axi::resp_t   axi_clint_resp; // AXI response produced by the CLINT
+
+clint #(
+    .AXI_ADDR_WIDTH ( AxiAddrWidth     ),
+    .AXI_DATA_WIDTH ( AxiDataWidth     ),
+    .AXI_ID_WIDTH   ( AxiIdWidthSlaves ),
+    .NR_CORES       ( 1                )
+) i_clint (
+    .clk_i       ( clk            ),
+    .rst_ni      ( ndmreset_n     ),
+    .testmode_i  ( test_en        ),
+    .axi_req_i   ( axi_clint_req  ),
+    .axi_resp_o  ( axi_clint_resp ),
+    .rtc_i       ( rtc            ), // clk divided by two by the always_ff block that drives rtc
+    .timer_irq_o ( timer_irq      ), // to i_ariane.time_irq_i
+    .ipi_o       ( ipi            )  // to i_ariane.ipi_i
+);
+
+axi_slave_connect i_axi_slave_connect_clint (.axi_req_o(axi_clint_req), .axi_resp_i(axi_clint_resp), .slave(master[ariane_soc::CLINT])); // maps interface master[CLINT] onto the CLINT's req/resp structs
+
+// ---------------
+// ROM
+// ---------------
+axi2mem #( // exposes master[ariane_soc::ROM] as the req/addr/rdata signals used by i_bootrom
+    .AXI_ID_WIDTH   ( AxiIdWidthSlaves ),
+    .AXI_ADDR_WIDTH ( AxiAddrWidth     ),
+    .AXI_DATA_WIDTH ( AxiDataWidth     ),
+    .AXI_USER_WIDTH ( AxiUserWidth     )
+) i_axi2rom (
+    .clk_i  ( clk                     ),
+    .rst_ni ( ndmreset_n              ),
+    .slave  ( master[ariane_soc::ROM] ),
+    .req_o  ( rom_req                 ),
+    .we_o   (                         ), // write-side outputs are left open: nothing writes the boot ROM
+    .addr_o ( rom_addr                ),
+    .be_o   (                         ), // left open
+    .data_o (                         ), // left open
+    .data_i ( rom_rdata               )
+);
+
+bootrom i_bootrom ( // read-only port: request and address in, data out
+    .clk_i   ( clk       ),
+    .req_i   ( rom_req   ),
+    .addr_i  ( rom_addr  ),
+    .rdata_o ( rom_rdata )
+);
+
+ariane_peripherals #( // InclUART is the only Incl* option set; GPIO, SPI and Ethernet are switched off
+    .AxiAddrWidth ( AxiAddrWidth     ),
+    .AxiDataWidth ( AxiDataWidth     ),
+    .AxiIdWidth   ( AxiIdWidthSlaves ),
+    .AxiUserWidth ( AxiUserWidth     ),
+    .InclUART     ( 1'b1             ),
+    .InclGPIO     ( 1'b0             ),
+    .InclSPI      ( 1'b0         ),
+    .InclEthernet ( 1'b0         )
+) i_ariane_peripherals (
+    .clk_i        ( clk                          ),
+    .clk_200MHz_i ( 1'b0               ), // tied off
+    .rst_ni       ( ndmreset_n                   ),
+    .plic         ( master[ariane_soc::PLIC]     ),
+    .uart         ( master[ariane_soc::UART]     ),
+    .spi          ( master[ariane_soc::SPI]      ),
+    .gpio         ( master[ariane_soc::GPIO]     ),
+    .eth_clk_i    ( eth_clk                      ),
+    .ethernet     ( master[ariane_soc::Ethernet] ),
+    .timer        ( master[ariane_soc::Timer]    ),
+    .irq_o        ( irq                          ), // to i_ariane.irq_i
+    .rx_i         ( rx                           ),
+    .tx_o         ( tx                           ),
+    .eth_txck (), // Ethernet pins: outputs left open, inputs tied to zero
+    .eth_rxck (1'b0),
+    .eth_rxctl (1'b0),
+    .eth_rxd (4'b0000),
+    .eth_rst_n (),
+    .eth_txctl (),
+    .eth_txd (),
+    .eth_mdio (),
+    .eth_mdc (),
+    .phy_tx_clk_i   ( phy_tx_clk                  ),
+    .sd_clk_i       ( sd_clk_sys                  ),
+    .spi_clk_o      (                             ), // left open
+    .spi_mosi       ( spi_mosi                    ),
+    .spi_miso       ( spi_miso                    ),
+    .spi_ss         ( spi_ss                      ),
+
+    .leds_o         (                        ), // left open
+    .dip_switches_i ( '0                     )  // tied to zero like the other unused inputs instead of being left unconnected
+);
+
+// ---------------------
+// Board peripherals
+// ---------------------
+// ---------------
+// DDR -- NOTE(review): none of the s_axi_* signals declared below is referenced in the rest of this module; confirm they can be dropped
+// ---------------
+logic [AxiIdWidthSlaves-1:0] s_axi_awid;     // write address channel
+logic [AxiAddrWidth-1:0]     s_axi_awaddr;
+logic [7:0]                  s_axi_awlen;
+logic [2:0]                  s_axi_awsize;
+logic [1:0]                  s_axi_awburst;
+logic [0:0]                  s_axi_awlock;
+logic [3:0]                  s_axi_awcache;
+logic [2:0]                  s_axi_awprot;
+logic [3:0]                  s_axi_awregion;
+logic [3:0]                  s_axi_awqos;
+logic                        s_axi_awvalid;
+logic                        s_axi_awready;
+logic [AxiDataWidth-1:0]     s_axi_wdata;    // write data channel
+logic [AxiDataWidth/8-1:0]   s_axi_wstrb;
+logic                        s_axi_wlast;
+logic                        s_axi_wvalid;
+logic                        s_axi_wready;
+logic [AxiIdWidthSlaves-1:0] s_axi_bid;      // write response channel
+logic [1:0]                  s_axi_bresp;
+logic                        s_axi_bvalid;
+logic                        s_axi_bready;
+logic [AxiIdWidthSlaves-1:0] s_axi_arid;     // read address channel
+logic [AxiAddrWidth-1:0]     s_axi_araddr;
+logic [7:0]                  s_axi_arlen;
+logic [2:0]                  s_axi_arsize;
+logic [1:0]                  s_axi_arburst;
+logic [0:0]                  s_axi_arlock;
+logic [3:0]                  s_axi_arcache;
+logic [2:0]                  s_axi_arprot;
+logic [3:0]                  s_axi_arregion;
+logic [3:0]                  s_axi_arqos;
+logic                        s_axi_arvalid;
+logic                        s_axi_arready;
+logic [AxiIdWidthSlaves-1:0] s_axi_rid;      // read data channel
+logic [AxiDataWidth-1:0]     s_axi_rdata;
+logic [1:0]                  s_axi_rresp;
+logic                        s_axi_rlast;
+logic                        s_axi_rvalid;
+logic                        s_axi_rready;
+
+AXI_BUS #( // bus between the atomics wrapper (mst side) and i_xlnx_blk_mem_gen
+    .AXI_ADDR_WIDTH ( AxiAddrWidth     ),
+    .AXI_DATA_WIDTH ( AxiDataWidth     ),
+    .AXI_ID_WIDTH   ( AxiIdWidthSlaves ),
+    .AXI_USER_WIDTH ( AxiUserWidth     )
+) dram();
+
+axi_riscv_atomics_wrap #( // sits between master[ariane_soc::DRAM] (slv) and dram (mst)
+    .AXI_ADDR_WIDTH ( AxiAddrWidth     ),
+    .AXI_DATA_WIDTH ( AxiDataWidth     ),
+    .AXI_ID_WIDTH   ( AxiIdWidthSlaves ),
+    .AXI_USER_WIDTH ( AxiUserWidth     ),
+    .AXI_MAX_WRITE_TXNS ( 1  ),
+    .RISCV_WORD_WIDTH   ( 64 )
+) i_axi_riscv_atomics (
+    .clk_i  ( clk                      ),
+    .rst_ni ( ndmreset_n               ),
+    .slv    ( master[ariane_soc::DRAM] ),
+    .mst    ( dram                     )
+);
+
+assign dram.r_user = '0; // i_xlnx_blk_mem_gen connects no user signals, so the response user fields are tied to zero here
+assign dram.b_user = '0;
+
+xlnx_clk_gen i_xlnx_clk_gen ( // clock generator IP configured by fpga/xilinx/xlnx_clk_gen/tcl/run.tcl (differential input)
+  .clk_out1 ( clk           ), // 100 MHz
+  .clk_out2 ( phy_tx_clk    ), // 125 MHz (for RGMII PHY)
+  .clk_out3 ( eth_clk       ), // 125 MHz quadrature (90 deg phase shift)
+  .clk_out4 ( sd_clk_sys    ), // 50 MHz clock
+  .reset    ( reset         ),
+  .locked   ( pll_locked    ),
+  .clk_in1_n( clk_sys_n ), // 125 MHz, negative leg of the differential input pair
+  .clk_in1_p( clk_sys_p )  // 125 MHz, positive leg of the differential input pair
+);
+
+logic [31:0] saxibram_awaddr; // write address presented to i_xlnx_blk_mem_gen
+logic [31:0] saxibram_araddr; // read address presented to i_xlnx_blk_mem_gen
+
+assign saxibram_awaddr = {1'b0, dram.aw_addr[30:0]}; // keep address bits [30:0], force bit 31 to zero
+assign saxibram_araddr = {1'b0, dram.ar_addr[30:0]}; // keep address bits [30:0], force bit 31 to zero
+
+xlnx_blk_mem_gen i_xlnx_blk_mem_gen ( // block-memory IP serving the dram bus (the memory behind master[ariane_soc::DRAM])
+    .rsta_busy (      ), // left open
+    .rstb_busy (      ), // left open
+    .s_aclk ( clk ),
+    .s_aresetn ( ndmreset_n  ),
+    .s_axi_awid ( dram.aw_id ),
+    .s_axi_awaddr ( saxibram_awaddr ), // dram.aw_addr reduced to 32 bits with bit 31 cleared
+    .s_axi_awlen ( dram.aw_len ),
+    .s_axi_awsize ( dram.aw_size ),
+    .s_axi_awburst ( dram.aw_burst ),
+    .s_axi_awvalid ( dram.aw_valid ),
+    .s_axi_awready ( dram.aw_ready ),
+    .s_axi_wdata ( dram.w_data ),
+    .s_axi_wstrb ( dram.w_strb ),
+    .s_axi_wlast ( dram.w_last ),
+    .s_axi_wvalid ( dram.w_valid ),
+    .s_axi_wready ( dram.w_ready ),
+    .s_axi_bid ( dram.b_id ),
+    .s_axi_bresp ( dram.b_resp ),
+    .s_axi_bvalid ( dram.b_valid ),
+    .s_axi_bready ( dram.b_ready ),
+    .s_axi_arid ( dram.ar_id ),
+    .s_axi_araddr ( saxibram_araddr ), // dram.ar_addr reduced to 32 bits with bit 31 cleared
+    .s_axi_arlen ( dram.ar_len ),
+    .s_axi_arsize ( dram.ar_size ),
+    .s_axi_arburst( dram.ar_burst ),
+    .s_axi_arvalid ( dram.ar_valid ),
+    .s_axi_arready ( dram.ar_ready ),
+    .s_axi_rid ( dram.r_id ),
+    .s_axi_rdata ( dram.r_data ),
+    .s_axi_rresp ( dram.r_resp ),
+    .s_axi_rlast ( dram.r_last ),
+    .s_axi_rvalid ( dram.r_valid ),
+    .s_axi_rready ( dram.r_ready )
+  );
+
+endmodule
diff --git a/fpga/src/zybo-z7-20.svh b/fpga/src/zybo-z7-20.svh
index 137aa898..66bf5382 100644
--- a/fpga/src/zybo-z7-20.svh
+++ b/fpga/src/zybo-z7-20.svh
@@ -11,7 +11,7 @@
 `define WT_DCACHE
 
 // debug probe
-`define LAUTERBACH_DEBUG_PROBE
+// `define LAUTERBACH_DEBUG_PROBE
 
 // to use BRAM in FPGA fabric  
 `define BRAM
diff --git a/fpga/xilinx/xlnx_clk_gen/tcl/run.tcl b/fpga/xilinx/xlnx_clk_gen/tcl/run.tcl
index 26a329e6..33c469e7 100644
--- a/fpga/xilinx/xlnx_clk_gen/tcl/run.tcl
+++ b/fpga/xilinx/xlnx_clk_gen/tcl/run.tcl
@@ -10,10 +10,12 @@ create_ip -name clk_wiz -vendor xilinx.com -library ip -module_name $ipName
 
 set_property -dict [list CONFIG.PRIM_IN_FREQ {125.000} \
                         CONFIG.NUM_OUT_CLKS {4} \
+                        CONFIG.PRIM_SOURCE {Differential_clock_capable_pin} \
+                        CONFIG.CLKOUT1_USED {true} \
                         CONFIG.CLKOUT2_USED {true} \
                         CONFIG.CLKOUT3_USED {true} \
                         CONFIG.CLKOUT4_USED {true} \
-                        CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {25} \
+                        CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {100} \
                         CONFIG.CLKOUT2_REQUESTED_OUT_FREQ {125} \
                         CONFIG.CLKOUT3_REQUESTED_OUT_FREQ {125} \
                         CONFIG.CLKOUT3_REQUESTED_PHASE {90.000} \
diff --git a/fpga/xilinx/xlnx_processing_system7/tcl/run.tcl b/fpga/xilinx/xlnx_processing_system7/tcl/run.tcl
index c58edef0..783f14f6 100644
--- a/fpga/xilinx/xlnx_processing_system7/tcl/run.tcl
+++ b/fpga/xilinx/xlnx_processing_system7/tcl/run.tcl
@@ -6,104 +6,17 @@ set ipName xlnx_processing_system7
 create_project $ipName . -force -part $partNumber
 set_property board_part $boardName [current_project]
 
-create_ip -name processing_system7 -vendor xilinx.com -library ip -module_name $ipName
-
-set_property -dict [list CONFIG.PCW_ACT_APU_PERIPHERAL_FREQMHZ {666.666687} \
-                        CONFIG.PCW_ACT_CAN_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_DCI_PERIPHERAL_FREQMHZ {10.158730} \
-                        CONFIG.PCW_ACT_ENET0_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_ENET1_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_FPGA0_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_FPGA1_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_FPGA2_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_FPGA3_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_PCAP_PERIPHERAL_FREQMHZ {200.000000} \
-                        CONFIG.PCW_ACT_QSPI_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_SDIO_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_SMC_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_SPI_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_TPIU_PERIPHERAL_FREQMHZ {200.000000} \
-                        CONFIG.PCW_ACT_TTC0_CLK0_PERIPHERAL_FREQMHZ {111.111115} \
-                        CONFIG.PCW_ACT_TTC0_CLK1_PERIPHERAL_FREQMHZ {111.111115} \
-                        CONFIG.PCW_ACT_TTC0_CLK2_PERIPHERAL_FREQMHZ {111.111115} \
-                        CONFIG.PCW_ACT_TTC1_CLK0_PERIPHERAL_FREQMHZ {111.111115} \
-                        CONFIG.PCW_ACT_TTC1_CLK1_PERIPHERAL_FREQMHZ {111.111115} \
-                        CONFIG.PCW_ACT_TTC1_CLK2_PERIPHERAL_FREQMHZ {111.111115} \
-                        CONFIG.PCW_ACT_UART_PERIPHERAL_FREQMHZ {10.000000} \
-                        CONFIG.PCW_ACT_WDT_PERIPHERAL_FREQMHZ {111.111115} \
-                        CONFIG.PCW_ARMPLL_CTRL_FBDIV {40} \
-                        CONFIG.PCW_CAN_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_CAN_PERIPHERAL_DIVISOR1 {1} \
-                        CONFIG.PCW_CLK0_FREQ {10000000} \
-                        CONFIG.PCW_CLK1_FREQ {10000000} \
-                        CONFIG.PCW_CLK2_FREQ {10000000} \
-                        CONFIG.PCW_CLK3_FREQ {10000000} \
-                        CONFIG.PCW_CPU_CPU_PLL_FREQMHZ {1333.333} \
-                        CONFIG.PCW_CPU_PERIPHERAL_DIVISOR0 {2} \
-                        CONFIG.PCW_DCI_PERIPHERAL_DIVISOR0 {15} \
-                        CONFIG.PCW_DCI_PERIPHERAL_DIVISOR1 {7} \
-                        CONFIG.PCW_DDRPLL_CTRL_FBDIV {32} \
-                        CONFIG.PCW_DDR_DDR_PLL_FREQMHZ {1066.667} \
-                        CONFIG.PCW_DDR_PERIPHERAL_DIVISOR0 {2} \
-                        CONFIG.PCW_DDR_RAM_HIGHADDR {0x3FFFFFFF} \
-                        CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_ENET0_PERIPHERAL_DIVISOR1 {1} \
-                        CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_ENET1_PERIPHERAL_DIVISOR1 {1} \
-                        CONFIG.PCW_EN_CLK0_PORT {0} \
-                        CONFIG.PCW_EN_RST0_PORT {0} \
-                        CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_FCLK0_PERIPHERAL_DIVISOR1 {1} \
-                        CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_FCLK1_PERIPHERAL_DIVISOR1 {1} \
-                        CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_FCLK2_PERIPHERAL_DIVISOR1 {1} \
-                        CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_FCLK3_PERIPHERAL_DIVISOR1 {1} \
-                        CONFIG.PCW_FCLK_CLK0_BUF {FALSE} \
-                        CONFIG.PCW_FPGA_FCLK0_ENABLE {0} \
-                        CONFIG.PCW_FPGA_FCLK1_ENABLE {0} \
-                        CONFIG.PCW_FPGA_FCLK2_ENABLE {0} \
-                        CONFIG.PCW_FPGA_FCLK3_ENABLE {0} \
-                        CONFIG.PCW_I2C_PERIPHERAL_FREQMHZ {25} \
-                        CONFIG.PCW_IOPLL_CTRL_FBDIV {48} \
-                        CONFIG.PCW_IO_IO_PLL_FREQMHZ {1600.000} \
-                        CONFIG.PCW_PCAP_PERIPHERAL_DIVISOR0 {8} \
-                        CONFIG.PCW_QSPI_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_SDIO_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_SMC_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_SPI_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_TPIU_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_UART_PERIPHERAL_DIVISOR0 {1} \
-                        CONFIG.PCW_UIPARAM_ACT_DDR_FREQ_MHZ {533.333374} \
-                        CONFIG.PCW_UIPARAM_DDR_BANK_ADDR_COUNT {3} \
-                        CONFIG.PCW_UIPARAM_DDR_BL {8} \
-                        CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY0 {0.221} \
-                        CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY1 {0.222} \
-                        CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY2 {0.217} \
-                        CONFIG.PCW_UIPARAM_DDR_BOARD_DELAY3 {0.244} \
-                        CONFIG.PCW_UIPARAM_DDR_CL {7} \
-                        CONFIG.PCW_UIPARAM_DDR_COL_ADDR_COUNT {10} \
-                        CONFIG.PCW_UIPARAM_DDR_CWL {6} \
-                        CONFIG.PCW_UIPARAM_DDR_DEVICE_CAPACITY {4096 MBits} \
-                        CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_0 {-0.05} \
-                        CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_1 {-0.044} \
-                        CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_2 {-0.035} \
-                        CONFIG.PCW_UIPARAM_DDR_DQS_TO_CLK_DELAY_3 {-0.100} \
-                        CONFIG.PCW_UIPARAM_DDR_DRAM_WIDTH {16 Bits} \
-                        CONFIG.PCW_UIPARAM_DDR_MEMORY_TYPE {DDR 3 (Low Voltage)} \
-                        CONFIG.PCW_UIPARAM_DDR_PARTNO {MT41K256M16 RE-125} \
-                        CONFIG.PCW_UIPARAM_DDR_ROW_ADDR_COUNT {15} \
-                        CONFIG.PCW_UIPARAM_DDR_SPEED_BIN {DDR3_1066F} \
-                        CONFIG.PCW_UIPARAM_DDR_T_FAW {40.0} \
-                        CONFIG.PCW_UIPARAM_DDR_T_RAS_MIN {35.0} \
-                        CONFIG.PCW_UIPARAM_DDR_T_RC {48.75} \
-                        CONFIG.PCW_UIPARAM_DDR_T_RCD {7} \
-                        CONFIG.PCW_UIPARAM_DDR_T_RP {7} \
-                        CONFIG.PCW_USE_M_AXI_GP0 {0} \
-                        CONFIG.PCW_USE_S_AXI_HP0 {1} \
-                       ] [get_ips $ipName]
+create_ip -name zynq_ultra_ps_e -vendor xilinx.com -library ip -module_name $ipName ;# the IP is now zynq_ultra_ps_e, but the generated module keeps the legacy name held in $ipName (xlnx_processing_system7)
 
+set_property -dict [list \
+  CONFIG.PSU__USE__M_AXI_GP0 {0} \
+  CONFIG.PSU__USE__M_AXI_GP1 {0} \
+  CONFIG.PSU__USE__S_AXI_GP2 {1} \
+  CONFIG.PSU__QSPI__PERIPHERAL__ENABLE {0} \
+  CONFIG.PSU__USE__FABRIC__RST {0} \
+  CONFIG.PSU__FPGA_PL0_ENABLE {0} \
+  CONFIG.PSU__USE__IRQ0 {0} \
+] [get_ips $ipName] ;# S_AXI_GP2 is the only option in this list set to 1; every other listed option is set to 0
 
 generate_target {instantiation_template} [get_files ./$ipName.srcs/sources_1/ip/$ipName/$ipName.xci]
 generate_target all [get_files  ./$ipName.srcs/sources_1/ip/$ipName/$ipName.xci]
-- 
GitLab