From dd1c3dfd4b02d417610546482d9839370ad9ccac Mon Sep 17 00:00:00 2001
From: Prasad Pardeshi <prasadp@xilinx.com>
Date: Fri, 29 Sep 2023 21:41:14 +0530
Subject: [PATCH] QDMA DPDK reference driver for 2023.2.0 release

---
 QDMA/DPDK/RELEASE                             |  15 +-
 QDMA/DPDK/docs/README.txt                     |  43 ++-
 QDMA/DPDK/drivers/net/qdma/qdma.h             |  66 +++-
 .../qdma/qdma_access/qdma_access_version.h    |   6 +-
 .../qdma_cpm4_access/qdma_cpm4_access.c       |  64 ++++
 .../qdma_cpm4_access/qdma_cpm4_access.h       |   4 +
 QDMA/DPDK/drivers/net/qdma/qdma_devops.c      |  34 +-
 QDMA/DPDK/drivers/net/qdma/qdma_ethdev.c      |  55 ++-
 QDMA/DPDK/drivers/net/qdma/qdma_rxtx.c        |  77 +++-
 .../DPDK/drivers/net/qdma/qdma_rxtx_vec_sse.c |  84 +++-
 QDMA/DPDK/drivers/net/qdma/qdma_vf_ethdev.c   |  14 +-
 QDMA/DPDK/drivers/net/qdma/qdma_xdebug.c      | 335 ++++++++++++++++
 QDMA/DPDK/drivers/net/qdma/rte_pmd_qdma.h     |  28 ++
 QDMA/DPDK/drivers/net/qdma/version.h          |   4 +-
 QDMA/DPDK/drivers/net/qdma/version.map        |  13 +-
 QDMA/DPDK/examples/qdma_testapp/testapp.c     |  24 +-
 ...MA-xdebug-to-proc-info-of-dpdk-22.11.patch | 362 ++++++++++++++++++
 17 files changed, 1139 insertions(+), 89 deletions(-)
 create mode 100755 QDMA/DPDK/tools/0001-Add-QDMA-xdebug-to-proc-info-of-dpdk-22.11.patch

diff --git a/QDMA/DPDK/RELEASE b/QDMA/DPDK/RELEASE
index 92c1060..d300616 100755
--- a/QDMA/DPDK/RELEASE
+++ b/QDMA/DPDK/RELEASE
@@ -1,4 +1,4 @@
-RELEASE: 2023.1.2
+RELEASE: 2023.2.0
 =================
 
 This release is based on DPDK v20.11, v21.11 and v22.11 and
@@ -9,6 +9,7 @@ This release is validated for
 	- On VCU1525 for QDMA4.0 2020.2 example design
 	- On VCU1525 for QDMA3.1 2019.2 example design
 	- On XCVP1202 for CPM5 2022.1 example design
+	- On XCVC1902 for CPM4 2022.1 example design
 
 This release includes patch files for dpdk-pktgen v20.12.0 and v22.04.1 that extends
 dpdk-pktgen application to handle packets with packet sizes more than 1518 bytes
@@ -125,8 +126,20 @@ CPM5
 ----------------
 - Optimized dpdk PMD and HW register settings for CPM5 performance improvements
 
+2023.2.0 Updates
+----------------
+- Added driver support for CPM4 design.
+- Added support for Tx and Rx queue statistics to enhance debugging capabilities
+- Added support for latency measurements in Tx and Rx data path to enhance debugging capabilities
+
 KNOWN ISSUE:
 ============
+- CPM4:
+	- HW pdi limitation
+		- VF functionality with vfio-pci on host is not verified
+		- VF functionality on VM is not verified
+		- Forwarding performance numbers are not reaching 100Gbps and capping at 98.8Gbps
+
 - CPM5:
 	- Smaller packet forwarding performance optimizations are in progress and report will be updated in subsequent releases
 
diff --git a/QDMA/DPDK/docs/README.txt b/QDMA/DPDK/docs/README.txt
index c14175a..a4ff05e 100755
--- a/QDMA/DPDK/docs/README.txt
+++ b/QDMA/DPDK/docs/README.txt
@@ -477,7 +477,48 @@ Commands supported by the qdma_testapp CLI
 
 	The keyboard keys Ctrl and D when pressed together quits the application.
 
-
+Instructions on how to use proc-info test for driver debugging:
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+1. Apply the below patch on top of the code changes mentioned in the dpdk driver documentation page and build the dpdk source code.
+	patch -p1 < 0001-Add-QDMA-xdebug-to-proc-info-of-dpdk-22.11.patch
+
+2. Run the testpmd application as primary application on one linux terminal
+	./build/app/dpdk-testpmd -l 1-17 -n 4 -a 65:00.0,desc_prefetch=1,cmpt_desc_len=16 --log-level=3 -- --burst=256 -i --nb-cores=1 --rxq=1 --txq=1 --forward-mode=io --rxd=2048 --txd=2048 --mbcache=512 --mbuf-size=4096
+
+3. Run the proc info as secondary application on another linux terminal as mentioned below with different port combinations.
+One port:
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 -g
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 --qdevice
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 --qinfo
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 --qstats_clr
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 --qstats
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 --desc-dump tx
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 --desc-dump rx
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 --desc-dump cmpt
+	./build/app/dpdk-proc-info -a 81:00.0 --log-level=7 -- -p 1 -q 0 --stats
+Two ports:
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 -g
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 --qdevice
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 --qinfo
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 --qstats_clr
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 --qstats
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 --desc-dump tx
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 --desc-dump rx
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 --desc-dump cmpt
+	./build/app/dpdk-proc-info -a 81:00.0, -a 81:00.1, --log-level=7 -- -p 3 -q 0 --stats
+4. Available commands for proc info are mentioned below.
+		-m to display DPDK memory zones, segments and TAILQ information
+		-g to display DPDK QDMA PMD global CSR info
+		-p PORTMASK: hexadecimal bitmask of ports to retrieve stats for
+		--stats: to display port statistics, enabled by default
+		--qdevice: to display QDMA device structure
+		--qinfo: to display QDMA queue context and queue structures
+		--qstats: to display QDMA Tx and Rx queue stats
+		--qstats_clr: to clear QDMA Tx and Rx queue stats
+		--desc-dump {rx | tx | cmpt}: to dump QDMA queue descriptors
+		--xstats: to display extended port statistics, disabled by default
+		--metrics: to display derived metrics of the ports, disabled by default
 
 
 
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma.h b/QDMA/DPDK/drivers/net/qdma/qdma.h
index cf7657e..b699069 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma.h
+++ b/QDMA/DPDK/drivers/net/qdma/qdma.h
@@ -108,6 +108,15 @@
 
 #define DEFAULT_QDMA_CMPT_DESC_LEN (RTE_PMD_QDMA_CMPT_DESC_LEN_8B)
 
+#define LATENCY_MAX_QUEUES 4
+#define LATENCY_CNT 20
+
+#ifdef LATENCY_MEASUREMENT
+extern const struct rte_memzone *txq_lat_buf_mz;
+extern const struct rte_memzone *rxq_lat_buf_mz;
+extern double (*h2c_pidx_to_hw_cidx_lat)[LATENCY_CNT];
+extern double (*c2h_pidx_to_cmpt_pidx_lat)[LATENCY_CNT];
+#endif
 
 enum dma_data_direction {
 	DMA_BIDIRECTIONAL = 0,
@@ -136,6 +145,45 @@ struct qdma_pkt_stats {
 	uint64_t bytes;
 };
 
+struct qdma_pkt_lat {
+	double prev;
+	double curr;
+};
+
+struct qdma_txq_stats {
+	uint16_t pidx;
+	uint16_t wrb_cidx;
+	uint16_t txq_tail;
+	uint16_t in_use_desc;
+	uint16_t nb_pkts;
+	uint16_t lat_cnt;
+	uint32_t ring_wrap_cnt;
+	uint32_t txq_full_cnt;
+#ifdef LATENCY_MEASUREMENT
+	uint32_t wrb_cidx_cnt_no_change;
+	uint32_t wrb_cidx_cnt_lt_8;
+	uint32_t wrb_cidx_cnt_8_to_32;
+	uint32_t wrb_cidx_cnt_32_to_64;
+	uint32_t wrb_cidx_cnt_gt_64;
+	struct qdma_pkt_lat pkt_lat;
+#endif
+};
+
+struct qdma_rxq_stats {
+	uint16_t pidx;
+	uint16_t wrb_pidx;
+	uint16_t wrb_cidx;
+	uint16_t rxq_cmpt_tail;
+	uint16_t pending_desc;
+	uint16_t lat_cnt;
+	uint32_t ring_wrap_cnt;
+	uint32_t mbuf_avail_cnt;
+	uint32_t mbuf_in_use_cnt;
+#ifdef LATENCY_MEASUREMENT
+	struct qdma_pkt_lat pkt_lat;
+#endif
+};
+
 /*
  * Structure associated with each CMPT queue.
  */
@@ -185,6 +233,7 @@ struct qdma_rx_queue {
 	struct qdma_q_pidx_reg_info	q_pidx_info;
 	struct qdma_q_cmpt_cidx_reg_info cmpt_cidx_info;
 	struct qdma_pkt_stats	stats;
+	struct qdma_rxq_stats   qstats;
 
 	struct rte_eth_dev	*dev;
 
@@ -257,6 +306,7 @@ struct qdma_tx_queue {
 	int8_t				ringszidx;
 
 	struct qdma_pkt_stats stats;
+	struct qdma_txq_stats qstats;
 
 	uint64_t			ep_addr;
 	uint32_t			queue_id; /* TX queue index. */
@@ -379,13 +429,13 @@ int qdma_identify_bars(struct rte_eth_dev *dev);
 int qdma_get_hw_version(struct rte_eth_dev *dev);
 
 /* implemented in rxtx.c */
-uint16_t qdma_recv_pkts_st(void *rx_queue, struct rte_mbuf **rx_pkts,
+uint16_t qdma_recv_pkts_st(struct qdma_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 				uint16_t nb_pkts);
-uint16_t qdma_recv_pkts_mm(void *rx_queue, struct rte_mbuf **rx_pkts,
+uint16_t qdma_recv_pkts_mm(struct qdma_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 				uint16_t nb_pkts);
-uint16_t qdma_xmit_pkts_st(void *tx_queue, struct rte_mbuf **tx_pkts,
+uint16_t qdma_xmit_pkts_st(struct qdma_tx_queue *txq, struct rte_mbuf **tx_pkts,
 				uint16_t nb_pkts);
-uint16_t qdma_xmit_pkts_mm(void *tx_queue, struct rte_mbuf **tx_pkts,
+uint16_t qdma_xmit_pkts_mm(struct qdma_tx_queue *txq, struct rte_mbuf **tx_pkts,
 				uint16_t nb_pkts);
 
 #ifdef TEST_64B_DESC_BYPASS
@@ -427,22 +477,24 @@ struct rte_mbuf *prepare_segmented_packet(struct qdma_rx_queue *rxq,
 		uint16_t pkt_length, uint16_t *tail);
 int reclaim_tx_mbuf(struct qdma_tx_queue *txq,
 		uint16_t cidx, uint16_t free_cnt);
-int qdma_extract_st_cmpt_info(void *ul_cmpt_entry, void *cmpt_info);
 int qdma_ul_extract_st_cmpt_info(void *ul_cmpt_entry, void *cmpt_info);
 
 /* Transmit API for Streaming mode */
 uint16_t qdma_xmit_pkts_vec(void *tx_queue,
 		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
-uint16_t qdma_xmit_pkts_st_vec(void *tx_queue,
+uint16_t qdma_xmit_pkts_st_vec(struct qdma_tx_queue *txq,
 		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
 
 /* Receive API for Streaming mode */
 uint16_t qdma_recv_pkts_vec(void *rx_queue,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
-uint16_t qdma_recv_pkts_st_vec(void *rx_queue,
+uint16_t qdma_recv_pkts_st_vec(struct qdma_rx_queue *rxq,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
 
 void __rte_cold qdma_set_tx_function(struct rte_eth_dev *dev);
 void __rte_cold qdma_set_rx_function(struct rte_eth_dev *dev);
 
+int qdma_tx_qstats_clear(struct rte_eth_dev *dev, uint16_t queue);
+int qdma_rx_qstats_clear(struct rte_eth_dev *dev, uint16_t queue);
+
 #endif /* ifndef __QDMA_H__ */
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_access_version.h b/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_access_version.h
index 72dc4f8..9503e5a 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_access_version.h
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_access_version.h
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2019-2022, Xilinx, Inc. All rights reserved.
- * Copyright (c) 2022, Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2022-2023, Advanced Micro Devices, Inc. All rights reserved.
  *
  * BSD LICENSE
  *
@@ -36,8 +36,8 @@
 
 
 #define QDMA_VERSION_MAJOR	2023
-#define QDMA_VERSION_MINOR	1
-#define QDMA_VERSION_PATCH	2
+#define QDMA_VERSION_MINOR	2
+#define QDMA_VERSION_PATCH	0
 
 #define QDMA_VERSION_STR	\
 	__stringify(QDMA_VERSION_MAJOR) "." \
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_cpm4_access/qdma_cpm4_access.c b/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_cpm4_access/qdma_cpm4_access.c
index 65b0d4c..b70e924 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_cpm4_access/qdma_cpm4_access.c
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_cpm4_access/qdma_cpm4_access.c
@@ -4939,6 +4939,15 @@ int qdma_cpm4_init_ctxt_memory(void *dev_hndl)
 		int sel = QDMA_CTXT_SEL_SW_C2H;
 		int rv;
 
+
+#ifdef TANDEM_BOOT_SUPPORTED
+		for (; sel <=  QDMA_CTXT_SEL_CR_H2C; sel++) {
+			rv = qdma_cpm4_indirect_reg_clear(dev_hndl,
+					(enum ind_ctxt_cmd_sel)sel, i);
+			if (rv < 0)
+				return rv;
+		}
+#else
 		for (; sel <= QDMA_CTXT_SEL_PFTCH; sel++) {
 			/** if the st mode(h2c/c2h) not enabled
 			 *  in the design, then skip the PFTCH
@@ -4958,6 +4967,7 @@ int qdma_cpm4_init_ctxt_memory(void *dev_hndl)
 			if (rv < 0)
 				return rv;
 		}
+#endif
 	}
 
 	/* fmap */
@@ -4973,6 +4983,60 @@ int qdma_cpm4_init_ctxt_memory(void *dev_hndl)
 	return 0;
 }
 
+#ifdef TANDEM_BOOT_SUPPORTED
+/*****************************************************************************/
+/**
+ * qdma_cpm4_init_st_ctxt() - Initialize the ST context
+ *
+ * @dev_hndl: device handle
+ *
+ * Return: returns the platform specific error code
+ *****************************************************************************/
+int qdma_cpm4_init_st_ctxt(void *dev_hndl)
+{
+	uint32_t data[QDMA_REG_IND_CTXT_REG_COUNT];
+	uint16_t i = 0;
+	struct qdma_dev_attributes dev_info;
+
+	if (!dev_hndl) {
+		qdma_log_error("%s: dev_handle is NULL, err:%d\n",
+					__func__, -QDMA_ERR_INV_PARAM);
+		return -QDMA_ERR_INV_PARAM;
+	}
+
+	qdma_memset(data, 0, sizeof(uint32_t) * QDMA_REG_IND_CTXT_REG_COUNT);
+	qdma_cpm4_get_device_attributes(dev_hndl, &dev_info);
+
+	for (; i < dev_info.num_qs; i++) {
+		int sel = QDMA_CTXT_SEL_CMPT;
+		int rv;
+
+		for (; sel <= QDMA_CTXT_SEL_PFTCH; sel++) {
+			/** if the st mode(h2c/c2h) not enabled
+			 *  in the design, then skip the PFTCH
+			 *  and CMPT context setup
+			 */
+			if ((dev_info.st_en == 0) &&
+				((sel == QDMA_CTXT_SEL_PFTCH) ||
+				(sel == QDMA_CTXT_SEL_CMPT))) {
+				qdma_log_debug("%s: ST context is skipped:",
+					__func__);
+				qdma_log_debug("sel = %d\n", sel);
+				continue;
+			}
+
+			rv = qdma_cpm4_indirect_reg_clear(dev_hndl,
+					(enum ind_ctxt_cmd_sel)sel, i);
+			if (rv < 0)
+				return rv;
+		}
+	}
+
+	return QDMA_SUCCESS;
+
+}
+#endif
+
 static int get_reg_entry(uint32_t reg_addr, int *reg_entry)
 {
 	uint32_t i = 0;
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_cpm4_access/qdma_cpm4_access.h b/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_cpm4_access/qdma_cpm4_access.h
index 33a1d06..a28f633 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_cpm4_access/qdma_cpm4_access.h
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_access/qdma_cpm4_access/qdma_cpm4_access.h
@@ -179,6 +179,10 @@ struct qdma_cpm4_hw_err_info {
 
 int qdma_cpm4_init_ctxt_memory(void *dev_hndl);
 
+#ifdef TANDEM_BOOT_SUPPORTED
+int qdma_cpm4_init_st_ctxt(void *dev_hndl);
+#endif
+
 int qdma_cpm4_qid2vec_conf(void *dev_hndl, uint8_t c2h, uint16_t hw_qid,
 			 struct qdma_qid2vec *ctxt,
 			 enum qdma_hw_access_type access_type);
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_devops.c b/QDMA/DPDK/drivers/net/qdma/qdma_devops.c
index b53027f..45c9b4f 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_devops.c
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_devops.c
@@ -379,13 +379,6 @@ int qdma_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
 	rxq->dev = dev;
 	rxq->st_mode = qdma_dev->q_info[rx_queue_id].queue_mode;
 
-	/* Override rx_pkt_burst with direct call based on st or mm mode */
-	if (rxq->st_mode) {
-		dev->rx_pkt_burst = (qdma_dev->rx_vec_allowed) ?
-			&qdma_recv_pkts_st_vec : &qdma_recv_pkts_st;
-	} else
-		dev->rx_pkt_burst = &qdma_recv_pkts_mm;
-
 	rxq->nb_rx_desc = (nb_rx_desc + 1);
 	/* <= 2018.2 IP
 	 * double the cmpl ring size to avoid run out of cmpl entry while
@@ -649,6 +642,16 @@ int qdma_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
 
 	dev->data->rx_queues[rx_queue_id] = rxq;
 
+#ifdef LATENCY_MEASUREMENT
+	err = qdma_rx_qstats_clear(dev, rx_queue_id);
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			"Failed to clear QDMA Rx queue stats for qid: %d\n",
+			rx_queue_id);
+		return err;
+	}
+#endif
+
 	return 0;
 
 rx_setup_err:
@@ -759,13 +762,6 @@ int qdma_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 
 	txq->st_mode = qdma_dev->q_info[tx_queue_id].queue_mode;
 
-	/* Override tx_pkt_burst with direct call based on st or mm mode */
-	if (txq->st_mode) {
-		dev->tx_pkt_burst = (qdma_dev->tx_vec_allowed) ?
-			&qdma_xmit_pkts_st_vec : &qdma_xmit_pkts_st;
-	} else
-		dev->tx_pkt_burst = &qdma_xmit_pkts_mm;
-
 	txq->en_bypass = (qdma_dev->q_info[tx_queue_id].tx_bypass_mode) ? 1 : 0;
 	txq->bypass_desc_sz = qdma_dev->q_info[tx_queue_id].tx_bypass_desc_sz;
 
@@ -900,6 +896,16 @@ int qdma_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
 	rte_spinlock_init(&txq->pidx_update_lock);
 	dev->data->tx_queues[tx_queue_id] = txq;
 
+#ifdef LATENCY_MEASUREMENT
+	err = qdma_tx_qstats_clear(dev, tx_queue_id);
+	if (err) {
+		PMD_DRV_LOG(ERR,
+			"Failed to clear QDMA Tx queue stats for qid: %d\n",
+			tx_queue_id);
+		return err;
+	}
+#endif
+
 	return 0;
 
 tx_setup_err:
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_ethdev.c b/QDMA/DPDK/drivers/net/qdma/qdma_ethdev.c
index cb7b997..21b83a8 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_ethdev.c
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_ethdev.c
@@ -63,6 +63,13 @@
 
 #define MAX_PCIE_CAPABILITY    (48)
 
+#ifdef LATENCY_MEASUREMENT
+const struct rte_memzone *txq_lat_buf_mz;
+const struct rte_memzone *rxq_lat_buf_mz;
+double (*h2c_pidx_to_hw_cidx_lat)[LATENCY_CNT] = NULL;
+double (*c2h_pidx_to_cmpt_pidx_lat)[LATENCY_CNT] = NULL;
+#endif
+
 static void qdma_device_attributes_get(struct rte_eth_dev *dev);
 
 /* Poll for any QDMA errors */
@@ -643,18 +650,8 @@ int qdma_eth_dev_init(struct rte_eth_dev *dev)
 	/* Getting the device attributes from the Hardware */
 	qdma_device_attributes_get(dev);
 
-	if (dma_priv->dev_cap.cmpt_trig_count_timer) {
-		/* Setting default Mode to
-		 * RTE_PMD_QDMA_TRIG_MODE_USER_TIMER_COUNT
-		 */
-		dma_priv->trigger_mode =
-					RTE_PMD_QDMA_TRIG_MODE_USER_TIMER_COUNT;
-	} else{
-		/* Setting default Mode to RTE_PMD_QDMA_TRIG_MODE_USER_TIMER */
-		dma_priv->trigger_mode = RTE_PMD_QDMA_TRIG_MODE_USER_TIMER;
-	}
-	if (dma_priv->trigger_mode == RTE_PMD_QDMA_TRIG_MODE_USER_TIMER_COUNT)
-		dma_priv->timer_count = DEFAULT_TIMER_CNT_TRIG_MODE_COUNT_TIMER;
+	/* Setting default Mode to RTE_PMD_QDMA_TRIG_MODE_USER_TIMER */
+	dma_priv->trigger_mode = RTE_PMD_QDMA_TRIG_MODE_USER_TIMER;
 
 	/* Create master resource node for queue management on the given
 	 * bus number. Node will be created only once per bus number.
@@ -774,6 +771,34 @@ int qdma_eth_dev_init(struct rte_eth_dev *dev)
 		}
 	}
 
+#ifdef LATENCY_MEASUREMENT
+	/* Create a shared memory zone for the txq latency buffer */
+	txq_lat_buf_mz = rte_memzone_reserve("TXQ_LAT_BUFFER_ZONE",
+		LATENCY_MAX_QUEUES * LATENCY_CNT * sizeof(double),
+		rte_socket_id(), 0);
+	if (txq_lat_buf_mz == NULL) {
+		PMD_DRV_LOG(ERR, "Failed to allocate txq latency buffer memzone\n");
+		return -1;
+	}
+
+	/* Get the virtual address of the txq latency buffer */
+	h2c_pidx_to_hw_cidx_lat =
+		(double(*)[LATENCY_CNT])txq_lat_buf_mz->addr;
+
+	/* Create a shared memory zone for the rxq latency buffer */
+	rxq_lat_buf_mz = rte_memzone_reserve("RXQ_LAT_BUFFER_ZONE",
+		LATENCY_MAX_QUEUES * LATENCY_CNT * sizeof(double),
+		rte_socket_id(), 0);
+	if (rxq_lat_buf_mz == NULL) {
+		PMD_DRV_LOG(ERR, "Failed to allocate rxq latency buffer memzone\n");
+		return -1;
+	}
+
+	/* Get the virtual address of the rxq latency buffer */
+	c2h_pidx_to_cmpt_pidx_lat =
+		(double(*)[LATENCY_CNT])rxq_lat_buf_mz->addr;
+#endif
+
 	dma_priv->reset_in_progress = 0;
 
 	return 0;
@@ -886,6 +911,12 @@ int qdma_eth_dev_uninit(struct rte_eth_dev *dev)
 		rte_free(qdma_dev->hw_access);
 		qdma_dev->hw_access = NULL;
 	}
+
+#ifdef LATENCY_MEASUREMENT
+	rte_memzone_free(txq_lat_buf_mz);
+	rte_memzone_free(rxq_lat_buf_mz);
+#endif
+
 	return 0;
 }
 
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_rxtx.c b/QDMA/DPDK/drivers/net/qdma/qdma_rxtx.c
index bf51411..186c1bb 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_rxtx.c
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_rxtx.c
@@ -124,7 +124,7 @@ static int dma_wb_monitor(void *xq, uint8_t dir, uint16_t expected_count)
 	return -1;
 }
 
-int qdma_extract_st_cmpt_info(void *ul_cmpt_entry, void *cmpt_info)
+static int qdma_extract_st_cmpt_info(void *ul_cmpt_entry, void *cmpt_info)
 {
 	union qdma_ul_st_cmpt_ring *cmpt_data, *cmpt_desc;
 
@@ -165,9 +165,12 @@ int reclaim_tx_mbuf(struct qdma_tx_queue *txq,
 			txq->sw_ring[id++] = NULL;
 
 		txq->tx_fl_tail = id;
+
 		return fl_desc;
 	}
 
+	txq->qstats.ring_wrap_cnt++;
+
 	/* Handle Tx queue ring wrap case */
 	fl_desc -= (txq->nb_tx_desc - 1 - id);
 	rte_pktmbuf_free_bulk(&txq->sw_ring[id], (txq->nb_tx_desc - 1 - id));
@@ -785,8 +788,10 @@ static int rearm_c2h_ring(struct qdma_rx_queue *rxq, uint16_t num_desc)
 	 */
 	if ((id + num_desc) < (rxq->nb_rx_desc - 1))
 		rearm_descs = num_desc;
-	else
+	else {
 		rearm_descs = (rxq->nb_rx_desc - 1) - id;
+		rxq->qstats.ring_wrap_cnt++;
+	}
 
 	/* allocate new buffer */
 	if (rte_mempool_get_bulk(rxq->mb_pool, (void *)&rxq->sw_ring[id],
@@ -867,8 +872,8 @@ static int rearm_c2h_ring(struct qdma_rx_queue *rxq, uint16_t num_desc)
 }
 
 /* Receive API for Streaming mode */
-uint16_t qdma_recv_pkts_st(void *rx_queue, struct rte_mbuf **rx_pkts,
-				uint16_t nb_pkts)
+uint16_t qdma_recv_pkts_st(struct qdma_rx_queue *rxq,
+		struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	uint16_t count_pkts;
 	struct wb_status *wb_status;
@@ -876,7 +881,6 @@ uint16_t qdma_recv_pkts_st(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint16_t rx_cmpt_tail = 0;
 	uint16_t cmpt_pidx, c2h_pidx;
 	uint16_t pending_desc;
-	struct qdma_rx_queue *rxq = rx_queue;
 #ifdef TEST_64B_DESC_BYPASS
 	int bypass_desc_sz_idx = qmda_get_desc_sz_idx(rxq->bypass_desc_sz);
 #endif
@@ -900,6 +904,16 @@ uint16_t qdma_recv_pkts_st(void *rx_queue, struct rte_mbuf **rx_pkts,
 #endif
 	cmpt_pidx = wb_status->pidx;
 
+#ifdef LATENCY_MEASUREMENT
+	if (cmpt_pidx != rxq->qstats.wrb_pidx) {
+		/* stop the timer */
+		rxq->qstats.pkt_lat.curr = rte_get_timer_cycles();
+		c2h_pidx_to_cmpt_pidx_lat[rxq->queue_id][rxq->qstats.lat_cnt] =
+			rxq->qstats.pkt_lat.curr - rxq->qstats.pkt_lat.prev;
+		rxq->qstats.lat_cnt = ((rxq->qstats.lat_cnt + 1) % LATENCY_CNT);
+	}
+#endif
+
 	if (rx_cmpt_tail < cmpt_pidx)
 		nb_pkts_avail = cmpt_pidx - rx_cmpt_tail;
 	else if (rx_cmpt_tail > cmpt_pidx)
@@ -947,6 +961,14 @@ uint16_t qdma_recv_pkts_st(void *rx_queue, struct rte_mbuf **rx_pkts,
 		pending_desc = rxq->nb_rx_desc - 2 + rxq->rx_tail -
 				c2h_pidx;
 
+	rxq->qstats.pidx = rxq->q_pidx_info.pidx;
+	rxq->qstats.wrb_pidx = rxq->wb_status->pidx;
+	rxq->qstats.wrb_cidx = rxq->wb_status->cidx;
+	rxq->qstats.rxq_cmpt_tail = rx_cmpt_tail;
+	rxq->qstats.pending_desc = pending_desc;
+	rxq->qstats.mbuf_avail_cnt = rte_mempool_avail_count(rxq->mb_pool);
+	rxq->qstats.mbuf_in_use_cnt = rte_mempool_in_use_count(rxq->mb_pool);
+
 	/* Batch the PIDX updates, this minimizes overhead on
 	 * descriptor engine
 	 */
@@ -970,15 +992,14 @@ uint16_t qdma_recv_pkts_st(void *rx_queue, struct rte_mbuf **rx_pkts,
 }
 
 /* Receive API for Memory mapped mode */
-uint16_t qdma_recv_pkts_mm(void *rx_queue, struct rte_mbuf **rx_pkts,
-			uint16_t nb_pkts)
+uint16_t qdma_recv_pkts_mm(struct qdma_rx_queue *rxq,
+		struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	struct rte_mbuf *mb;
 	uint32_t count, id;
 	struct qdma_ul_mm_desc *desc;
 	uint32_t len;
 	int ret;
-	struct qdma_rx_queue *rxq = rx_queue;
 	struct qdma_pci_dev *qdma_dev = rxq->dev->data->dev_private;
 #ifdef TEST_64B_DESC_BYPASS
 	int bypass_desc_sz_idx = qmda_get_desc_sz_idx(rxq->bypass_desc_sz);
@@ -1086,9 +1107,9 @@ uint16_t qdma_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint32_t count;
 
 	if (rxq->st_mode)
-		count = qdma_recv_pkts_st(rx_queue, rx_pkts, nb_pkts);
+		count = qdma_recv_pkts_st(rxq, rx_pkts, nb_pkts);
 	else
-		count = qdma_recv_pkts_mm(rx_queue, rx_pkts, nb_pkts);
+		count = qdma_recv_pkts_mm(rxq, rx_pkts, nb_pkts);
 
 	return count;
 }
@@ -1169,15 +1190,14 @@ qdma_dev_tx_descriptor_status(void *tx_queue, uint16_t offset)
 }
 
 /* Transmit API for Streaming mode */
-uint16_t qdma_xmit_pkts_st(void *tx_queue, struct rte_mbuf **tx_pkts,
-			uint16_t nb_pkts)
+uint16_t qdma_xmit_pkts_st(struct qdma_tx_queue *txq,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	struct rte_mbuf *mb = NULL;
 	uint64_t pkt_len = 0;
 	int avail, in_use, ret, nsegs;
 	uint16_t cidx = 0;
 	uint16_t count = 0, id;
-	struct qdma_tx_queue *txq = tx_queue;
 	struct qdma_pci_dev *qdma_dev = txq->dev->data->dev_private;
 
 #ifdef TEST_64B_DESC_BYPASS
@@ -1197,6 +1217,17 @@ uint16_t qdma_xmit_pkts_st(void *tx_queue, struct rte_mbuf **tx_pkts,
 	rte_rmb();
 
 	cidx = txq->wb_status->cidx;
+
+#ifdef LATENCY_MEASUREMENT
+	if (cidx != txq->qstats.wrb_cidx) {
+		/* stop the timer */
+		txq->qstats.pkt_lat.curr = rte_get_timer_cycles();
+		h2c_pidx_to_hw_cidx_lat[txq->queue_id][txq->qstats.lat_cnt] =
+			txq->qstats.pkt_lat.curr - txq->qstats.pkt_lat.prev;
+		txq->qstats.lat_cnt = ((txq->qstats.lat_cnt + 1) % LATENCY_CNT);
+	}
+#endif
+
 	PMD_DRV_LOG(DEBUG, "Xmit start on tx queue-id:%d, tail index:%d\n",
 			txq->queue_id, id);
 
@@ -1215,6 +1246,7 @@ uint16_t qdma_xmit_pkts_st(void *tx_queue, struct rte_mbuf **tx_pkts,
 	avail = txq->nb_tx_desc - 2 - in_use;
 
 	if (unlikely(!avail)) {
+		txq->qstats.txq_full_cnt++;
 		PMD_DRV_LOG(DEBUG, "Tx queue full, in_use = %d", in_use);
 		return 0;
 	}
@@ -1242,6 +1274,12 @@ uint16_t qdma_xmit_pkts_st(void *tx_queue, struct rte_mbuf **tx_pkts,
 	txq->stats.pkts += count;
 	txq->stats.bytes += pkt_len;
 
+	txq->qstats.pidx = id;
+	txq->qstats.wrb_cidx = cidx;
+	txq->qstats.txq_tail = txq->tx_fl_tail;
+	txq->qstats.in_use_desc = in_use;
+	txq->qstats.nb_pkts = nb_pkts;
+
 #if (MIN_TX_PIDX_UPDATE_THRESHOLD > 1)
 	rte_spinlock_lock(&txq->pidx_update_lock);
 #endif
@@ -1256,6 +1294,10 @@ uint16_t qdma_xmit_pkts_st(void *tx_queue, struct rte_mbuf **tx_pkts,
 			txq->queue_id, 0, &txq->q_pidx_info);
 
 		txq->tx_desc_pend = 0;
+#ifdef LATENCY_MEASUREMENT
+		/* start the timer */
+		txq->qstats.pkt_lat.prev = rte_get_timer_cycles();
+#endif
 	}
 #if (MIN_TX_PIDX_UPDATE_THRESHOLD > 1)
 	rte_spinlock_unlock(&txq->pidx_update_lock);
@@ -1266,15 +1308,14 @@ uint16_t qdma_xmit_pkts_st(void *tx_queue, struct rte_mbuf **tx_pkts,
 }
 
 /* Transmit API for Memory mapped mode */
-uint16_t qdma_xmit_pkts_mm(void *tx_queue, struct rte_mbuf **tx_pkts,
-			uint16_t nb_pkts)
+uint16_t qdma_xmit_pkts_mm(struct qdma_tx_queue *txq,
+		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	struct rte_mbuf *mb;
 	uint32_t count, id;
 	uint64_t	len = 0;
 	int avail, in_use;
 	int ret;
-	struct qdma_tx_queue *txq = tx_queue;
 	struct qdma_pci_dev *qdma_dev = txq->dev->data->dev_private;
 	uint16_t cidx = 0;
 
@@ -1381,9 +1422,9 @@ uint16_t qdma_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		return 0;
 
 	if (txq->st_mode)
-		count =	qdma_xmit_pkts_st(tx_queue, tx_pkts, nb_pkts);
+		count =	qdma_xmit_pkts_st(txq, tx_pkts, nb_pkts);
 	else
-		count =	qdma_xmit_pkts_mm(tx_queue, tx_pkts, nb_pkts);
+		count =	qdma_xmit_pkts_mm(txq, tx_pkts, nb_pkts);
 
 	return count;
 }
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_rxtx_vec_sse.c b/QDMA/DPDK/drivers/net/qdma/qdma_rxtx_vec_sse.c
index 7dae459..34809dd 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_rxtx_vec_sse.c
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_rxtx_vec_sse.c
@@ -418,8 +418,10 @@ static int rearm_c2h_ring_vec(struct qdma_rx_queue *rxq, uint16_t num_desc)
 	 */
 	if ((id + num_desc) < (rxq->nb_rx_desc - 1))
 		rearm_descs = num_desc;
-	else
+	else {
 		rearm_descs = (rxq->nb_rx_desc - 1) - id;
+		rxq->qstats.ring_wrap_cnt++;
+	}
 
 	/* allocate new buffer */
 	if (rte_mempool_get_bulk(rxq->mb_pool, (void *)&rxq->sw_ring[id],
@@ -493,7 +495,10 @@ static int rearm_c2h_ring_vec(struct qdma_rx_queue *rxq, uint16_t num_desc)
 			qdma_dev->hw_access->qdma_queue_pidx_update(rxq->dev,
 				qdma_dev->is_vf,
 				rxq->queue_id, 1, &rxq->q_pidx_info);
-
+#ifdef LATENCY_MEASUREMENT
+			/* start the timer */
+			rxq->qstats.pkt_lat.prev = rte_get_timer_cycles();
+#endif
 			return -1;
 		}
 
@@ -525,11 +530,15 @@ static int rearm_c2h_ring_vec(struct qdma_rx_queue *rxq, uint16_t num_desc)
 		qdma_dev->is_vf,
 		rxq->queue_id, 1, &rxq->q_pidx_info);
 
+#ifdef LATENCY_MEASUREMENT
+	/* start the timer */
+	rxq->qstats.pkt_lat.prev = rte_get_timer_cycles();
+#endif
 	return 0;
 }
 
 /* Receive API for Streaming mode */
-uint16_t qdma_recv_pkts_st_vec(void *rx_queue,
+uint16_t qdma_recv_pkts_st_vec(struct qdma_rx_queue *rxq,
 		struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 {
 	uint16_t count_pkts;
@@ -538,7 +547,6 @@ uint16_t qdma_recv_pkts_st_vec(void *rx_queue,
 	uint16_t rx_cmpt_tail = 0;
 	uint16_t cmpt_pidx, c2h_pidx;
 	uint16_t pending_desc;
-	struct qdma_rx_queue *rxq = rx_queue;
 #ifdef TEST_64B_DESC_BYPASS
 	int bypass_desc_sz_idx = qmda_get_desc_sz_idx(rxq->bypass_desc_sz);
 #endif
@@ -562,6 +570,16 @@ uint16_t qdma_recv_pkts_st_vec(void *rx_queue,
 #endif
 	cmpt_pidx = wb_status->pidx;
 
+#ifdef LATENCY_MEASUREMENT
+	if (cmpt_pidx != rxq->qstats.wrb_pidx) {
+		/* stop the timer */
+		rxq->qstats.pkt_lat.curr = rte_get_timer_cycles();
+		c2h_pidx_to_cmpt_pidx_lat[rxq->queue_id][rxq->qstats.lat_cnt] =
+			rxq->qstats.pkt_lat.curr - rxq->qstats.pkt_lat.prev;
+		rxq->qstats.lat_cnt = ((rxq->qstats.lat_cnt + 1) % LATENCY_CNT);
+	}
+#endif
+
 	if (rx_cmpt_tail < cmpt_pidx)
 		nb_pkts_avail = cmpt_pidx - rx_cmpt_tail;
 	else if (rx_cmpt_tail > cmpt_pidx)
@@ -609,6 +627,14 @@ uint16_t qdma_recv_pkts_st_vec(void *rx_queue,
 		pending_desc = rxq->nb_rx_desc - 2 + rxq->rx_tail -
 				c2h_pidx;
 
+	rxq->qstats.pidx = rxq->q_pidx_info.pidx;
+	rxq->qstats.wrb_pidx = rxq->wb_status->pidx;
+	rxq->qstats.wrb_cidx = rxq->wb_status->cidx;
+	rxq->qstats.rxq_cmpt_tail = rx_cmpt_tail;
+	rxq->qstats.pending_desc = pending_desc;
+	rxq->qstats.mbuf_avail_cnt = rte_mempool_avail_count(rxq->mb_pool);
+	rxq->qstats.mbuf_in_use_cnt = rte_mempool_in_use_count(rxq->mb_pool);
+
 	/* Batch the PIDX updates, this minimizes overhead on
 	 * descriptor engine
 	 */
@@ -651,15 +677,15 @@ uint16_t qdma_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 	uint32_t count;
 
 	if (rxq->st_mode)
-		count = qdma_recv_pkts_st_vec(rx_queue, rx_pkts, nb_pkts);
+		count = qdma_recv_pkts_st_vec(rxq, rx_pkts, nb_pkts);
 	else
-		count = qdma_recv_pkts_mm(rx_queue, rx_pkts, nb_pkts);
+		count = qdma_recv_pkts_mm(rxq, rx_pkts, nb_pkts);
 
 	return count;
 }
 
 /* Transmit API for Streaming mode */
-uint16_t qdma_xmit_pkts_st_vec(void *tx_queue,
+uint16_t qdma_xmit_pkts_st_vec(struct qdma_tx_queue *txq,
 		struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
 	struct rte_mbuf *mb;
@@ -667,7 +693,6 @@ uint16_t qdma_xmit_pkts_st_vec(void *tx_queue,
 	int avail, in_use, ret, nsegs;
 	uint16_t cidx = 0;
 	uint16_t count = 0, id;
-	struct qdma_tx_queue *txq = tx_queue;
 	struct qdma_pci_dev *qdma_dev = txq->dev->data->dev_private;
 
 #ifdef TEST_64B_DESC_BYPASS
@@ -687,6 +712,33 @@ uint16_t qdma_xmit_pkts_st_vec(void *tx_queue,
 	rte_rmb();
 
 	cidx = txq->wb_status->cidx;
+
+#ifdef LATENCY_MEASUREMENT
+	uint32_t cidx_cnt = 0;
+	if (cidx != txq->qstats.wrb_cidx) {
+		if ((cidx - txq->qstats.wrb_cidx) > 0) {
+			cidx_cnt = cidx - txq->qstats.wrb_cidx;
+
+			if (cidx_cnt <= 8)
+				txq->qstats.wrb_cidx_cnt_lt_8++;
+			else if (cidx_cnt > 8 && cidx_cnt <= 32)
+				txq->qstats.wrb_cidx_cnt_8_to_32++;
+			else if (cidx_cnt > 32 && cidx_cnt <= 64)
+				txq->qstats.wrb_cidx_cnt_32_to_64++;
+			else
+				txq->qstats.wrb_cidx_cnt_gt_64++;
+		}
+
+		/* stop the timer */
+		txq->qstats.pkt_lat.curr = rte_get_timer_cycles();
+		h2c_pidx_to_hw_cidx_lat[txq->queue_id][txq->qstats.lat_cnt] =
+			txq->qstats.pkt_lat.curr - txq->qstats.pkt_lat.prev;
+		txq->qstats.lat_cnt = ((txq->qstats.lat_cnt + 1) % LATENCY_CNT);
+	} else {
+		txq->qstats.wrb_cidx_cnt_no_change++;
+	}
+#endif
+
 	PMD_DRV_LOG(DEBUG, "Xmit start on tx queue-id:%d, tail index:%d\n",
 			txq->queue_id, id);
 
@@ -705,6 +757,7 @@ uint16_t qdma_xmit_pkts_st_vec(void *tx_queue,
 	avail = txq->nb_tx_desc - 2 - in_use;
 
 	if (unlikely(!avail)) {
+		txq->qstats.txq_full_cnt++;
 		PMD_DRV_LOG(DEBUG, "Tx queue full, in_use = %d", in_use);
 		return 0;
 	}
@@ -733,6 +786,12 @@ uint16_t qdma_xmit_pkts_st_vec(void *tx_queue,
 	txq->stats.pkts += count;
 	txq->stats.bytes += pkt_len;
 
+	txq->qstats.pidx = id;
+	txq->qstats.wrb_cidx = cidx;
+	txq->qstats.txq_tail = txq->tx_fl_tail;
+	txq->qstats.in_use_desc = in_use;
+	txq->qstats.nb_pkts = nb_pkts;
+
 #if (MIN_TX_PIDX_UPDATE_THRESHOLD > 1)
 	rte_spinlock_lock(&txq->pidx_update_lock);
 #endif
@@ -747,6 +806,11 @@ uint16_t qdma_xmit_pkts_st_vec(void *tx_queue,
 			txq->queue_id, 0, &txq->q_pidx_info);
 
 		txq->tx_desc_pend = 0;
+
+#ifdef LATENCY_MEASUREMENT
+		/* start the timer */
+		txq->qstats.pkt_lat.prev = rte_get_timer_cycles();
+#endif
 	}
 #if (MIN_TX_PIDX_UPDATE_THRESHOLD > 1)
 	rte_spinlock_unlock(&txq->pidx_update_lock);
@@ -779,9 +843,9 @@ uint16_t qdma_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 		return 0;
 
 	if (txq->st_mode)
-		count =	qdma_xmit_pkts_st_vec(tx_queue, tx_pkts, nb_pkts);
+		count =	qdma_xmit_pkts_st_vec(txq, tx_pkts, nb_pkts);
 	else
-		count =	qdma_xmit_pkts_mm(tx_queue, tx_pkts, nb_pkts);
+		count =	qdma_xmit_pkts_mm(txq, tx_pkts, nb_pkts);
 
 	return count;
 }
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_vf_ethdev.c b/QDMA/DPDK/drivers/net/qdma/qdma_vf_ethdev.c
index 4c4fce5..f055e6d 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_vf_ethdev.c
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_vf_ethdev.c
@@ -1139,18 +1139,8 @@ static int eth_qdma_vf_dev_init(struct rte_eth_dev *dev)
 		return -EINVAL;
 	}
 
-	if (dma_priv->dev_cap.cmpt_trig_count_timer) {
-		/* Setting default Mode to
-		 * RTE_PMD_QDMA_TRIG_MODE_USER_TIMER_COUNT
-		 */
-		dma_priv->trigger_mode =
-				RTE_PMD_QDMA_TRIG_MODE_USER_TIMER_COUNT;
-	} else {
-		/* Setting default Mode to RTE_PMD_QDMA_TRIG_MODE_USER_TIMER */
-		dma_priv->trigger_mode = RTE_PMD_QDMA_TRIG_MODE_USER_TIMER;
-	}
-	if (dma_priv->trigger_mode == RTE_PMD_QDMA_TRIG_MODE_USER_TIMER_COUNT)
-		dma_priv->timer_count = DEFAULT_TIMER_CNT_TRIG_MODE_COUNT_TIMER;
+	/* Setting default Mode to RTE_PMD_QDMA_TRIG_MODE_USER_TIMER */
+	dma_priv->trigger_mode = RTE_PMD_QDMA_TRIG_MODE_USER_TIMER;
 
 	dma_priv->reset_state = RESET_STATE_IDLE;
 
diff --git a/QDMA/DPDK/drivers/net/qdma/qdma_xdebug.c b/QDMA/DPDK/drivers/net/qdma/qdma_xdebug.c
index c0f48b9..1811f21 100755
--- a/QDMA/DPDK/drivers/net/qdma/qdma_xdebug.c
+++ b/QDMA/DPDK/drivers/net/qdma/qdma_xdebug.c
@@ -445,6 +445,221 @@ static int qdma_device_dump(uint8_t port_id)
 	return 0;
 }
 
+static int qdma_tx_qstats_dump(struct qdma_tx_queue *txq)
+{
+	if (txq == NULL) {
+		xdebug_info(
+			"Caught NULL pointer for txq\n");
+		return -1;
+	}
+
+	xdebug_info("\n***** QDMA Tx Qstats on port_id: %d for qid: %d *****\n",
+		txq->port_id, txq->queue_id);
+	xdebug_info("\t\t txq_pidx             :%u\n",
+			txq->qstats.pidx);
+	xdebug_info("\t\t txq_wrb_cidx         :%u\n",
+			txq->qstats.wrb_cidx);
+	xdebug_info("\t\t txq_tail             :%u\n",
+			txq->qstats.txq_tail);
+	xdebug_info("\t\t in_use_desc          :%u\n",
+			txq->qstats.in_use_desc);
+	xdebug_info("\t\t nb_pkts              :%u\n",
+			txq->qstats.nb_pkts);
+	xdebug_info("\t\t ring_wrap_cnt        :%u\n",
+			txq->qstats.ring_wrap_cnt);
+	xdebug_info("\t\t txq_full_cnt         :%u\n",
+			txq->qstats.txq_full_cnt);
+
+#ifdef LATENCY_MEASUREMENT
+	xdebug_info("\n\t***** wrb cidx counts *****\n");
+	xdebug_info("\t\t wrb_cidx_cnt_no_change     :%u\n",
+			txq->qstats.wrb_cidx_cnt_no_change);
+	xdebug_info("\t\t wrb_cidx_cnt_lt_8          :%u\n",
+			txq->qstats.wrb_cidx_cnt_lt_8);
+	xdebug_info("\t\t wrb_cidx_cnt_8_to_32       :%u\n",
+			txq->qstats.wrb_cidx_cnt_8_to_32);
+	xdebug_info("\t\t wrb_cidx_cnt_32_to_64      :%u\n",
+			txq->qstats.wrb_cidx_cnt_32_to_64);
+	xdebug_info("\t\t wrb_cidx_cnt_gt_64         :%u\n",
+			txq->qstats.wrb_cidx_cnt_gt_64);
+#endif
+
+	return 0;
+}
+
+static int qdma_tx_qstats_latency_dump(struct rte_eth_dev *dev, uint16_t queue)
+{
+	struct qdma_tx_queue *txq;
+	int ret;
+#ifdef LATENCY_MEASUREMENT
+	double pkt_lat_val_ms = 0;
+	double txq_avg_lat_ms = 0;
+	const struct rte_memzone *memzone;
+	double (*lat_data)[LATENCY_CNT] = NULL;
+	uint64_t hz;
+	int i;
+#endif
+
+	if (dev == NULL) {
+		xdebug_error("Caught NULL pointer for dev\n");
+		return -EINVAL;
+	}
+
+	if (queue >= dev->data->nb_tx_queues) {
+		xdebug_info("TX queue_id=%d not configured\n", queue);
+		return -EINVAL;
+	}
+
+	txq = (struct qdma_tx_queue *)dev->data->tx_queues[queue];
+	if (txq == NULL) {
+		xdebug_info("Caught NULL pointer for queue_id: %d\n", queue);
+		return -1;
+	}
+
+	if (txq->status != RTE_ETH_QUEUE_STATE_STARTED) {
+		xdebug_info("Queue_id %d is not yet started\n", txq->queue_id);
+		return -1;
+	}
+
+	ret = qdma_tx_qstats_dump(txq);
+	if (ret < 0) {
+		xdebug_info("Failed to dump Tx qstats for queue_id: %d\n",
+			queue);
+		return -1;
+	}
+
+#ifdef LATENCY_MEASUREMENT
+	/* Find the memzone created by the primary application */
+	memzone = rte_memzone_lookup("TXQ_LAT_BUFFER_ZONE");
+	if (memzone == NULL) {
+		/* Handle memzone lookup failure */
+		return -1;
+	}
+
+	/* Get the virtual address of the shared txq latency buffer memory */
+	lat_data = memzone->addr;
+
+	xdebug_info("\n\t**** TxQ SW PIDX to HW CIDX Latency for qid: %d ****\n",
+			queue);
+	hz = rte_get_timer_hz();
+	for (i = 0; i < LATENCY_CNT; i++) {
+		pkt_lat_val_ms =
+			((double)lat_data[queue][i]*1000/hz);
+		txq_avg_lat_ms += pkt_lat_val_ms;
+		xdebug_info("\t\t h2c_sw_pidx_to_hw_cidx_latency[%d][%d] : %f ms\n",
+			queue, i, pkt_lat_val_ms);
+	}
+
+	xdebug_info(
+			"\n\t Avg h2c_sw_pidx_to_hw_cidx_latency for qid:%d is %f ms\n",
+			queue, (txq_avg_lat_ms/LATENCY_CNT));
+#endif
+
+	return 0;
+}
+
+static int qdma_rx_qstats_dump(struct qdma_rx_queue *rxq)
+{
+	if (rxq == NULL) {
+		xdebug_info(
+			"Caught NULL pointer for rxq\n");
+		return -1;
+	}
+
+	xdebug_info("\n***** QDMA Rx Qstats on port_id: %d for qid: %d *****\n",
+		rxq->port_id, rxq->queue_id);
+
+	xdebug_info("\t\t rxq_pidx             :%u\n",
+			rxq->qstats.pidx);
+	xdebug_info("\t\t rxq_wrb_pidx         :%u\n",
+			rxq->qstats.wrb_pidx);
+	xdebug_info("\t\t rxq_wrb_cidx         :%u\n",
+			rxq->qstats.wrb_cidx);
+	xdebug_info("\t\t rxq_cmpt_tail        :%u\n",
+			rxq->qstats.rxq_cmpt_tail);
+	xdebug_info("\t\t pending_desc         :%u\n",
+			rxq->qstats.pending_desc);
+	xdebug_info("\t\t ring_wrap_cnt        :%u\n",
+			rxq->qstats.ring_wrap_cnt);
+	xdebug_info("\t\t mbuf_avail_cnt       :%u\n",
+			rxq->qstats.mbuf_avail_cnt);
+	xdebug_info("\t\t mbuf_in_use_cnt      :%u\n",
+			rxq->qstats.mbuf_in_use_cnt);
+
+	return 0;
+}
+
+static int qdma_rx_qstats_latency_dump(struct rte_eth_dev *dev, uint16_t queue)
+{
+	struct qdma_rx_queue *rxq;
+	int ret;
+#ifdef LATENCY_MEASUREMENT
+	double pkt_lat_val_ms = 0;
+	double rxq_avg_lat_ms = 0;
+	const struct rte_memzone *memzone;
+	double (*lat_data)[LATENCY_CNT] = NULL;
+	uint64_t hz;
+	int i;
+#endif
+
+	if (dev == NULL) {
+		xdebug_error("Caught NULL pointer for dev\n");
+		return -EINVAL;
+	}
+
+	if (queue >= dev->data->nb_rx_queues) {
+		xdebug_info("RX queue_id=%d not configured\n", queue);
+		return -EINVAL;
+	}
+
+	rxq = (struct qdma_rx_queue *)dev->data->rx_queues[queue];
+	if (rxq == NULL) {
+		xdebug_info("Caught NULL pointer for queue_id: %d\n", queue);
+		return -1;
+	}
+
+	if (rxq->status != RTE_ETH_QUEUE_STATE_STARTED) {
+		xdebug_info("Queue_id %d is not yet started\n", rxq->queue_id);
+		return -1;
+	}
+
+	ret = qdma_rx_qstats_dump(rxq);
+	if (ret < 0) {
+		xdebug_info("Failed to dump Rx qstats for queue_id: %d\n",
+			queue);
+		return -1;
+	}
+
+#ifdef LATENCY_MEASUREMENT
+	/* Find the memzone created by the primary application */
+	memzone = rte_memzone_lookup("RXQ_LAT_BUFFER_ZONE");
+	if (memzone == NULL) {
+		/* Handle memzone lookup failure */
+		return -1;
+	}
+
+	/* Get the virtual address of the shared rxq latency buffer memory */
+	lat_data = memzone->addr;
+
+	xdebug_info("\n\t*** RxQ SW PIDX to CMPT PIDX Latency for qid: %d ***\n",
+		queue);
+	hz = rte_get_timer_hz();
+	for (i = 0; i < LATENCY_CNT; i++) {
+		pkt_lat_val_ms =
+			((double)lat_data[queue][i]*1000/hz);
+		rxq_avg_lat_ms += pkt_lat_val_ms;
+		xdebug_info("\t\t c2h_sw_pidx_to_cmpt_pidx_latency[%d][%d] : %f ms\n",
+			queue, i, pkt_lat_val_ms);
+	}
+
+	xdebug_info(
+			"\n\t Avg c2h_sw_pidx_to_cmpt_pidx_latency for qid:%d is %f ms\n",
+			queue, (rxq_avg_lat_ms/LATENCY_CNT));
+#endif
+
+	return 0;
+}
+
 static int qdma_descq_context_read_vf(struct rte_eth_dev *dev,
 	unsigned int qid_hw, bool st_mode,
 	enum qdma_dev_q_type q_type,
@@ -1033,6 +1248,126 @@ int rte_pmd_qdma_dbg_qdevice(uint8_t port_id)
 	return 0;
 }
 
+int rte_pmd_qdma_qstats(uint8_t port_id, uint16_t queue)
+{
+	struct rte_eth_dev *dev;
+	int ret;
+
+	if (port_id >= rte_eth_dev_count_avail()) {
+		xdebug_error("Wrong port id %d\n", port_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+	if (dev == NULL) {
+		xdebug_error("Caught NULL pointer for dev\n");
+		return -EINVAL;
+	}
+
+	ret = qdma_tx_qstats_latency_dump(dev, queue);
+	if (ret) {
+		xdebug_error("Error dumping QDMA Tx queue stats\n");
+		return ret;
+	}
+
+	ret = qdma_rx_qstats_latency_dump(dev, queue);
+	if (ret) {
+		xdebug_error("Error dumping QDMA Rx queue stats\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+int qdma_tx_qstats_clear(struct rte_eth_dev *dev, uint16_t queue)
+{
+	struct qdma_tx_queue *txq;
+	int ret;
+
+	if (queue >= dev->data->nb_tx_queues) {
+		xdebug_info("TX queue_id=%d not configured\n", queue);
+		return -EINVAL;
+	}
+
+	txq = (struct qdma_tx_queue *)dev->data->tx_queues[queue];
+	if (txq == NULL) {
+		xdebug_info("Caught NULL pointer for queue_id: %d\n", queue);
+		return -EINVAL;
+	}
+
+	memset(&txq->qstats, 0, sizeof(struct qdma_txq_stats));
+
+	xdebug_info("\nCleared Tx queue stats for qid: %d\n",
+		queue);
+
+	ret = qdma_tx_qstats_dump(txq);
+	if (ret < 0) {
+		xdebug_info("Failed to dump Tx qstats for queue_id: %d\n",
+			queue);
+		return -1;
+	}
+
+	return 0;
+}
+
+int qdma_rx_qstats_clear(struct rte_eth_dev *dev, uint16_t queue)
+{
+	struct qdma_rx_queue *rxq;
+	int ret;
+
+	if (queue >= dev->data->nb_rx_queues) {
+		xdebug_info("RX queue_id=%d not configured\n", queue);
+		return -EINVAL;
+	}
+
+	rxq = (struct qdma_rx_queue *)dev->data->rx_queues[queue];
+	if (rxq == NULL) {
+		xdebug_info("Caught NULL pointer for queue_id: %d\n", queue);
+		return -EINVAL;
+	}
+
+	memset(&rxq->qstats, 0, sizeof(struct qdma_rxq_stats));
+
+	xdebug_info("\nCleared Rx queue stats for qid: %d\n",
+		queue);
+
+	ret = qdma_rx_qstats_dump(rxq);
+	if (ret < 0) {
+		xdebug_info("Failed to dump Rx qstats for queue_id: %d\n",
+			queue);
+		return -1;
+	}
+
+	return 0;
+}
+
+int rte_pmd_qdma_qstats_clear(uint8_t port_id, uint16_t queue)
+{
+	struct rte_eth_dev *dev;
+	int ret;
+
+	if (port_id >= rte_eth_dev_count_avail()) {
+		xdebug_error("Wrong port id %d\n", port_id);
+		return -EINVAL;
+	}
+
+	dev = &rte_eth_devices[port_id];
+
+	ret = qdma_tx_qstats_clear(dev, queue);
+	if (ret) {
+		xdebug_error("Failed to clear QDMA Tx queue stats\n");
+		return ret;
+	}
+
+	ret = qdma_rx_qstats_clear(dev, queue);
+	if (ret) {
+		xdebug_error("Failed to clear QDMA Rx queue stats\n");
+		return ret;
+	}
+
+	return 0;
+}
+
 int rte_pmd_qdma_dbg_qinfo(uint8_t port_id, uint16_t queue)
 {
 	struct rte_eth_dev *dev;
diff --git a/QDMA/DPDK/drivers/net/qdma/rte_pmd_qdma.h b/QDMA/DPDK/drivers/net/qdma/rte_pmd_qdma.h
index e1774c5..8a6e95c 100755
--- a/QDMA/DPDK/drivers/net/qdma/rte_pmd_qdma.h
+++ b/QDMA/DPDK/drivers/net/qdma/rte_pmd_qdma.h
@@ -335,6 +335,34 @@ int rte_pmd_qdma_dbg_reg_info_dump(uint8_t port_id,
  ******************************************************************************/
 int rte_pmd_qdma_dbg_qdevice(uint8_t port_id);
 
+/******************************************************************************/
+/**
+ * Dumps the qdma Tx and Rx queue stats for the given queue ID
+ *
+ * @param	port_id Port ID
+ * @param	queue  Queue ID relative to the Port
+ *
+ * @return	'0' on success and "< 0" on failure
+ *
+ * @note	None
+ * @ingroup rte_pmd_qdma_func
+ ******************************************************************************/
+int rte_pmd_qdma_qstats(uint8_t port_id, uint16_t queue);
+
+/******************************************************************************/
+/**
+ * Clear the qdma Tx and Rx queue stats for the given queue ID
+ *
+ * @param	port_id Port ID
+ * @param	queue  Queue ID relative to the Port
+ *
+ * @return	'0' on success and "< 0" on failure
+ *
+ * @note	None
+ * @ingroup rte_pmd_qdma_func
+ ******************************************************************************/
+int rte_pmd_qdma_qstats_clear(uint8_t port_id, uint16_t queue);
+
 /******************************************************************************/
 /**
  * Dumps the queue contexts and queue specific SW
diff --git a/QDMA/DPDK/drivers/net/qdma/version.h b/QDMA/DPDK/drivers/net/qdma/version.h
index 9b4233c..e17690d 100755
--- a/QDMA/DPDK/drivers/net/qdma/version.h
+++ b/QDMA/DPDK/drivers/net/qdma/version.h
@@ -38,8 +38,8 @@
 #define qdma_stringify(x...)	qdma_stringify1(x)
 
 #define QDMA_PMD_MAJOR		2023
-#define QDMA_PMD_MINOR		1
-#define QDMA_PMD_PATCHLEVEL	2
+#define QDMA_PMD_MINOR		2
+#define QDMA_PMD_PATCHLEVEL	0
 
 #define QDMA_PMD_VERSION      \
 	qdma_stringify(QDMA_PMD_MAJOR) "." \
diff --git a/QDMA/DPDK/drivers/net/qdma/version.map b/QDMA/DPDK/drivers/net/qdma/version.map
index 1b9c22d..2944145 100755
--- a/QDMA/DPDK/drivers/net/qdma/version.map
+++ b/QDMA/DPDK/drivers/net/qdma/version.map
@@ -61,10 +61,11 @@ DPDK_21 {
 	rte_pmd_qdma_mm_cmpt_process;
 	rte_pmd_qdma_dev_cmptq_stop;
 	rte_pmd_qdma_dbg_qdevice;
+	rte_pmd_qdma_qstats;
+	rte_pmd_qdma_qstats_clear;
 	rte_pmd_qdma_dev_close;
 	rte_pmd_qdma_dev_fp_ops_config;
 
-
 	local: *;
 };
 
@@ -98,6 +99,8 @@ DPDK_22 {
         rte_pmd_qdma_mm_cmpt_process;
         rte_pmd_qdma_dev_cmptq_stop;
         rte_pmd_qdma_dbg_qdevice;
+        rte_pmd_qdma_qstats;
+        rte_pmd_qdma_qstats_clear;
         rte_pmd_qdma_dev_close;
         rte_pmd_qdma_dev_fp_ops_config;
         rte_pmd_qdma_compat_pci_write_reg;
@@ -108,10 +111,9 @@ DPDK_22 {
         rte_pmd_qdma_get_dev_id;
         rte_pmd_qdma_dev_started;
         rte_pmd_qdma_eth_dev_to_pci;
-	rte_pmd_qdma_get_device;
+		rte_pmd_qdma_get_device;
         rte_pmd_qdma_validate_dev;
 
-
         local: *;
 };
 
@@ -145,6 +147,8 @@ DPDK_23 {
         rte_pmd_qdma_mm_cmpt_process;
         rte_pmd_qdma_dev_cmptq_stop;
         rte_pmd_qdma_dbg_qdevice;
+        rte_pmd_qdma_qstats;
+        rte_pmd_qdma_qstats_clear;
         rte_pmd_qdma_dev_close;
         rte_pmd_qdma_dev_fp_ops_config;
         rte_pmd_qdma_compat_pci_write_reg;
@@ -155,9 +159,8 @@ DPDK_23 {
         rte_pmd_qdma_get_dev_id;
         rte_pmd_qdma_dev_started;
         rte_pmd_qdma_eth_dev_to_pci;
-	rte_pmd_qdma_get_device;
+		rte_pmd_qdma_get_device;
         rte_pmd_qdma_validate_dev;
 
-
         local: *;
 };
diff --git a/QDMA/DPDK/examples/qdma_testapp/testapp.c b/QDMA/DPDK/examples/qdma_testapp/testapp.c
index 8ce09c8..e8d7703 100755
--- a/QDMA/DPDK/examples/qdma_testapp/testapp.c
+++ b/QDMA/DPDK/examples/qdma_testapp/testapp.c
@@ -219,6 +219,10 @@ int do_recv_st(int port_id, int fd, int queueid, int input_size)
 	user_bar_idx = pinfo[port_id].user_bar_idx;
 	PciWrite(user_bar_idx, C2H_ST_QID_REG, (queueid + qbase), port_id);
 
+	reg_val = PciRead(user_bar_idx, C2H_CONTROL_REG, port_id);
+	reg_val &= C2H_CONTROL_REG_MASK;
+	loopback_en = reg_val & ST_LOOPBACK_EN;
+
 	/* As per  hardware design a single completion will point to atmost
 	 * 7 descriptors. So If the size of the buffer in descriptor is 4KB ,
 	 * then a single completion which corresponds a packet can  give you
@@ -231,7 +235,14 @@ int do_recv_st(int port_id, int fd, int queueid, int input_size)
 	 * packets, which needs to be combined as one in application.
 	 */
 
-	max_completion_size = pinfo[port_id].buff_size * 7;
+	if (!loopback_en)
+		max_completion_size = pinfo[port_id].buff_size * 7;
+	else {
+		/* For loopback case, each packet handles 4KB only,
+		 * so limiting to buffer size.
+		 */
+		max_completion_size = pinfo[port_id].buff_size;
+	}
 
 	/* Calculate number of packets to receive and programming AXI Master Lite bar(user bar) */
 	if (input_size == 0) /* zerobyte support uses one descriptor */
@@ -247,9 +258,6 @@ int do_recv_st(int port_id, int fd, int queueid, int input_size)
 		only_pkt = 1;
 	}
 
-	reg_val = PciRead(user_bar_idx, C2H_CONTROL_REG, port_id);
-	reg_val &= C2H_CONTROL_REG_MASK;
-	loopback_en = reg_val & ST_LOOPBACK_EN;
 	if (!loopback_en) {
 		PciWrite(user_bar_idx, C2H_PACKET_COUNT_REG, num_pkts, port_id);
 
@@ -877,6 +885,14 @@ int port_init(int port_id, int num_queues, int st_queues,
 	 */
 	nb_buff += ((NUM_TX_PKTS) * num_queues);
 
+	/*
+	* rte_mempool_create_empty() has sanity check to refuse large cache
+	* size compared to the number of elements.
+	* CACHE_FLUSHTHRESH_MULTIPLIER (1.5) is defined in a C file, so using a
+	* constant number 2 instead.
+	*/
+	nb_buff = RTE_MAX(nb_buff, MP_CACHE_SZ * 2);
+
 	mbuf_pool = rte_pktmbuf_pool_create(pinfo[port_id].mem_pool, nb_buff,
 			MP_CACHE_SZ, 0, buff_size +
 			RTE_PKTMBUF_HEADROOM,
diff --git a/QDMA/DPDK/tools/0001-Add-QDMA-xdebug-to-proc-info-of-dpdk-22.11.patch b/QDMA/DPDK/tools/0001-Add-QDMA-xdebug-to-proc-info-of-dpdk-22.11.patch
new file mode 100755
index 0000000..e3501a4
--- /dev/null
+++ b/QDMA/DPDK/tools/0001-Add-QDMA-xdebug-to-proc-info-of-dpdk-22.11.patch
@@ -0,0 +1,362 @@
+From aa6931cc32ac9c7baea453717a34816f01803536 Mon Sep 17 00:00:00 2001
+From: Suryanarayana Raju Sangani <ssangani@xilinx.com>
+Date: Mon, 19 Jun 2023 12:44:20 +0530
+Subject: [PATCH] Add QDMA xdebug to proc-info of dpdk-22.11
+
+Signed-off-by: Nikhil agarwal <nagarwal@xilinx.com>
+Signed-off-by: Prasad Pardeshi <prasadp@xilinx.com>
+Signed-off-by: Sangani Suryanarayana Raju <ssangani@xilinx.com>
+---
+ app/proc-info/main.c      | 195 ++++++++++++++++++++++++++++++++++++--
+ app/proc-info/meson.build |   2 +-
+ app/test-pmd/config.c     |   2 +
+ 3 files changed, 189 insertions(+), 10 deletions(-)
+ mode change 100644 => 100755 app/proc-info/main.c
+ mode change 100644 => 100755 app/proc-info/meson.build
+ mode change 100644 => 100755 app/test-pmd/config.c
+
+diff --git a/app/proc-info/main.c b/app/proc-info/main.c
+old mode 100644
+new mode 100755
+index 53e852a..eeb20e7
+--- a/app/proc-info/main.c
++++ b/app/proc-info/main.c
+@@ -32,6 +32,7 @@
+ #ifdef RTE_LIB_METRICS
+ #include <rte_metrics.h>
+ #endif
++#include <rte_pmd_qdma.h>
+ #include <rte_cycles.h>
+ #ifdef RTE_LIB_SECURITY
+ #include <rte_security.h>
+@@ -55,9 +56,31 @@
+ 	STATS_BDR_FMT, s, w, STATS_BDR_FMT)
+ 
+ /* mask of enabled ports */
+-static unsigned long enabled_port_mask;
++static uint64_t enabled_port_mask;
++/**< QID for queue context */
++static uint32_t qid;
++/**< desc dump type */
++static uint32_t desc_type;
++/**< QID for desc start value */
++static uint32_t start;
++/**< QID for desc end value */
++static uint32_t end;
++/**< Enable desc dump. */
++static uint32_t enable_desc_dump;
+ /* Enable stats. */
+ static uint32_t enable_stats;
++/**< Enable Device Structs */
++static uint32_t qdma_device;
++/**< Enable QDMA Tx and Rx queue stats */
++static uint32_t qdma_qstats;
++/**< Clear QDMA Tx and Rx queue stats */
++static uint32_t qdma_qstats_clear;
++/**< Enable Queue context and Queue structs. */
++static uint32_t queue_info;
++/**< Enable register field information. */
++static uint32_t reg_info;
++/**< Register address */
++static uint32_t reg_addr;
+ /* Enable xstats. */
+ static uint32_t enable_xstats;
+ /* Enable collectd format */
+@@ -76,6 +99,8 @@ static uint32_t reset_stats;
+ static uint32_t reset_xstats;
+ /* Enable memory info. */
+ static uint32_t mem_info;
++/**< Enable Global Errors . */
++static uint32_t qdma_csr_info;
+ /* Enable displaying xstat name. */
+ static uint32_t enable_xstats_name;
+ static char *xstats_name;
+@@ -138,8 +163,15 @@ proc_info_usage(const char *prgname)
+ {
+ 	printf("%s [EAL options] -- -p PORTMASK\n"
+ 		"  -m to display DPDK memory zones, segments and TAILQ information\n"
++		"  -g to display DPDK QDMA PMD global CSR info\n"
+ 		"  -p PORTMASK: hexadecimal bitmask of ports to retrieve stats for\n"
+ 		"  --stats: to display port statistics, enabled by default\n"
++		"  --qdevice: to display QDMA device structure\n"
++		"  --qstats: to display QDMA Tx and Rx queue stats\n"
++		"  --qstats_clr: to clear QDMA Tx and Rx queue stats\n"
++		"  --qinfo: to display QDMA queue context and queue structures\n"
++		"  --reg-info {reg_addr}: to display field info of a register at reg_addr offset\n"
++		"  --desc-dump {rx | tx | cmpt}: to dump QDMA queue descriptors\n"
+ 		"  --xstats: to display extended port statistics, disabled by "
+ 			"default\n"
+ #ifdef RTE_LIB_METRICS
+@@ -179,7 +211,7 @@ proc_info_usage(const char *prgname)
+ /*
+  * Parse the portmask provided at run time.
+  */
+-static int
++static int64_t
+ parse_portmask(const char *portmask)
+ {
+ 	char *end = NULL;
+@@ -196,6 +228,71 @@ parse_portmask(const char *portmask)
+ 	return 0;
+ }
+ 
++/*
++ *  * Parse the QID provided at run time.
++ **/
++static int
++parse_int(const char *id)
++{
++	char *end = NULL;
++	unsigned long val;
++
++	errno = 0;
++
++	/* parse decimal string */
++	val = strtoul(id, &end, 10);
++	if ((id[0] == '\0') || (end == NULL) || (*end != '\0') ||
++			(errno != 0)) {
++		printf("%s ERROR parsing the QID\n", __func__);
++		return -1;
++	}
++
++	return val;
++}
++
++/*
++ * Parse the register address provided at run time.
++ */
++static int32_t
++parse_reg_addr(const char *addr)
++{
++	char *end = NULL;
++
++	errno = 0;
++
++	/* parse hexadecimal string */
++	reg_addr = strtoul(addr, &end, 16);
++	if ((addr[0] == '\0') || (end == NULL) || (*end != '\0') ||
++		(errno != 0)) {
++		printf("%s ERROR parsing the register address\n", __func__);
++		return -1;
++	}
++
++	if (reg_addr == 0)
++		return -1;
++
++	return reg_addr;
++}
++
++/*
++ *  * Parse the desc dump type provided at run time.
++ **/
++static int
++parse_desc_type(const char *type)
++{
++	if (!strcmp(type, "rx")) {
++		desc_type = RTE_PMD_QDMA_XDEBUG_DESC_C2H;
++	} else if (!strcmp(type, "tx")) {
++		desc_type = RTE_PMD_QDMA_XDEBUG_DESC_H2C;
++	} else if (!strcmp(type, "cmpt")) {
++		desc_type = RTE_PMD_QDMA_XDEBUG_DESC_CMPT;
++	} else {
++		printf("%s ERROR parsing the desc type\n", __func__);
++		return -1;
++	}
++	return 0;
++}
++
+ /*
+  * Parse ids value list into array
+  */
+@@ -273,11 +370,17 @@ proc_info_preparse_args(int argc, char **argv)
+ static int
+ proc_info_parse_args(int argc, char **argv)
+ {
+-	int opt;
++	int opt, ret;
+ 	int option_index;
+ 	char *prgname = argv[0];
+ 	static struct option long_option[] = {
+ 		{"stats", 0, NULL, 0},
++		{"qdevice", 0, NULL, 0},
++		{"qstats", 0, NULL, 0},
++		{"qstats_clr", 0, NULL, 0},
++		{"qinfo", 0, NULL, 0},
++		{"reg-info", required_argument, NULL, 1},
++		{"desc-dump", required_argument, NULL, 1},
+ 		{"stats-reset", 0, NULL, 0},
+ 		{"xstats", 0, NULL, 0},
+ #ifdef RTE_LIB_METRICS
+@@ -309,7 +412,7 @@ proc_info_parse_args(int argc, char **argv)
+ 		proc_info_usage(prgname);
+ 
+ 	/* Parse command line */
+-	while ((opt = getopt_long(argc, argv, "p:m",
++	while ((opt = getopt_long(argc, argv, "p:mq:gs:e:",
+ 			long_option, &option_index)) != EOF) {
+ 		switch (opt) {
+ 		/* portmask */
+@@ -322,11 +425,54 @@ proc_info_parse_args(int argc, char **argv)
+ 		case 'm':
+ 			mem_info = 1;
+ 			break;
++		case 'g':
++			qdma_csr_info = 1;
++			break;
++		case 'q':
++			ret = parse_int(optarg);
++			if (ret < 0) {
++				printf("Invalid queue\n");
++				return -1;
++			}
++			qid = ret;
++			break;
++		case 's':
++			ret = parse_int(optarg);
++			if (ret < 0) {
++				printf("Invalid start value\n");
++				return -1;
++			}
++			start = ret;
++			break;
++		case 'e':
++			ret = parse_int(optarg);
++			if (ret < 0) {
++				printf("Invalid end value\n");
++				return -1;
++			}
++			end = ret;
++			break;
+ 		case 0:
+ 			/* Print stats */
+ 			if (!strncmp(long_option[option_index].name, "stats",
+ 					MAX_LONG_OPT_SZ))
+ 				enable_stats = 1;
++			/* Print qdma device */
++			if (!strncmp(long_option[option_index].name, "qdevice",
++					MAX_LONG_OPT_SZ))
++				qdma_device = 1;
++			/* Print qdma Tx and Rx queue stats */
++			if (!strncmp(long_option[option_index].name, "qstats",
++					MAX_LONG_OPT_SZ))
++				qdma_qstats = 1;
++                        /* Clear qdma Tx and Rx queue stats */
++                        if (!strncmp(long_option[option_index].name, "qstats_clr",
++                                        MAX_LONG_OPT_SZ))
++                                qdma_qstats_clear = 1;
++			/* Print queue context and queue Structures*/
++			if (!strncmp(long_option[option_index].name, "qinfo",
++					MAX_LONG_OPT_SZ))
++				queue_info = 1;
+ 			/* Print xstats */
+ 			else if (!strncmp(long_option[option_index].name, "xstats",
+ 					MAX_LONG_OPT_SZ))
+@@ -425,6 +571,24 @@ proc_info_parse_args(int argc, char **argv)
+ 					return -1;
+ 				}
+ 				enable_shw_tx_desc_dump = 1;
++			} else if (!strncmp(long_option[option_index].name,
++					"desc-dump",
++					MAX_LONG_OPT_SZ)) {
++				if (parse_desc_type(optarg) < 0) {
++					printf("desc-dump parse error.\n");
++					proc_info_usage(prgname);
++					return -1;
++				}
++				enable_desc_dump = 1;
++			} else if (!strncmp(long_option[option_index].name,
++					"reg-info",
++					MAX_LONG_OPT_SZ)) {
++				if (parse_reg_addr(optarg) < 0) {
++					printf("reg-info parse error.\n");
++					proc_info_usage(prgname);
++					return -1;
++				}
++				reg_info = 1;
+ 			}
+ 			break;
+ 		default:
+@@ -1791,7 +1955,6 @@ main(int argc, char **argv)
+ 
+ 	if (mem_info) {
+ 		meminfo_display();
+-		return 0;
+ 	}
+ 
+ 	nb_ports = rte_eth_dev_count_avail();
+@@ -1807,7 +1970,7 @@ main(int argc, char **argv)
+ 	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ 
+ 		/* Skip if port is not in mask */
+-		if ((enabled_port_mask & (1ul << i)) == 0)
++		if ((enabled_port_mask & ((uint64_t)1 << i)) == 0)
+ 			continue;
+ 
+ 		/* Skip if port is unused */
+@@ -1832,6 +1995,23 @@ main(int argc, char **argv)
+ 			metrics_display(i);
+ #endif
+ 
++		else if (qdma_csr_info)
++			rte_pmd_qdma_dbg_regdump(i);
++		else if (qdma_device)
++			rte_pmd_qdma_dbg_qdevice(i);
++		else if (qdma_qstats)
++			rte_pmd_qdma_qstats(i, qid);
++                else if (qdma_qstats_clear)
++                        rte_pmd_qdma_qstats_clear(i, qid);
++		else if (queue_info)
++			rte_pmd_qdma_dbg_qinfo(i, qid);
++		else if (reg_info)
++			rte_pmd_qdma_dbg_reg_info_dump(i,
++				1, reg_addr);
++		else if (enable_desc_dump)
++			rte_pmd_qdma_dbg_qdesc(i, qid,
++				start, end, desc_type);
++
+ 		if (enable_shw_rx_desc_dump)
+ 			nic_rx_descriptor_display(i, &rx_desc_param);
+ 		if (enable_shw_tx_desc_dump)
+@@ -1870,9 +2050,6 @@ main(int argc, char **argv)
+ 	if (enable_shw_module_eeprom)
+ 		show_module_eeprom_info();
+ 
+-	RTE_ETH_FOREACH_DEV(i)
+-		rte_eth_dev_close(i);
+-
+ 	ret = rte_eal_cleanup();
+ 	if (ret)
+ 		printf("Error from rte_eal_cleanup(), %d\n", ret);
+diff --git a/app/proc-info/meson.build b/app/proc-info/meson.build
+old mode 100644
+new mode 100755
+index 1563ce6..340b9ab
+--- a/app/proc-info/meson.build
++++ b/app/proc-info/meson.build
+@@ -8,7 +8,7 @@ if is_windows
+ endif
+ 
+ sources = files('main.c')
+-deps += ['ethdev', 'security']
++deps += ['ethdev', 'metrics', 'security', 'net_qdma']
+ if dpdk_conf.has('RTE_LIB_METRICS')
+     deps += 'metrics'
+ endif
+diff --git a/app/test-pmd/config.c b/app/test-pmd/config.c
+old mode 100644
+new mode 100755
+index acccb6b..6d51d62
+--- a/app/test-pmd/config.c
++++ b/app/test-pmd/config.c
+@@ -58,6 +58,8 @@
+ #include "testpmd.h"
+ #include "cmdline_mtr.h"
+ 
++#include <ethdev_driver.h>
++
+ #define ETHDEV_FWVERS_LEN 32
+ 
+ #ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */
+-- 
+2.25.1
+
-- 
GitLab