Patchwork [dpdk-dev,v2,16/20] net/ice: support basic RX/TX

Submitter Wenzhuo Lu
Date Dec. 3, 2018, 7:06 a.m.
Message ID <1543820821-108122-17-git-send-email-wenzhuo.lu@intel.com>
Permalink /patch/670213/
State New

Comments

Wenzhuo Lu - Dec. 3, 2018, 7:06 a.m.
Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
Signed-off-by: Qiming Yang <qiming.yang@intel.com>
Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
Signed-off-by: Jingjing Wu <jingjing.wu@intel.com>
---
 drivers/net/ice/ice_ethdev.c   |  17 ++
 drivers/net/ice/ice_lan_rxtx.c | 568 ++++++++++++++++++++++++++++++++++++++++-
 drivers/net/ice/ice_rxtx.h     |   8 +
 3 files changed, 591 insertions(+), 2 deletions(-)
Varghese, Vipin - Dec. 4, 2018, 5:42 a.m.
snipped
> +uint16_t
> +ice_recv_pkts(void *rx_queue,
> +	      struct rte_mbuf **rx_pkts,
> +	      uint16_t nb_pkts)
> +{
> +	struct ice_rx_queue *rxq = rx_queue;
> +	volatile union ice_rx_desc *rx_ring = rxq->rx_ring;
> +	volatile union ice_rx_desc *rxdp;
> +	union ice_rx_desc rxd;
> +	struct ice_rx_entry *sw_ring = rxq->sw_ring;
> +	struct ice_rx_entry *rxe;
> +	struct rte_mbuf *nmb; /* new allocated mbuf */
> +	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
> +	uint16_t rx_id = rxq->rx_tail;
> +	uint16_t nb_rx = 0;
> +	uint16_t nb_hold = 0;
> +	uint16_t rx_packet_len;
> +	uint32_t rx_status;
> +	uint64_t qword1;
> +	uint64_t dma_addr;
> +	uint64_t pkt_flags = 0;
> +	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
> +	struct rte_eth_dev *dev;
> +
> +	while (nb_rx < nb_pkts) {
> +		rxdp = &rx_ring[rx_id];
> +		qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
> +		rx_status = (qword1 & ICE_RXD_QW1_STATUS_M) >>
> +			    ICE_RXD_QW1_STATUS_S;
> +
> +		/* Check the DD bit first */
> +		if (!(rx_status & (1 << ICE_RX_DESC_STATUS_DD_S)))
> +			break;
> +
> +		/* allocate mbuf */
> +		nmb = rte_mbuf_raw_alloc(rxq->mp);
> +		if (unlikely(!nmb)) {
> +			dev = ICE_VSI_TO_ETH_DEV(rxq->vsi);
> +			dev->data->rx_mbuf_alloc_failed++;
> +			break;
> +		}

Should we check whether the received packet length is greater than the mbuf pkt_len? If so, we would need a bulk alloc with n_segs.

> +		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
> +
> +		nb_hold++;
> +		rxe = &sw_ring[rx_id]; /* get corresponding mbuf in SW ring */
> +		rx_id++;
> +		if (unlikely(rx_id == rxq->nb_rx_desc))
> +			rx_id = 0;
> +		rxm = rxe->mbuf;
> +		rxe->mbuf = nmb;
> +		dma_addr =
> +			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
> +
> +		/**
> +		 * fill the read format of descriptor with physic address in
> +		 * new allocated mbuf: nmb
> +		 */
> +		rxdp->read.hdr_addr = 0;
> +		rxdp->read.pkt_addr = dma_addr;
> +
> +		/* calculate rx_packet_len of the received pkt */
> +		rx_packet_len = ((qword1 & ICE_RXD_QW1_LEN_PBUF_M) >>
> +				ICE_RXD_QW1_LEN_PBUF_S) - rxq->crc_len;
> +
> +		/* fill old mbuf with received descriptor: rxd */
> +		rxm->data_off = RTE_PKTMBUF_HEADROOM;
> +		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
> +		rxm->nb_segs = 1;

Same comment as above: is a multi-segment alloc needed for larger packets, or for a smaller pkt_len in the mempool?

Snipped
Varghese, Vipin - Dec. 4, 2018, 5:44 a.m.
Maybe this logic is just to allocate the initial descriptor and not the actual one; if so, please ignore my comments.

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Varghese, Vipin
> Sent: Tuesday, December 4, 2018 11:12 AM
> To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Yang, Qiming
> <qiming.yang@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>; Wu, Jingjing
> <jingjing.wu@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v2 16/20] net/ice: support basic RX/TX
> 
> snipped
> > +uint16_t
> > +ice_recv_pkts(void *rx_queue,
> > +	      struct rte_mbuf **rx_pkts,
> > +	      uint16_t nb_pkts)
> > +{
> > +	struct ice_rx_queue *rxq = rx_queue;
> > +	volatile union ice_rx_desc *rx_ring = rxq->rx_ring;
> > +	volatile union ice_rx_desc *rxdp;
> > +	union ice_rx_desc rxd;
> > +	struct ice_rx_entry *sw_ring = rxq->sw_ring;
> > +	struct ice_rx_entry *rxe;
> > +	struct rte_mbuf *nmb; /* new allocated mbuf */
> > +	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
> > +	uint16_t rx_id = rxq->rx_tail;
> > +	uint16_t nb_rx = 0;
> > +	uint16_t nb_hold = 0;
> > +	uint16_t rx_packet_len;
> > +	uint32_t rx_status;
> > +	uint64_t qword1;
> > +	uint64_t dma_addr;
> > +	uint64_t pkt_flags = 0;
> > +	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
> > +	struct rte_eth_dev *dev;
> > +
> > +	while (nb_rx < nb_pkts) {
> > +		rxdp = &rx_ring[rx_id];
> > +		qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
> > +		rx_status = (qword1 & ICE_RXD_QW1_STATUS_M) >>
> > +			    ICE_RXD_QW1_STATUS_S;
> > +
> > +		/* Check the DD bit first */
> > +		if (!(rx_status & (1 << ICE_RX_DESC_STATUS_DD_S)))
> > +			break;
> > +
> > +		/* allocate mbuf */
> > +		nmb = rte_mbuf_raw_alloc(rxq->mp);
> > +		if (unlikely(!nmb)) {
> > +			dev = ICE_VSI_TO_ETH_DEV(rxq->vsi);
> > +			dev->data->rx_mbuf_alloc_failed++;
> > +			break;
> > +		}
> 
> Should we check whether the received packet length is greater than the mbuf
> pkt_len? If so, we would need a bulk alloc with n_segs.
> 
> > +		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
> > +
> > +		nb_hold++;
> > +		rxe = &sw_ring[rx_id]; /* get corresponding mbuf in SW ring */
> > +		rx_id++;
> > +		if (unlikely(rx_id == rxq->nb_rx_desc))
> > +			rx_id = 0;
> > +		rxm = rxe->mbuf;
> > +		rxe->mbuf = nmb;
> > +		dma_addr =
> > +			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
> > +
> > +		/**
> > +		 * fill the read format of descriptor with physic address in
> > +		 * new allocated mbuf: nmb
> > +		 */
> > +		rxdp->read.hdr_addr = 0;
> > +		rxdp->read.pkt_addr = dma_addr;
> > +
> > +		/* calculate rx_packet_len of the received pkt */
> > +		rx_packet_len = ((qword1 & ICE_RXD_QW1_LEN_PBUF_M) >>
> > +				ICE_RXD_QW1_LEN_PBUF_S) - rxq->crc_len;
> > +
> > +		/* fill old mbuf with received descriptor: rxd */
> > +		rxm->data_off = RTE_PKTMBUF_HEADROOM;
> > +		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
> > +		rxm->nb_segs = 1;
> 
> Same comment as above: is a multi-segment alloc needed for larger packets,
> or for a smaller pkt_len in the mempool?
> 
> Snipped
Wenzhuo Lu - Dec. 6, 2018, 5:39 a.m.
Hi Vipin,


> -----Original Message-----
> From: Varghese, Vipin
> Sent: Tuesday, December 4, 2018 1:42 PM
> To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Yang, Qiming
> <qiming.yang@intel.com>; Li, Xiaoyun <xiaoyun.li@intel.com>; Wu, Jingjing
> <jingjing.wu@intel.com>
> Subject: RE: [dpdk-dev] [PATCH v2 16/20] net/ice: support basic RX/TX
> 
> snipped
> > +uint16_t
> > +ice_recv_pkts(void *rx_queue,
> > +	      struct rte_mbuf **rx_pkts,
> > +	      uint16_t nb_pkts)
> > +{
> > +	struct ice_rx_queue *rxq = rx_queue;
> > +	volatile union ice_rx_desc *rx_ring = rxq->rx_ring;
> > +	volatile union ice_rx_desc *rxdp;
> > +	union ice_rx_desc rxd;
> > +	struct ice_rx_entry *sw_ring = rxq->sw_ring;
> > +	struct ice_rx_entry *rxe;
> > +	struct rte_mbuf *nmb; /* new allocated mbuf */
> > +	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
> > +	uint16_t rx_id = rxq->rx_tail;
> > +	uint16_t nb_rx = 0;
> > +	uint16_t nb_hold = 0;
> > +	uint16_t rx_packet_len;
> > +	uint32_t rx_status;
> > +	uint64_t qword1;
> > +	uint64_t dma_addr;
> > +	uint64_t pkt_flags = 0;
> > +	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
> > +	struct rte_eth_dev *dev;
> > +
> > +	while (nb_rx < nb_pkts) {
> > +		rxdp = &rx_ring[rx_id];
> > +		qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
> > +		rx_status = (qword1 & ICE_RXD_QW1_STATUS_M) >>
> > +			    ICE_RXD_QW1_STATUS_S;
> > +
> > +		/* Check the DD bit first */
> > +		if (!(rx_status & (1 << ICE_RX_DESC_STATUS_DD_S)))
> > +			break;
> > +
> > +		/* allocate mbuf */
> > +		nmb = rte_mbuf_raw_alloc(rxq->mp);
> > +		if (unlikely(!nmb)) {
> > +			dev = ICE_VSI_TO_ETH_DEV(rxq->vsi);
> > +			dev->data->rx_mbuf_alloc_failed++;
> > +			break;
> > +		}
> 
> Should we check whether the received packet length is greater than the mbuf
> pkt_len? If so, we would need a bulk alloc with n_segs.
We cannot do it here. This is the fast path, and such a check would hurt performance badly. So we do the check beforehand and choose the right RX function.
Normally n_segs is supported by default.
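
For illustration, a minimal sketch of what that up-front selection could look like; ice_recv_scattered_pkts is hypothetical here and not part of this patch, which only installs ice_recv_pkts:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

uint16_t ice_recv_pkts(void *rxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
/* hypothetical multi-segment handler, not in this series */
uint16_t ice_recv_scattered_pkts(void *rxq, struct rte_mbuf **rx_pkts,
				 uint16_t nb_pkts);

/* Sketch only: pick the Rx burst function once, outside the fast path,
 * instead of testing the packet length per descriptor.
 */
void
ice_set_rx_function(struct rte_eth_dev *dev)
{
	if (dev->data->scattered_rx)
		/* frames larger than one Rx buffer get chained across mbufs */
		dev->rx_pkt_burst = ice_recv_scattered_pkts;
	else
		dev->rx_pkt_burst = ice_recv_pkts;
}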

> 
> > +		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
> > +
> > +		nb_hold++;
> > +		rxe = &sw_ring[rx_id]; /* get corresponding mbuf in SW ring */
> > +		rx_id++;
> > +		if (unlikely(rx_id == rxq->nb_rx_desc))
> > +			rx_id = 0;
> > +		rxm = rxe->mbuf;
> > +		rxe->mbuf = nmb;
> > +		dma_addr =
> > +			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
> > +
> > +		/**
> > +		 * fill the read format of descriptor with physic address in
> > +		 * new allocated mbuf: nmb
> > +		 */
> > +		rxdp->read.hdr_addr = 0;
> > +		rxdp->read.pkt_addr = dma_addr;
> > +
> > +		/* calculate rx_packet_len of the received pkt */
> > +		rx_packet_len = ((qword1 & ICE_RXD_QW1_LEN_PBUF_M) >>
> > +				ICE_RXD_QW1_LEN_PBUF_S) - rxq->crc_len;
> > +
> > +		/* fill old mbuf with received descriptor: rxd */
> > +		rxm->data_off = RTE_PKTMBUF_HEADROOM;
> > +		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
> > +		rxm->nb_segs = 1;
> 
> Same comment as above: is a multi-segment alloc needed for larger packets,
> or for a smaller pkt_len in the mempool?
> 
> Snipped
Varghese, Vipin - Dec. 6, 2018, 5:55 a.m.
> >
> > snipped
> > > +uint16_t
> > > +ice_recv_pkts(void *rx_queue,
> > > +	      struct rte_mbuf **rx_pkts,
> > > +	      uint16_t nb_pkts)
> > > +{
> > > +	struct ice_rx_queue *rxq = rx_queue;
> > > +	volatile union ice_rx_desc *rx_ring = rxq->rx_ring;
> > > +	volatile union ice_rx_desc *rxdp;
> > > +	union ice_rx_desc rxd;
> > > +	struct ice_rx_entry *sw_ring = rxq->sw_ring;
> > > +	struct ice_rx_entry *rxe;
> > > +	struct rte_mbuf *nmb; /* new allocated mbuf */
> > > +	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
> > > +	uint16_t rx_id = rxq->rx_tail;
> > > +	uint16_t nb_rx = 0;
> > > +	uint16_t nb_hold = 0;
> > > +	uint16_t rx_packet_len;
> > > +	uint32_t rx_status;
> > > +	uint64_t qword1;
> > > +	uint64_t dma_addr;
> > > +	uint64_t pkt_flags = 0;
> > > +	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
> > > +	struct rte_eth_dev *dev;
> > > +
> > > +	while (nb_rx < nb_pkts) {
> > > +		rxdp = &rx_ring[rx_id];
> > > +		qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
> > > +		rx_status = (qword1 & ICE_RXD_QW1_STATUS_M) >>
> > > +			    ICE_RXD_QW1_STATUS_S;
> > > +
> > > +		/* Check the DD bit first */
> > > +		if (!(rx_status & (1 << ICE_RX_DESC_STATUS_DD_S)))
> > > +			break;
> > > +
> > > +		/* allocate mbuf */
> > > +		nmb = rte_mbuf_raw_alloc(rxq->mp);
> > > +		if (unlikely(!nmb)) {
> > > +			dev = ICE_VSI_TO_ETH_DEV(rxq->vsi);
> > > +			dev->data->rx_mbuf_alloc_failed++;
> > > +			break;
> > > +		}
> >
> > Should we check whether the received packet length is greater than the
> > mbuf pkt_len? If so, we would need a bulk alloc with n_segs.
> We cannot do it here. This is the fast path, and such a check would hurt
> performance badly. So we do the check beforehand and choose the right RX
> function. Normally n_segs is supported by default.
Maybe I am not clear on this approach; let's assume a packet of length 6000 bytes comes in. The mempool mbuf data size is 2000 bytes, so storing a 6000-byte pkt_len requires 3 segments of 2000 bytes each.

As per your update, since performance would be affected, for a 6000-byte packet you will pick only 1 segment of 2000 bytes and the rest is discarded. Is that the correct understanding?
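
To make the arithmetic explicit, a rough sketch (not driver code) of the segment count a scattered-Rx path would need per frame, using the numbers from the example above; the helper name is hypothetical:

#include <stdint.h>

/* Number of chained mbufs needed for one frame, where rx_buf_len is the
 * data buffer length programmed per descriptor.
 * e.g. a 6000-byte frame with 2000-byte buffers needs 3 mbufs.
 */
static inline uint16_t
example_nb_segs(uint32_t pkt_len, uint16_t rx_buf_len)
{
	return (pkt_len + rx_buf_len - 1) / rx_buf_len;
}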

Snipped.

Patch

diff --git a/drivers/net/ice/ice_ethdev.c b/drivers/net/ice/ice_ethdev.c
index 5fafcb4..b78a342 100644
--- a/drivers/net/ice/ice_ethdev.c
+++ b/drivers/net/ice/ice_ethdev.c
@@ -1267,7 +1267,22 @@  struct ice_xstats_name_off {
 	int ret;
 
 	dev->dev_ops = &ice_eth_dev_ops;
+	dev->rx_pkt_burst = ice_recv_pkts;
+	dev->tx_pkt_burst = ice_xmit_pkts;
+	dev->tx_pkt_prepare = ice_prep_pkts;
 
+	/* For secondary processes, we don't initialise any further as primary
+	 * has already done this work. Only check we don't need a different
+	 * RX function.
+	 */
+	if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
+		ice_set_rx_function(dev);
+		ice_set_tx_function(dev);
+		PMD_INIT_LOG(ERR,
+			     "Control plane functions not "
+			     "supported by secondary process.");
+		return 0;
+	}
 	ice_set_default_ptype_table(dev);
 	pci_dev = RTE_DEV_TO_PCI(dev->device);
 	intr_handle = &pci_dev->intr_handle;
@@ -1733,6 +1748,8 @@  static int ice_init_rss(struct ice_pf *pf)
 		goto rx_err;
 	}
 
+	ice_set_rx_function(dev);
+
 	/* enable Rx interrput and mapping Rx queue to interrupt vector */
 	if (ice_rxq_intr_setup(dev))
 		return -EIO;
diff --git a/drivers/net/ice/ice_lan_rxtx.c b/drivers/net/ice/ice_lan_rxtx.c
index e0c5d4b..5aa0ab6 100644
--- a/drivers/net/ice/ice_lan_rxtx.c
+++ b/drivers/net/ice/ice_lan_rxtx.c
@@ -900,8 +900,81 @@ 
 	rte_free(q);
 }
 
+/* Translate the rx descriptor status to pkt flags */
+static inline uint64_t
+ice_rxd_status_to_pkt_flags(uint64_t qword)
+{
+	uint64_t flags;
+
+	/* Check if RSS_HASH */
+	flags = (((qword >> ICE_RX_DESC_STATUS_FLTSTAT_S) &
+		  ICE_RX_DESC_FLTSTAT_RSS_HASH) ==
+		 ICE_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0;
+
+	return flags;
+}
+
+/* Rx L3/L4 checksum */
+static inline uint64_t
+ice_rxd_error_to_pkt_flags(uint64_t qword)
+{
+	uint64_t flags = 0;
+	uint64_t error_bits = (qword >> ICE_RXD_QW1_ERROR_S);
+
+	if (likely((error_bits & ICE_RX_ERR_BITS) == 0)) {
+		flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD);
+		return flags;
+	}
+
+	if (unlikely(error_bits & (1 << ICE_RX_DESC_ERROR_IPE_S)))
+		flags |= PKT_RX_IP_CKSUM_BAD;
+	else
+		flags |= PKT_RX_IP_CKSUM_GOOD;
+
+	if (unlikely(error_bits & (1 << ICE_RX_DESC_ERROR_L4E_S)))
+		flags |= PKT_RX_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_L4_CKSUM_GOOD;
+
+	if (unlikely(error_bits & (1 << ICE_RX_DESC_ERROR_EIPE_S)))
+		flags |= PKT_RX_EIP_CKSUM_BAD;
+
+	return flags;
+}
+
+static inline void
+ice_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union ice_rx_desc *rxdp)
+{
+	if (rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) &
+	    (1 << ICE_RX_DESC_STATUS_L2TAG1P_S)) {
+		mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED;
+		mb->vlan_tci =
+			rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1);
+		PMD_RX_LOG(DEBUG, "Descriptor l2tag1: %u",
+			   rte_le_to_cpu_16(rxdp->wb.qword0.lo_dword.l2tag1));
+	} else {
+		mb->vlan_tci = 0;
+	}
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+	if (rte_le_to_cpu_16(rxdp->wb.qword2.ext_status) &
+	    (1 << ICE_RX_DESC_EXT_STATUS_L2TAG2P_S)) {
+		mb->ol_flags |= PKT_RX_QINQ_STRIPPED | PKT_RX_QINQ |
+				PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN;
+		mb->vlan_tci_outer = mb->vlan_tci;
+		mb->vlan_tci = rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2);
+		PMD_RX_LOG(DEBUG, "Descriptor l2tag2_1: %u, l2tag2_2: %u",
+			   rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_1),
+			   rte_le_to_cpu_16(rxdp->wb.qword2.l2tag2_2));
+	} else {
+		mb->vlan_tci_outer = 0;
+	}
+#endif
+	PMD_RX_LOG(DEBUG, "Mbuf vlan_tci: %u, vlan_tci_outer: %u",
+		   mb->vlan_tci, mb->vlan_tci_outer);
+}
 const uint32_t *
-ice_dev_supported_ptypes_get(struct rte_eth_dev *dev __rte_unused)
+ice_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 {
 	static const uint32_t ptypes[] = {
 		/* refers to ice_get_default_pkt_type() */
@@ -933,7 +1006,9 @@ 
 		RTE_PTYPE_UNKNOWN
 	};
 
-	return ptypes;
+	if (dev->rx_pkt_burst == ice_recv_pkts)
+		return ptypes;
+	return NULL;
 }
 
 void
@@ -1044,6 +1119,495 @@ 
 	dev->data->nb_tx_queues = 0;
 }
 
+uint16_t
+ice_recv_pkts(void *rx_queue,
+	      struct rte_mbuf **rx_pkts,
+	      uint16_t nb_pkts)
+{
+	struct ice_rx_queue *rxq = rx_queue;
+	volatile union ice_rx_desc *rx_ring = rxq->rx_ring;
+	volatile union ice_rx_desc *rxdp;
+	union ice_rx_desc rxd;
+	struct ice_rx_entry *sw_ring = rxq->sw_ring;
+	struct ice_rx_entry *rxe;
+	struct rte_mbuf *nmb; /* new allocated mbuf */
+	struct rte_mbuf *rxm; /* pointer to store old mbuf in SW ring */
+	uint16_t rx_id = rxq->rx_tail;
+	uint16_t nb_rx = 0;
+	uint16_t nb_hold = 0;
+	uint16_t rx_packet_len;
+	uint32_t rx_status;
+	uint64_t qword1;
+	uint64_t dma_addr;
+	uint64_t pkt_flags = 0;
+	uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl;
+	struct rte_eth_dev *dev;
+
+	while (nb_rx < nb_pkts) {
+		rxdp = &rx_ring[rx_id];
+		qword1 = rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len);
+		rx_status = (qword1 & ICE_RXD_QW1_STATUS_M) >>
+			    ICE_RXD_QW1_STATUS_S;
+
+		/* Check the DD bit first */
+		if (!(rx_status & (1 << ICE_RX_DESC_STATUS_DD_S)))
+			break;
+
+		/* allocate mbuf */
+		nmb = rte_mbuf_raw_alloc(rxq->mp);
+		if (unlikely(!nmb)) {
+			dev = ICE_VSI_TO_ETH_DEV(rxq->vsi);
+			dev->data->rx_mbuf_alloc_failed++;
+			break;
+		}
+		rxd = *rxdp; /* copy descriptor in ring to temp variable*/
+
+		nb_hold++;
+		rxe = &sw_ring[rx_id]; /* get corresponding mbuf in SW ring */
+		rx_id++;
+		if (unlikely(rx_id == rxq->nb_rx_desc))
+			rx_id = 0;
+		rxm = rxe->mbuf;
+		rxe->mbuf = nmb;
+		dma_addr =
+			rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb));
+
+		/**
+		 * fill the read format of the descriptor with the physical
+		 * address of the newly allocated mbuf: nmb
+		 */
+		rxdp->read.hdr_addr = 0;
+		rxdp->read.pkt_addr = dma_addr;
+
+		/* calculate rx_packet_len of the received pkt */
+		rx_packet_len = ((qword1 & ICE_RXD_QW1_LEN_PBUF_M) >>
+				ICE_RXD_QW1_LEN_PBUF_S) - rxq->crc_len;
+
+		/* fill old mbuf with received descriptor: rxd */
+		rxm->data_off = RTE_PKTMBUF_HEADROOM;
+		rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM));
+		rxm->nb_segs = 1;
+		rxm->next = NULL;
+		rxm->pkt_len = rx_packet_len;
+		rxm->data_len = rx_packet_len;
+		rxm->port = rxq->port_id;
+		ice_rxd_to_vlan_tci(rxm, rxdp);
+		rxm->packet_type = ptype_tbl[(uint8_t)((qword1 &
+							ICE_RXD_QW1_PTYPE_M) >>
+						       ICE_RXD_QW1_PTYPE_S)];
+		pkt_flags = ice_rxd_status_to_pkt_flags(qword1);
+		pkt_flags |= ice_rxd_error_to_pkt_flags(qword1);
+		if (pkt_flags & PKT_RX_RSS_HASH)
+			rxm->hash.rss =
+				rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss);
+		rxm->ol_flags |= pkt_flags;
+		/* copy old mbuf to rx_pkts */
+		rx_pkts[nb_rx++] = rxm;
+	}
+	rxq->rx_tail = rx_id;
+	/**
+	 * If the number of free RX descriptors is greater than the RX free
+	 * threshold of the queue, advance the receive tail register of queue.
+	 * Update that register with the value of the last processed RX
+	 * descriptor minus 1.
+	 */
+	nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold);
+	if (nb_hold > rxq->rx_free_thresh) {
+		rx_id = (uint16_t)(rx_id == 0 ?
+				   (rxq->nb_rx_desc - 1) : (rx_id - 1));
+		/* write TAIL register */
+		ICE_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		nb_hold = 0;
+	}
+	rxq->nb_rx_hold = nb_hold;
+
+	/* return received packet in the burst */
+	return nb_rx;
+}
+
+static inline void
+ice_txd_enable_checksum(uint64_t ol_flags,
+			uint32_t *td_cmd,
+			uint32_t *td_offset,
+			union ice_tx_offload tx_offload)
+{
+	/* L2 length must be set. */
+	*td_offset |= (tx_offload.l2_len >> 1) <<
+		      ICE_TX_DESC_LEN_MACLEN_S;
+
+	/* Enable L3 checksum offloads */
+	if (ol_flags & PKT_TX_IP_CKSUM) {
+		*td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4_CSUM;
+		*td_offset |= (tx_offload.l3_len >> 2) <<
+			      ICE_TX_DESC_LEN_IPLEN_S;
+	} else if (ol_flags & PKT_TX_IPV4) {
+		*td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
+		*td_offset |= (tx_offload.l3_len >> 2) <<
+			      ICE_TX_DESC_LEN_IPLEN_S;
+	} else if (ol_flags & PKT_TX_IPV6) {
+		*td_cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
+		*td_offset |= (tx_offload.l3_len >> 2) <<
+			      ICE_TX_DESC_LEN_IPLEN_S;
+	}
+
+	if (ol_flags & PKT_TX_TCP_SEG) {
+		*td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+		*td_offset |= (tx_offload.l4_len >> 2) <<
+			      ICE_TX_DESC_LEN_L4_LEN_S;
+		return;
+	}
+
+	/* Enable L4 checksum offloads */
+	switch (ol_flags & PKT_TX_L4_MASK) {
+	case PKT_TX_TCP_CKSUM:
+		*td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_TCP;
+		*td_offset |= (sizeof(struct tcp_hdr) >> 2) <<
+			      ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	case PKT_TX_SCTP_CKSUM:
+		*td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_SCTP;
+		*td_offset |= (sizeof(struct sctp_hdr) >> 2) <<
+			      ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	case PKT_TX_UDP_CKSUM:
+		*td_cmd |= ICE_TX_DESC_CMD_L4T_EOFT_UDP;
+		*td_offset |= (sizeof(struct udp_hdr) >> 2) <<
+			      ICE_TX_DESC_LEN_L4_LEN_S;
+		break;
+	default:
+		break;
+	}
+}
+
+static inline int
+ice_xmit_cleanup(struct ice_tx_queue *txq)
+{
+	struct ice_tx_entry *sw_ring = txq->sw_ring;
+	volatile struct ice_tx_desc *txd = txq->tx_ring;
+	uint16_t last_desc_cleaned = txq->last_desc_cleaned;
+	uint16_t nb_tx_desc = txq->nb_tx_desc;
+	uint16_t desc_to_clean_to;
+	uint16_t nb_tx_to_clean;
+
+	/* Determine the last descriptor needing to be cleaned */
+	desc_to_clean_to = (uint16_t)(last_desc_cleaned + txq->tx_rs_thresh);
+	if (desc_to_clean_to >= nb_tx_desc)
+		desc_to_clean_to = (uint16_t)(desc_to_clean_to - nb_tx_desc);
+
+	/* Check to make sure the last descriptor to clean is done */
+	desc_to_clean_to = sw_ring[desc_to_clean_to].last_id;
+	if (!(txd[desc_to_clean_to].cmd_type_offset_bsz &
+	    rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DESC_DONE))) {
+		PMD_TX_FREE_LOG(DEBUG, "TX descriptor %4u is not done "
+				"(port=%d queue=%d) value=0x%lx\n",
+				desc_to_clean_to,
+				txq->port_id, txq->queue_id,
+				txd[desc_to_clean_to].cmd_type_offset_bsz);
+		/* Failed to clean any descriptors */
+		return -1;
+	}
+
+	/* Figure out how many descriptors will be cleaned */
+	if (last_desc_cleaned > desc_to_clean_to)
+		nb_tx_to_clean = (uint16_t)((nb_tx_desc - last_desc_cleaned) +
+					    desc_to_clean_to);
+	else
+		nb_tx_to_clean = (uint16_t)(desc_to_clean_to -
+					    last_desc_cleaned);
+
+	/* The last descriptor to clean is done, so that means all the
+	 * descriptors from the last descriptor that was cleaned
+	 * up to the last descriptor with the RS bit set
+	 * are done. Only reset the threshold descriptor.
+	 */
+	txd[desc_to_clean_to].cmd_type_offset_bsz = 0;
+
+	/* Update the txq to reflect the last descriptor that was cleaned */
+	txq->last_desc_cleaned = desc_to_clean_to;
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + nb_tx_to_clean);
+
+	return 0;
+}
+
+/* Check if the context descriptor is needed for TX offloading */
+static inline uint16_t
+ice_calc_context_desc(uint64_t flags)
+{
+	static uint64_t mask = PKT_TX_TCP_SEG | PKT_TX_QINQ_PKT;
+
+	return (flags & mask) ? 1 : 0;
+}
+
+/* set ice TSO context descriptor */
+static inline uint64_t
+ice_set_tso_ctx(struct rte_mbuf *mbuf, union ice_tx_offload tx_offload)
+{
+	uint64_t ctx_desc = 0;
+	uint32_t cd_cmd, hdr_len, cd_tso_len;
+
+	if (!tx_offload.l4_len) {
+		PMD_TX_LOG(DEBUG, "L4 length set to 0");
+		return ctx_desc;
+	}
+
+	/**
+	 * in case of non tunneling packet, the outer_l2_len and
+	 * outer_l3_len must be 0.
+	 */
+	hdr_len = tx_offload.outer_l2_len +
+		  tx_offload.outer_l3_len +
+		  tx_offload.l2_len +
+		  tx_offload.l3_len +
+		  tx_offload.l4_len;
+
+	cd_cmd = ICE_TX_CTX_DESC_TSO;
+	cd_tso_len = mbuf->pkt_len - hdr_len;
+	ctx_desc |= ((uint64_t)cd_cmd << ICE_TXD_CTX_QW1_CMD_S) |
+		    ((uint64_t)cd_tso_len << ICE_TXD_CTX_QW1_TSO_LEN_S) |
+		    ((uint64_t)mbuf->tso_segsz << ICE_TXD_CTX_QW1_MSS_S);
+
+	return ctx_desc;
+}
+
+uint16_t
+ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct ice_tx_queue *txq;
+	volatile struct ice_tx_desc *tx_ring;
+	volatile struct ice_tx_desc *txd;
+	struct ice_tx_entry *sw_ring;
+	struct ice_tx_entry *txe, *txn;
+	struct rte_mbuf *tx_pkt;
+	struct rte_mbuf *m_seg;
+	uint16_t tx_id;
+	uint16_t nb_tx;
+	uint16_t nb_used;
+	uint16_t nb_ctx;
+	uint32_t td_cmd = 0;
+	uint32_t td_offset = 0;
+	uint32_t td_tag = 0;
+	uint16_t tx_last;
+	uint64_t buf_dma_addr;
+	uint64_t ol_flags;
+	union ice_tx_offload tx_offload = {0};
+
+	txq = tx_queue;
+	sw_ring = txq->sw_ring;
+	tx_ring = txq->tx_ring;
+	tx_id = txq->tx_tail;
+	txe = &sw_ring[tx_id];
+
+	/* Check if the descriptor ring needs to be cleaned. */
+	if (txq->nb_tx_free < txq->tx_free_thresh)
+		ice_xmit_cleanup(txq);
+
+	for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) {
+		tx_pkt = *tx_pkts++;
+
+		td_cmd = 0;
+		ol_flags = tx_pkt->ol_flags;
+		tx_offload.l2_len = tx_pkt->l2_len;
+		tx_offload.l3_len = tx_pkt->l3_len;
+		tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
+		tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
+		tx_offload.l4_len = tx_pkt->l4_len;
+		tx_offload.tso_segsz = tx_pkt->tso_segsz;
+		/* Calculate the number of context descriptors needed. */
+		nb_ctx = ice_calc_context_desc(ol_flags);
+
+		/* The number of descriptors that must be allocated for
+		 * a packet equals to the number of the segments of that
+		 * packet plus the number of context descriptor if needed.
+		 */
+		nb_used = (uint16_t)(tx_pkt->nb_segs + nb_ctx);
+		tx_last = (uint16_t)(tx_id + nb_used - 1);
+
+		/* Circular ring */
+		if (tx_last >= txq->nb_tx_desc)
+			tx_last = (uint16_t)(tx_last - txq->nb_tx_desc);
+
+		if (nb_used > txq->nb_tx_free) {
+			if (ice_xmit_cleanup(txq) != 0) {
+				if (nb_tx == 0)
+					return 0;
+				goto end_of_tx;
+			}
+			if (unlikely(nb_used > txq->tx_rs_thresh)) {
+				while (nb_used > txq->nb_tx_free) {
+					if (ice_xmit_cleanup(txq) != 0) {
+						if (nb_tx == 0)
+							return 0;
+						goto end_of_tx;
+					}
+				}
+			}
+		}
+
+		/* Descriptor based VLAN insertion */
+		if (ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_QINQ_PKT)) {
+			td_cmd |= ICE_TX_DESC_CMD_IL2TAG1;
+			td_tag = tx_pkt->vlan_tci;
+		}
+
+		/* Enable checksum offloading */
+		if (ol_flags & ICE_TX_CKSUM_OFFLOAD_MASK) {
+			ice_txd_enable_checksum(ol_flags, &td_cmd,
+						&td_offset, tx_offload);
+		}
+
+		if (nb_ctx) {
+			/* Setup TX context descriptor if required */
+			volatile struct ice_tx_ctx_desc *ctx_txd =
+				(volatile struct ice_tx_ctx_desc *)
+					&tx_ring[tx_id];
+			uint16_t cd_l2tag2 = 0;
+			uint64_t cd_type_cmd_tso_mss = ICE_TX_DESC_DTYPE_CTX;
+
+			txn = &sw_ring[txe->next_id];
+			RTE_MBUF_PREFETCH_TO_FREE(txn->mbuf);
+			if (txe->mbuf) {
+				rte_pktmbuf_free_seg(txe->mbuf);
+				txe->mbuf = NULL;
+			}
+
+			if (ol_flags & PKT_TX_TCP_SEG)
+				cd_type_cmd_tso_mss |=
+					ice_set_tso_ctx(tx_pkt, tx_offload);
+
+			/* TX context descriptor based double VLAN insert */
+			if (ol_flags & PKT_TX_QINQ_PKT) {
+				cd_l2tag2 = tx_pkt->vlan_tci_outer;
+				cd_type_cmd_tso_mss |=
+					((uint64_t)ICE_TX_CTX_DESC_IL2TAG2 <<
+					 ICE_TXD_CTX_QW1_CMD_S);
+			}
+			ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
+			ctx_txd->qw1 =
+				rte_cpu_to_le_64(cd_type_cmd_tso_mss);
+
+			txe->last_id = tx_last;
+			tx_id = txe->next_id;
+			txe = txn;
+		}
+		m_seg = tx_pkt;
+
+		do {
+			txd = &tx_ring[tx_id];
+			txn = &sw_ring[txe->next_id];
+
+			if (txe->mbuf)
+				rte_pktmbuf_free_seg(txe->mbuf);
+			txe->mbuf = m_seg;
+
+			/* Setup TX Descriptor */
+			buf_dma_addr = rte_mbuf_data_iova(m_seg);
+			txd->buf_addr = rte_cpu_to_le_64(buf_dma_addr);
+			txd->cmd_type_offset_bsz =
+				rte_cpu_to_le_64(ICE_TX_DESC_DTYPE_DATA |
+				((uint64_t)td_cmd  << ICE_TXD_QW1_CMD_S) |
+				((uint64_t)td_offset << ICE_TXD_QW1_OFFSET_S) |
+				((uint64_t)m_seg->data_len  <<
+				 ICE_TXD_QW1_TX_BUF_SZ_S) |
+				((uint64_t)td_tag  << ICE_TXD_QW1_L2TAG1_S));
+
+			txe->last_id = tx_last;
+			tx_id = txe->next_id;
+			txe = txn;
+			m_seg = m_seg->next;
+		} while (m_seg);
+
+		/* fill the last descriptor with End of Packet (EOP) bit */
+		td_cmd |= ICE_TX_DESC_CMD_EOP;
+		txq->nb_tx_used = (uint16_t)(txq->nb_tx_used + nb_used);
+		txq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_used);
+
+		/* set RS bit on the last descriptor of one packet */
+		if (txq->nb_tx_used >= txq->tx_rs_thresh) {
+			PMD_TX_FREE_LOG(DEBUG,
+					"Setting RS bit on TXD id="
+					"%4u (port=%d queue=%d)",
+					tx_last, txq->port_id, txq->queue_id);
+
+			td_cmd |= ICE_TX_DESC_CMD_RS;
+
+			/* Update txq RS bit counters */
+			txq->nb_tx_used = 0;
+		}
+		txd->cmd_type_offset_bsz |=
+			rte_cpu_to_le_64(((uint64_t)td_cmd) <<
+					 ICE_TXD_QW1_CMD_S);
+	}
+end_of_tx:
+	rte_wmb();
+
+	/* update Tail register */
+	ICE_PCI_REG_WRITE(txq->qtx_tail, tx_id);
+	txq->tx_tail = tx_id;
+
+	return nb_tx;
+}
+
+void __attribute__((cold))
+ice_set_rx_function(struct rte_eth_dev *dev)
+{
+	dev->rx_pkt_burst = ice_recv_pkts;
+}
+
+/*********************************************************************
+ *
+ *  TX prep functions
+ *
+ **********************************************************************/
+/* The default values of TSO MSS */
+#define ICE_MIN_TSO_MSS            64
+#define ICE_MAX_TSO_MSS            9728
+#define ICE_MAX_TSO_FRAME_SIZE     262144
+uint16_t
+ice_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+	      uint16_t nb_pkts)
+{
+	int i, ret;
+	uint64_t ol_flags;
+	struct rte_mbuf *m;
+
+	for (i = 0; i < nb_pkts; i++) {
+		m = tx_pkts[i];
+		ol_flags = m->ol_flags;
+
+		if (ol_flags & PKT_TX_TCP_SEG &&
+		    (m->tso_segsz < ICE_MIN_TSO_MSS ||
+		     m->tso_segsz > ICE_MAX_TSO_MSS ||
+		     m->pkt_len > ICE_MAX_TSO_FRAME_SIZE)) {
+			/**
+			 * An MSS outside the range is considered malicious
+			 */
+			rte_errno = -EINVAL;
+			return i;
+		}
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+		ret = rte_validate_tx_offload(m);
+		if (ret != 0) {
+			rte_errno = ret;
+			return i;
+		}
+#endif
+		ret = rte_net_intel_cksum_prepare(m);
+		if (ret != 0) {
+			rte_errno = ret;
+			return i;
+		}
+	}
+	return i;
+}
+
+void __attribute__((cold))
+ice_set_tx_function(struct rte_eth_dev *dev)
+{
+	dev->tx_pkt_burst = ice_xmit_pkts;
+	dev->tx_pkt_prepare = ice_prep_pkts;
+}
+
 /* For each value it means, datasheet of hardware can tell more details
  *
  * @note: fix ice_dev_supported_ptypes_get() if any change here.
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index bad2b89..e0218b3 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -134,6 +134,14 @@  int ice_tx_queue_setup(struct rte_eth_dev *dev,
 void ice_tx_queue_release(void *txq);
 void ice_clear_queues(struct rte_eth_dev *dev);
 void ice_free_queues(struct rte_eth_dev *dev);
+uint16_t ice_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+		       uint16_t nb_pkts);
+uint16_t ice_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		       uint16_t nb_pkts);
+void ice_set_rx_function(struct rte_eth_dev *dev);
+uint16_t ice_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
+		       uint16_t nb_pkts);
+void ice_set_tx_function(struct rte_eth_dev *dev);
 uint32_t ice_rx_queue_count(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 void ice_set_default_ptype_table(struct rte_eth_dev *dev);
 const uint32_t *ice_dev_supported_ptypes_get(struct rte_eth_dev *dev);