diff mbox

[net-next,1/8] qed: LL2 to use packed information for tx

Message ID 20170608161323.7597-2-Yuval.Mintz@cavium.com
State Changes Requested, archived
Delegated to: David Miller
Headers show

Commit Message

Mintz, Yuval June 8, 2017, 4:13 p.m. UTC
First step in revising the LL2 interface, this declares
qed_ll2_tx_pkt_info as part of the ll2 interface, and uses it for
transmission instead of receiving lots of parameters.

Signed-off-by: Yuval Mintz <Yuval.Mintz@cavium.com>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c  | 127 +++++++++++++----------------
 drivers/net/ethernet/qlogic/qed/qed_ll2.h  |  41 ++--------
 drivers/net/ethernet/qlogic/qed/qed_roce.c |  18 ++--
 include/linux/qed/qed_ll2_if.h             |  27 ++++++
 4 files changed, 102 insertions(+), 111 deletions(-)

Comments

David Miller June 8, 2017, 11:23 p.m. UTC | #1
From: Yuval Mintz <Yuval.Mintz@cavium.com>
Date: Thu, 8 Jun 2017 19:13:16 +0300

> @@ -67,6 +79,21 @@ struct qed_ll2_stats {
>  	u64 sent_bcast_pkts;
>  };
>  
> +struct qed_ll2_tx_pkt_info {
> +	u8 num_of_bds;
> +	u16 vlan;
> +	u8 bd_flags;
> +	u16 l4_hdr_offset_w;	/* from start of packet */
> +	enum qed_ll2_tx_dest tx_dest;
> +	enum qed_ll2_roce_flavor_type qed_roce_flavor;
> +	dma_addr_t first_frag;
> +	u16 first_frag_len;
> +	bool enable_ip_cksum;
> +	bool enable_l4_cksum;
> +	bool calc_ip_len;
> +	void *cookie;
> +};
> +

This layout is extremely inefficient, with lots of padding in between
struct members.

Group small u8 members and u16 members together so that they consume
full 32-bit areas so you can eliminate all of the padding.
Mintz, Yuval June 9, 2017, 7:08 a.m. UTC | #2
> > +struct qed_ll2_tx_pkt_info {
> > +	u8 num_of_bds;
> > +	u16 vlan;
> > +	u8 bd_flags;
> > +	u16 l4_hdr_offset_w;	/* from start of packet */
> > +	enum qed_ll2_tx_dest tx_dest;
> > +	enum qed_ll2_roce_flavor_type qed_roce_flavor;
> > +	dma_addr_t first_frag;
> > +	u16 first_frag_len;
> > +	bool enable_ip_cksum;
> > +	bool enable_l4_cksum;
> > +	bool calc_ip_len;
> > +	void *cookie;
> > +};
> > +
> 
> This layout is extremely inefficient, with lots of padding in between struct
> members.
> 
> Group small u8 members and u16 members together so that they consume
> full 32-bit areas so you can eliminate all of the padding.

While I can mend the holes, the total structure size doesn't really change:

struct qed_ll2_tx_pkt_info {
        u8                         num_of_bds;           /*     0     1 */

        /* XXX 1 byte hole, try to pack */

        u16                        vlan;                 /*     2     2 */
        u8                         bd_flags;             /*     4     1 */

        /* XXX 1 byte hole, try to pack */

        u16                        l4_hdr_offset_w;      /*     6     2 */
        enum qed_ll2_tx_dest       tx_dest;              /*     8     4 */
        enum qed_ll2_roce_flavor_type qed_roce_flavor;   /*    12     4 */
        dma_addr_t                 first_frag;           /*    16     8 */
        u16                        first_frag_len;       /*    24     2 */
        bool                       enable_ip_cksum;      /*    26     1 */
        bool                       enable_l4_cksum;      /*    27     1 */
        bool                       calc_ip_len;          /*    28     1 */

        /* XXX 3 bytes hole, try to pack */

        void *                     cookie;               /*    32     8 */

        /* size: 40, cachelines: 1, members: 12 */
        /* sum members: 35, holes: 3, sum holes: 5 */
        /* last cacheline: 40 bytes */
};

Becomes:

struct qed_ll2_tx_pkt_info {
        void *                     cookie;               /*     0     8 */
        dma_addr_t                 first_frag;           /*     8     8 */
        enum qed_ll2_tx_dest       tx_dest;              /*    16     4 */
        enum qed_ll2_roce_flavor_type qed_roce_flavor;   /*    20     4 */
        u16                        vlan;                 /*    24     2 */
        u16                        l4_hdr_offset_w;      /*    26     2 */
        u16                        first_frag_len;       /*    28     2 */
        u8                         num_of_bds;           /*    30     1 */
        u8                         bd_flags;             /*    31     1 */
        bool                       enable_ip_cksum;      /*    32     1 */
        bool                       enable_l4_cksum;      /*    33     1 */
        bool                       calc_ip_len;          /*    34     1 */

        /* size: 40, cachelines: 1, members: 12 */
        /* padding: 5 */
        /* last cacheline: 40 bytes */
};

I'm going to send the changed version in V2 as there's no harm to it,
[+ we *can* reduce the size of qed_ll2_comp_rx_data you commented
for patch #2 in series]

But one thing I thought of asking - do we consider layouts of relatively
insignificant structures to be some golden coding standard?
David Laight June 9, 2017, 7:28 a.m. UTC | #3
From: David Miller
> Sent: 09 June 2017 00:24
> 
> From: Yuval Mintz <Yuval.Mintz@cavium.com>
> Date: Thu, 8 Jun 2017 19:13:16 +0300
> 
> > @@ -67,6 +79,21 @@ struct qed_ll2_stats {
> >  	u64 sent_bcast_pkts;
> >  };
> >
> > +struct qed_ll2_tx_pkt_info {
> > +	u8 num_of_bds;
> > +	u16 vlan;
> > +	u8 bd_flags;
> > +	u16 l4_hdr_offset_w;	/* from start of packet */
> > +	enum qed_ll2_tx_dest tx_dest;
> > +	enum qed_ll2_roce_flavor_type qed_roce_flavor;
> > +	dma_addr_t first_frag;
> > +	u16 first_frag_len;
> > +	bool enable_ip_cksum;
> > +	bool enable_l4_cksum;
> > +	bool calc_ip_len;
> > +	void *cookie;
> > +};
> > +
> 
> This layout is extremely inefficient, with lots of padding in between
> struct members.
> 
> Group small u8 members and u16 members together so that they consume
> full 32-bit areas so you can eliminate all of the padding.

I'd also query the use of u16 sizes/lengths, any arithmetic on u16 (and u8)
variables is likely to generate extra code (on non-x86).
You are using 32 bits for the 'enum' - I bet the values fit in 8 bits,
so aren't really worried about size.

If size did matter you can easily get the above down to 32 bytes.

	David
Mintz, Yuval June 9, 2017, 7:51 a.m. UTC | #4
> -----Original Message-----
> From: David Laight [mailto:David.Laight@ACULAB.COM]
> Sent: Friday, June 09, 2017 10:28 AM
> To: 'David Miller' <davem@davemloft.net>; Mintz, Yuval
> <Yuval.Mintz@cavium.com>
> Cc: netdev@vger.kernel.org; linux-rdma@vger.kernel.org; Kalderon, Michal
> <Michal.Kalderon@cavium.com>
> Subject: RE: [PATCH net-next 1/8] qed: LL2 to use packed information for tx
> 
> From: David Miller
> > Sent: 09 June 2017 00:24
> >
> > From: Yuval Mintz <Yuval.Mintz@cavium.com>
> > Date: Thu, 8 Jun 2017 19:13:16 +0300
> >
> > > @@ -67,6 +79,21 @@ struct qed_ll2_stats {
> > >  	u64 sent_bcast_pkts;
> > >  };
> > >
> > > +struct qed_ll2_tx_pkt_info {
> > > +	u8 num_of_bds;
> > > +	u16 vlan;
> > > +	u8 bd_flags;
> > > +	u16 l4_hdr_offset_w;	/* from start of packet */
> > > +	enum qed_ll2_tx_dest tx_dest;
> > > +	enum qed_ll2_roce_flavor_type qed_roce_flavor;
> > > +	dma_addr_t first_frag;
> > > +	u16 first_frag_len;
> > > +	bool enable_ip_cksum;
> > > +	bool enable_l4_cksum;
> > > +	bool calc_ip_len;
> > > +	void *cookie;
> > > +};
> > > +
> >
> > This layout is extremely inefficient, with lots of padding in between
> > struct members.
> >
> > Group small u8 members and u16 members together so that they consume
> > full 32-bit areas so you can eliminate all of the padding.
> 
> I'd also query the use of u16 sizes/lengths, any arithmetic on u16 (and u8)
> variables is likely to generate extra code (on non-x86).
> You are using 32 bits for the 'enum' - I bet the values fit in 8 bits, so aren't
> really worried about size.
> 
> If size did matter you can easily get the above down to 32 bytes.

You're right, and that's exactly the point -  since this is not data-path critical
I don't see why the size/efficiency should matter [greatly].
David Laight June 9, 2017, 8:23 a.m. UTC | #5
From: Mintz, Yuval
> Sent: 09 June 2017 08:52
> > From: David Laight [mailto:David.Laight@ACULAB.COM]
> > Sent: Friday, June 09, 2017 10:28 AM
> > To: 'David Miller' <davem@davemloft.net>; Mintz, Yuval
> > <Yuval.Mintz@cavium.com>
> > Cc: netdev@vger.kernel.org; linux-rdma@vger.kernel.org; Kalderon, Michal
> > <Michal.Kalderon@cavium.com>
> > Subject: RE: [PATCH net-next 1/8] qed: LL2 to use packed information for tx
> >
> > From: David Miller
> > > Sent: 09 June 2017 00:24
> > >
> > > From: Yuval Mintz <Yuval.Mintz@cavium.com>
> > > Date: Thu, 8 Jun 2017 19:13:16 +0300
> > >
> > > > @@ -67,6 +79,21 @@ struct qed_ll2_stats {
> > > >  	u64 sent_bcast_pkts;
> > > >  };
> > > >
> > > > +struct qed_ll2_tx_pkt_info {
> > > > +	u8 num_of_bds;
> > > > +	u16 vlan;
> > > > +	u8 bd_flags;
> > > > +	u16 l4_hdr_offset_w;	/* from start of packet */
> > > > +	enum qed_ll2_tx_dest tx_dest;
> > > > +	enum qed_ll2_roce_flavor_type qed_roce_flavor;
> > > > +	dma_addr_t first_frag;
> > > > +	u16 first_frag_len;
> > > > +	bool enable_ip_cksum;
> > > > +	bool enable_l4_cksum;
> > > > +	bool calc_ip_len;
> > > > +	void *cookie;
> > > > +};
> > > > +
> > >
> > > This layout is extremely inefficient, with lots of padding in between
> > > struct members.
> > >
> > > Group small u8 members and u16 members together so that they consume
> > > full 32-bit areas so you can eliminate all of the padding.
> >
> > I'd also query the use of u16 sizes/lengths, any arithmetic on u16 (and u8)
> > variables is likely to generate extra code (on non-x86).
> > You are using 32 bits for the 'enum' - I bet the values fit in 8 bits, so aren't
> > really worried about size.
> >
> > If size did matter you can easily get the above down to 32 bytes.
> 
> You're right, and that's exactly the point -  since this is not data-path critical
> I don't see why the size/efficiency should matter [greatly].

It is just good practice so that it happens automatically when it does matter.
Just swapping 'vlan' and 'bd_flags' would make it look much better.

	David
David Miller June 9, 2017, 1:59 p.m. UTC | #6
From: "Mintz, Yuval" <Yuval.Mintz@cavium.com>
Date: Fri, 9 Jun 2017 07:08:54 +0000

> But one thing I thought of asking - do we consider layouts of relatively
> insignificant structures to be some golden coding standard?

It just shows lack of thought when writing things.

I want to see code that shows the author put some care into it
and didn't just slap things together.

Thank you.
diff mbox

Patch

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index f67ed6d..0b75f5d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -740,12 +740,13 @@  static void
 qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
 			  struct qed_ll2_info *p_ll2_conn)
 {
+	struct qed_ll2_tx_pkt_info tx_pkt;
 	struct qed_ooo_buffer *p_buffer;
-	int rc;
 	u16 l4_hdr_offset_w;
 	dma_addr_t first_frag;
 	u16 parse_flags;
 	u8 bd_flags;
+	int rc;
 
 	/* Submit Tx buffers here */
 	while ((p_buffer = qed_ooo_get_ready_buffer(p_hwfn,
@@ -760,13 +761,18 @@  qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
 		SET_FIELD(bd_flags, CORE_TX_BD_DATA_FORCE_VLAN_MODE, 1);
 		SET_FIELD(bd_flags, CORE_TX_BD_DATA_L4_PROTOCOL, 1);
 
-		rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, 1,
-					       p_buffer->vlan, bd_flags,
-					       l4_hdr_offset_w,
-					       p_ll2_conn->conn.tx_dest, 0,
-					       first_frag,
-					       p_buffer->packet_length,
-					       p_buffer, true);
+		memset(&tx_pkt, 0, sizeof(tx_pkt));
+		tx_pkt.num_of_bds = 1;
+		tx_pkt.vlan = p_buffer->vlan;
+		tx_pkt.bd_flags = bd_flags;
+		tx_pkt.l4_hdr_offset_w = l4_hdr_offset_w;
+		tx_pkt.tx_dest = p_ll2_conn->conn.tx_dest;
+		tx_pkt.first_frag = first_frag;
+		tx_pkt.first_frag_len = p_buffer->packet_length;
+		tx_pkt.cookie = p_buffer;
+
+		rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id,
+					       &tx_pkt, true);
 		if (rc) {
 			qed_ooo_put_ready_buffer(p_hwfn, p_hwfn->p_ooo_info,
 						 p_buffer, false);
@@ -1593,20 +1599,18 @@  int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
 static void qed_ll2_prepare_tx_packet_set(struct qed_hwfn *p_hwfn,
 					  struct qed_ll2_tx_queue *p_tx,
 					  struct qed_ll2_tx_packet *p_curp,
-					  u8 num_of_bds,
-					  dma_addr_t first_frag,
-					  u16 first_frag_len, void *p_cookie,
+					  struct qed_ll2_tx_pkt_info *pkt,
 					  u8 notify_fw)
 {
 	list_del(&p_curp->list_entry);
-	p_curp->cookie = p_cookie;
-	p_curp->bd_used = num_of_bds;
+	p_curp->cookie = pkt->cookie;
+	p_curp->bd_used = pkt->num_of_bds;
 	p_curp->notify_fw = notify_fw;
 	p_tx->cur_send_packet = p_curp;
 	p_tx->cur_send_frag_num = 0;
 
-	p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = first_frag;
-	p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = first_frag_len;
+	p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = pkt->first_frag;
+	p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = pkt->first_frag_len;
 	p_tx->cur_send_frag_num++;
 }
 
@@ -1614,32 +1618,33 @@  static void
 qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
 				 struct qed_ll2_info *p_ll2,
 				 struct qed_ll2_tx_packet *p_curp,
-				 u8 num_of_bds,
-				 enum core_tx_dest tx_dest,
-				 u16 vlan,
-				 u8 bd_flags,
-				 u16 l4_hdr_offset_w,
-				 enum core_roce_flavor_type roce_flavor,
-				 dma_addr_t first_frag,
-				 u16 first_frag_len)
+				 struct qed_ll2_tx_pkt_info *pkt)
 {
 	struct qed_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain;
 	u16 prod_idx = qed_chain_get_prod_idx(p_tx_chain);
 	struct core_tx_bd *start_bd = NULL;
+	enum core_roce_flavor_type roce_flavor;
+	enum core_tx_dest tx_dest;
 	u16 bd_data = 0, frag_idx;
 
+	roce_flavor = (pkt->qed_roce_flavor == QED_LL2_ROCE) ? CORE_ROCE
+							     : CORE_RROCE;
+
+	tx_dest = (pkt->tx_dest == QED_LL2_TX_DEST_NW) ? CORE_TX_DEST_NW
+						       : CORE_TX_DEST_LB;
+
 	start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
-	start_bd->nw_vlan_or_lb_echo = cpu_to_le16(vlan);
+	start_bd->nw_vlan_or_lb_echo = cpu_to_le16(pkt->vlan);
 	SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W,
-		  cpu_to_le16(l4_hdr_offset_w));
+		  cpu_to_le16(pkt->l4_hdr_offset_w));
 	SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest);
-	bd_data |= bd_flags;
+	bd_data |= pkt->bd_flags;
 	SET_FIELD(bd_data, CORE_TX_BD_DATA_START_BD, 0x1);
-	SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, num_of_bds);
+	SET_FIELD(bd_data, CORE_TX_BD_DATA_NBDS, pkt->num_of_bds);
 	SET_FIELD(bd_data, CORE_TX_BD_DATA_ROCE_FLAV, roce_flavor);
 	start_bd->bd_data.as_bitfield = cpu_to_le16(bd_data);
-	DMA_REGPAIR_LE(start_bd->addr, first_frag);
-	start_bd->nbytes = cpu_to_le16(first_frag_len);
+	DMA_REGPAIR_LE(start_bd->addr, pkt->first_frag);
+	start_bd->nbytes = cpu_to_le16(pkt->first_frag_len);
 
 	DP_VERBOSE(p_hwfn,
 		   (NETIF_MSG_TX_QUEUED | QED_MSG_LL2),
@@ -1648,17 +1653,17 @@  qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn,
 		   p_ll2->cid,
 		   p_ll2->conn.conn_type,
 		   prod_idx,
-		   first_frag_len,
-		   num_of_bds,
+		   pkt->first_frag_len,
+		   pkt->num_of_bds,
 		   le32_to_cpu(start_bd->addr.hi),
 		   le32_to_cpu(start_bd->addr.lo));
 
-	if (p_ll2->tx_queue.cur_send_frag_num == num_of_bds)
+	if (p_ll2->tx_queue.cur_send_frag_num == pkt->num_of_bds)
 		return;
 
 	/* Need to provide the packet with additional BDs for frags */
 	for (frag_idx = p_ll2->tx_queue.cur_send_frag_num;
-	     frag_idx < num_of_bds; frag_idx++) {
+	     frag_idx < pkt->num_of_bds; frag_idx++) {
 		struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd;
 
 		*p_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain);
@@ -1726,21 +1731,13 @@  static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn,
 
 int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 			      u8 connection_handle,
-			      u8 num_of_bds,
-			      u16 vlan,
-			      u8 bd_flags,
-			      u16 l4_hdr_offset_w,
-			      enum qed_ll2_tx_dest e_tx_dest,
-			      enum qed_ll2_roce_flavor_type qed_roce_flavor,
-			      dma_addr_t first_frag,
-			      u16 first_frag_len, void *cookie, u8 notify_fw)
+			      struct qed_ll2_tx_pkt_info *pkt,
+			      bool notify_fw)
 {
 	struct qed_ll2_tx_packet *p_curp = NULL;
 	struct qed_ll2_info *p_ll2_conn = NULL;
-	enum core_roce_flavor_type roce_flavor;
 	struct qed_ll2_tx_queue *p_tx;
 	struct qed_chain *p_tx_chain;
-	enum core_tx_dest tx_dest;
 	unsigned long flags;
 	int rc = 0;
 
@@ -1750,7 +1747,7 @@  int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 	p_tx = &p_ll2_conn->tx_queue;
 	p_tx_chain = &p_tx->txq_chain;
 
-	if (num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET)
+	if (pkt->num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET)
 		return -EIO;
 
 	spin_lock_irqsave(&p_tx->lock, flags);
@@ -1763,7 +1760,7 @@  int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 	if (!list_empty(&p_tx->free_descq))
 		p_curp = list_first_entry(&p_tx->free_descq,
 					  struct qed_ll2_tx_packet, list_entry);
-	if (p_curp && qed_chain_get_elem_left(p_tx_chain) < num_of_bds)
+	if (p_curp && qed_chain_get_elem_left(p_tx_chain) < pkt->num_of_bds)
 		p_curp = NULL;
 
 	if (!p_curp) {
@@ -1771,26 +1768,10 @@  int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 		goto out;
 	}
 
-	tx_dest = e_tx_dest == QED_LL2_TX_DEST_NW ? CORE_TX_DEST_NW :
-						    CORE_TX_DEST_LB;
-	if (qed_roce_flavor == QED_LL2_ROCE) {
-		roce_flavor = CORE_ROCE;
-	} else if (qed_roce_flavor == QED_LL2_RROCE) {
-		roce_flavor = CORE_RROCE;
-	} else {
-		rc = -EINVAL;
-		goto out;
-	}
-
 	/* Prepare packet and BD, and perhaps send a doorbell to FW */
-	qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp,
-				      num_of_bds, first_frag,
-				      first_frag_len, cookie, notify_fw);
-	qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp,
-					 num_of_bds, tx_dest,
-					 vlan, bd_flags, l4_hdr_offset_w,
-					 roce_flavor,
-					 first_frag, first_frag_len);
+	qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp, pkt, notify_fw);
+
+	qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, pkt);
 
 	qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn);
 
@@ -2245,6 +2226,7 @@  static int qed_ll2_stop(struct qed_dev *cdev)
 
 static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
 {
+	struct qed_ll2_tx_pkt_info pkt;
 	const skb_frag_t *frag;
 	int rc = -EINVAL, i;
 	dma_addr_t mapping;
@@ -2279,12 +2261,17 @@  static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
 		flags |= BIT(CORE_TX_BD_DATA_VLAN_INSERTION_SHIFT);
 	}
 
-	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
-				       cdev->ll2->handle,
-				       1 + skb_shinfo(skb)->nr_frags,
-				       vlan, flags, 0, QED_LL2_TX_DEST_NW,
-				       0 /* RoCE FLAVOR */,
-				       mapping, skb->len, skb, 1);
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags;
+	pkt.vlan = vlan;
+	pkt.bd_flags = flags;
+	pkt.tx_dest = QED_LL2_TX_DEST_NW;
+	pkt.first_frag = mapping;
+	pkt.first_frag_len = skb->len;
+	pkt.cookie = skb;
+
+	rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle,
+				       &pkt, 1);
 	if (rc)
 		goto err;
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index 2c07d0e..96af50b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -47,12 +47,6 @@ 
 
 #define QED_MAX_NUM_OF_LL2_CONNECTIONS                    (4)
 
-enum qed_ll2_roce_flavor_type {
-	QED_LL2_ROCE,
-	QED_LL2_RROCE,
-	MAX_QED_LL2_ROCE_FLAVOR_TYPE
-};
-
 enum qed_ll2_conn_type {
 	QED_LL2_TYPE_FCOE,
 	QED_LL2_TYPE_ISCSI,
@@ -64,12 +58,6 @@  enum qed_ll2_conn_type {
 	MAX_QED_LL2_RX_CONN_TYPE
 };
 
-enum qed_ll2_tx_dest {
-	QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */
-	QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */
-	QED_LL2_TX_DEST_MAX
-};
-
 struct qed_ll2_rx_packet {
 	struct list_head list_entry;
 	struct core_rx_bd_with_buff_len *rxq_bd;
@@ -216,35 +204,16 @@  int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
  *				      to prepare Tx packet submission to FW.
  *
  * @param p_hwfn
- * @param connection_handle	LL2 connection's handle obtained from
- *				qed_ll2_require_connection
- * @param num_of_bds		a number of requested BD equals a number of
- *				fragments in Tx packet
- * @param vlan			VLAN to insert to packet (if insertion set)
- * @param bd_flags
- * @param l4_hdr_offset_w	L4 Header Offset from start of packet
- *				(in words). This is needed if both l4_csum
- *				and ipv6_ext are set
- * @param e_tx_dest             indicates if the packet is to be transmitted via
- *                              loopback or to the network
- * @param first_frag
- * @param first_frag_len
- * @param cookie
- *
- * @param notify_fw
+ * @param connection_handle
+ * @param pkt - info regarding the tx packet
+ * @param notify_fw - issue doorbell to fw for this packet
  *
  * @return 0 on success, failure otherwise
  */
 int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 			      u8 connection_handle,
-			      u8 num_of_bds,
-			      u16 vlan,
-			      u8 bd_flags,
-			      u16 l4_hdr_offset_w,
-			      enum qed_ll2_tx_dest e_tx_dest,
-			      enum qed_ll2_roce_flavor_type qed_roce_flavor,
-			      dma_addr_t first_frag,
-			      u16 first_frag_len, void *cookie, u8 notify_fw);
+			      struct qed_ll2_tx_pkt_info *pkt,
+			      bool notify_fw);
 
 /**
  * @brief qed_ll2_release_connection -	releases resources
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index b9434b7..afc63c0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -2937,6 +2937,7 @@  static int qed_roce_ll2_tx(struct qed_dev *cdev,
 	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
 	struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2;
 	enum qed_ll2_roce_flavor_type qed_roce_flavor;
+	struct qed_ll2_tx_pkt_info ll2_pkt;
 	u8 flags = 0;
 	int rc;
 	int i;
@@ -2955,11 +2956,18 @@  static int qed_roce_ll2_tx(struct qed_dev *cdev,
 		flags |= BIT(CORE_TX_BD_DATA_IP_CSUM_SHIFT);
 
 	/* Tx header */
-	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle,
-				       1 + pkt->n_seg, 0, flags, 0,
-				       QED_LL2_TX_DEST_NW,
-				       qed_roce_flavor, pkt->header.baddr,
-				       pkt->header.len, pkt, 1);
+	memset(&ll2_pkt, 0, sizeof(ll2_pkt));
+	ll2_pkt.num_of_bds = 1 + pkt->n_seg;
+	ll2_pkt.bd_flags = flags;
+	ll2_pkt.tx_dest = QED_LL2_TX_DEST_NW;
+	ll2_pkt.qed_roce_flavor = qed_roce_flavor;
+	ll2_pkt.first_frag = pkt->header.baddr;
+	ll2_pkt.first_frag_len = pkt->header.len;
+	ll2_pkt.cookie = pkt;
+
+	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
+				       roce_ll2->handle,
+				       &ll2_pkt, 1);
 	if (rc) {
 		DP_ERR(cdev, "roce ll2 tx: header failed (rc=%d)\n", rc);
 		return QED_ROCE_TX_HEAD_FAILURE;
diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h
index 4fb4666..1a9a5b9 100644
--- a/include/linux/qed/qed_ll2_if.h
+++ b/include/linux/qed/qed_ll2_if.h
@@ -43,6 +43,18 @@ 
 #include <linux/slab.h>
 #include <linux/qed/qed_if.h>
 
+enum qed_ll2_roce_flavor_type {
+	QED_LL2_ROCE,
+	QED_LL2_RROCE,
+	MAX_QED_LL2_ROCE_FLAVOR_TYPE
+};
+
+enum qed_ll2_tx_dest {
+	QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */
+	QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */
+	QED_LL2_TX_DEST_MAX
+};
+
 struct qed_ll2_stats {
 	u64 gsi_invalid_hdr;
 	u64 gsi_invalid_pkt_length;
@@ -67,6 +79,21 @@  struct qed_ll2_stats {
 	u64 sent_bcast_pkts;
 };
 
+struct qed_ll2_tx_pkt_info {
+	u8 num_of_bds;
+	u16 vlan;
+	u8 bd_flags;
+	u16 l4_hdr_offset_w;	/* from start of packet */
+	enum qed_ll2_tx_dest tx_dest;
+	enum qed_ll2_roce_flavor_type qed_roce_flavor;
+	dma_addr_t first_frag;
+	u16 first_frag_len;
+	bool enable_ip_cksum;
+	bool enable_l4_cksum;
+	bool calc_ip_len;
+	void *cookie;
+};
+
 #define QED_LL2_UNUSED_HANDLE   (0xff)
 
 struct qed_ll2_cb_ops {