
[unstable,SRU,cosmic,SRU,bionic] net-next/hinic: add checksum offload and TSO support

Message ID 20181030140428.GA14128@xps13.dannf
State New

Commit Message

dann frazier Oct. 30, 2018, 2:04 p.m. UTC
From: Zhao Chen <zhaochen6@huawei.com>

BugLink: https://bugs.launchpad.net/bugs/1800664

This patch adds checksum offload and TSO support for the HiNIC
driver. Performance testing (iperf) shows more than 100% improvement
in TCP streams.

Signed-off-by: Zhao Chen <zhaochen6@huawei.com>
Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit cc18a7543d2f63a2c93fc61cfa7fd8be5464f75e)
Signed-off-by: dann frazier <dann.frazier@canonical.com>
---
 .../net/ethernet/huawei/hinic/hinic_hw_dev.h  |   2 +
 .../net/ethernet/huawei/hinic/hinic_hw_qp.c   | 121 +++++--
 .../net/ethernet/huawei/hinic/hinic_hw_qp.h   |  27 ++
 .../net/ethernet/huawei/hinic/hinic_hw_wq.c   |  14 +
 .../net/ethernet/huawei/hinic/hinic_hw_wq.h   |   2 +
 .../net/ethernet/huawei/hinic/hinic_hw_wqe.h  |  97 ++++--
 .../net/ethernet/huawei/hinic/hinic_main.c    |  23 +-
 .../net/ethernet/huawei/hinic/hinic_port.c    |  32 ++
 .../net/ethernet/huawei/hinic/hinic_port.h    |  18 ++
 drivers/net/ethernet/huawei/hinic/hinic_tx.c  | 295 +++++++++++++++++-
 10 files changed, 571 insertions(+), 60 deletions(-)
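
For a quick sanity check on a running kernel, the TSO state that this
patch wires up can be queried from user space. Below is a minimal,
hypothetical sketch (not part of the patch) that reads the TSO flag of
an interface through the legacy ETHTOOL_GTSO ioctl; the interface name
"eth0" is an assumption and should be replaced with the actual hinic
port:

/*
 * Hypothetical standalone example -- not part of this patch.
 * Queries the TSO state of an interface via the legacy ethtool ioctl.
 * Build: cc -o tso_check tso_check.c
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_value eval = { .cmd = ETHTOOL_GTSO };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* assumed name */
	ifr.ifr_data = (void *)&eval;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0) {
		perror("SIOCETHTOOL");
		close(fd);
		return 1;
	}

	printf("tcp-segmentation-offload: %s\n", eval.data ? "on" : "off");
	close(fd);
	return 0;
}

The same flag is visible with "ethtool -k <iface>", and because the
patch adds NETIF_F_TSO to hw_features it can be toggled with
"ethtool -K <iface> tso on|off". Comparing iperf TCP-stream runs with
the feature on and off should roughly reproduce the improvement quoted
above.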

Comments

Seth Forshee Oct. 31, 2018, 1:53 p.m. UTC | #1
On Tue, Oct 30, 2018 at 08:04:28AM -0600, dann frazier wrote:
> From: Zhao Chen <zhaochen6@huawei.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1800664
> 
> This patch adds checksum offload and TSO support for the HiNIC
> driver. Performance testing (iperf) shows more than 100% improvement
> in TCP streams.
> 
> Signed-off-by: Zhao Chen <zhaochen6@huawei.com>
> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> (cherry picked from commit cc18a7543d2f63a2c93fc61cfa7fd8be5464f75e)
> Signed-off-by: dann frazier <dann.frazier@canonical.com>

Clean cherry pick, impact limited to a single platform-specific driver.

Acked-by: Seth Forshee <seth.forshee@canonical.com>

Applied to unstable/master, thanks!
Kleber Sacilotto de Souza Nov. 6, 2018, 1:54 p.m. UTC | #2
On 10/30/18 15:04, dann frazier wrote:
> From: Zhao Chen <zhaochen6@huawei.com>
>
> BugLink: https://bugs.launchpad.net/bugs/1800664
>
> This patch adds checksum offload and TSO support for the HiNIC
> driver. Performance testing (iperf) shows more than 100% improvement
> in TCP streams.
>
> Signed-off-by: Zhao Chen <zhaochen6@huawei.com>
> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> (cherry picked from commit cc18a7543d2f63a2c93fc61cfa7fd8be5464f75e)
> Signed-off-by: dann frazier <dann.frazier@canonical.com>

Clean cherry-pick, limited scope and tested.

Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com>

> ---
>  .../net/ethernet/huawei/hinic/hinic_hw_dev.h  |   2 +
>  .../net/ethernet/huawei/hinic/hinic_hw_qp.c   | 121 +++++--
>  .../net/ethernet/huawei/hinic/hinic_hw_qp.h   |  27 ++
>  .../net/ethernet/huawei/hinic/hinic_hw_wq.c   |  14 +
>  .../net/ethernet/huawei/hinic/hinic_hw_wq.h   |   2 +
>  .../net/ethernet/huawei/hinic/hinic_hw_wqe.h  |  97 ++++--
>  .../net/ethernet/huawei/hinic/hinic_main.c    |  23 +-
>  .../net/ethernet/huawei/hinic/hinic_port.c    |  32 ++
>  .../net/ethernet/huawei/hinic/hinic_port.h    |  18 ++
>  drivers/net/ethernet/huawei/hinic/hinic_tx.c  | 295 +++++++++++++++++-
>  10 files changed, 571 insertions(+), 60 deletions(-)
>
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
> index 0f5563f3b7798..097b5502603fc 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
> @@ -58,6 +58,8 @@ enum hinic_port_cmd {
>  
>  	HINIC_PORT_CMD_GET_GLOBAL_QPN   = 102,
>  
> +	HINIC_PORT_CMD_SET_TSO          = 112,
> +
>  	HINIC_PORT_CMD_GET_CAP          = 170,
>  };
>  
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
> index cb239627770f4..967c993d5303a 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
> @@ -70,8 +70,6 @@
>  #define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
>  #define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)
>  
> -#define TX_MAX_MSS_DEFAULT      0x3E00
> -
>  enum sq_wqe_type {
>  	SQ_NORMAL_WQE = 0,
>  };
> @@ -494,33 +492,16 @@ static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
>  			  HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
>  			  HINIC_SQ_CTRL_SET(ctrl_size, LEN);
>  
> -	ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT,
> -					     QUEUE_INFO_MSS);
> +	ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
> +					     QUEUE_INFO_MSS) |
> +			   HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
>  }
>  
>  static void sq_prepare_task(struct hinic_sq_task *task)
>  {
> -	task->pkt_info0 =
> -		HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) |
> -		HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) |
> -		HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
> -					INNER_L3TYPE) |
> -		HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE,
> -					VLAN_OFFLOAD) |
> -		HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG);
> -
> -	task->pkt_info1 =
> -		HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) |
> -		HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) |
> -		HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN);
> -
> -	task->pkt_info2 =
> -		HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) |
> -		HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN)  |
> -		HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN,
> -					TUNNEL_L4TYPE)    |
> -		HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
> -					OUTER_L3TYPE);
> +	task->pkt_info0 = 0;
> +	task->pkt_info1 = 0;
> +	task->pkt_info2 = 0;
>  
>  	task->ufo_v6_identify = 0;
>  
> @@ -529,6 +510,86 @@ static void sq_prepare_task(struct hinic_sq_task *task)
>  	task->zero_pad = 0;
>  }
>  
> +void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
> +{
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
> +}
> +
> +void hinic_task_set_outter_l3(struct hinic_sq_task *task,
> +			      enum hinic_l3_offload_type l3_type,
> +			      u32 network_len)
> +{
> +	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
> +			   HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
> +}
> +
> +void hinic_task_set_inner_l3(struct hinic_sq_task *task,
> +			     enum hinic_l3_offload_type l3_type,
> +			     u32 network_len)
> +{
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
> +	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
> +}
> +
> +void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
> +			      enum hinic_l4_offload_type l4_type,
> +			      u32 tunnel_len)
> +{
> +	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
> +			   HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
> +}
> +
> +void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
> +			   enum hinic_l4_offload_type l4_offload,
> +			   u32 l4_len, u32 offset)
> +{
> +	u32 tcp_udp_cs = 0, sctp = 0;
> +	u32 mss = HINIC_MSS_DEFAULT;
> +
> +	if (l4_offload == TCP_OFFLOAD_ENABLE ||
> +	    l4_offload == UDP_OFFLOAD_ENABLE)
> +		tcp_udp_cs = 1;
> +	else if (l4_offload == SCTP_OFFLOAD_ENABLE)
> +		sctp = 1;
> +
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
> +	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
> +
> +	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
> +		       HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
> +		       HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);
> +
> +	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
> +	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
> +}
> +
> +void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
> +			    enum hinic_l4_offload_type l4_offload,
> +			    u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
> +{
> +	u32 tso = 0, ufo = 0;
> +
> +	if (l4_offload == TCP_OFFLOAD_ENABLE)
> +		tso = 1;
> +	else if (l4_offload == UDP_OFFLOAD_ENABLE)
> +		ufo = 1;
> +
> +	task->ufo_v6_identify = ip_ident;
> +
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
> +	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
> +
> +	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
> +		       HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
> +		       HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
> +		       HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);
> +
> +	/* set MSS value */
> +	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
> +	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
> +}
> +
>  /**
>   * hinic_sq_prepare_wqe - prepare wqe before insert to the queue
>   * @sq: send queue
> @@ -612,6 +673,16 @@ struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
>  	return &hw_wqe->sq_wqe;
>  }
>  
> +/**
> + * hinic_sq_return_wqe - return the wqe to the sq
> + * @sq: send queue
> + * @wqe_size: the size of the wqe
> + **/
> +void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
> +{
> +	hinic_return_wqe(sq->wq, wqe_size);
> +}
> +
>  /**
>   * hinic_sq_write_wqe - write the wqe to the sq
>   * @sq: send queue
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
> index 6c84f83ec2831..a0dc63a4bfc7a 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
> @@ -149,6 +149,31 @@ int hinic_get_sq_free_wqebbs(struct hinic_sq *sq);
>  
>  int hinic_get_rq_free_wqebbs(struct hinic_rq *rq);
>  
> +void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len);
> +
> +void hinic_task_set_outter_l3(struct hinic_sq_task *task,
> +			      enum hinic_l3_offload_type l3_type,
> +			      u32 network_len);
> +
> +void hinic_task_set_inner_l3(struct hinic_sq_task *task,
> +			     enum hinic_l3_offload_type l3_type,
> +			     u32 network_len);
> +
> +void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
> +			      enum hinic_l4_offload_type l4_type,
> +			      u32 tunnel_len);
> +
> +void hinic_set_cs_inner_l4(struct hinic_sq_task *task,
> +			   u32 *queue_info,
> +			   enum hinic_l4_offload_type l4_offload,
> +			   u32 l4_len, u32 offset);
> +
> +void hinic_set_tso_inner_l4(struct hinic_sq_task *task,
> +			    u32 *queue_info,
> +			    enum hinic_l4_offload_type l4_offload,
> +			    u32 l4_len,
> +			    u32 offset, u32 ip_ident, u32 mss);
> +
>  void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
>  			  struct hinic_sq_wqe *wqe, struct hinic_sge *sges,
>  			  int nr_sges);
> @@ -159,6 +184,8 @@ void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
>  struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
>  				      unsigned int wqe_size, u16 *prod_idx);
>  
> +void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size);
> +
>  void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
>  			struct hinic_sq_wqe *wqe, struct sk_buff *skb,
>  			unsigned int wqe_size);
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
> index 3e3181c089bdc..f92f1bf3901a7 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
> @@ -774,6 +774,20 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
>  	return WQ_PAGE_ADDR(wq, *prod_idx) + WQE_PAGE_OFF(wq, *prod_idx);
>  }
>  
> +/**
> + * hinic_return_wqe - return the wqe when transmit failed
> + * @wq: wq to return wqe
> + * @wqe_size: wqe size
> + **/
> +void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size)
> +{
> +	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
> +
> +	atomic_sub(num_wqebbs, &wq->prod_idx);
> +
> +	atomic_add(num_wqebbs, &wq->delta);
> +}
> +
>  /**
>   * hinic_put_wqe - return the wqe place to use for a new wqe
>   * @wq: wq to return wqe
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
> index 9c030a0f035e2..9b66545ba563c 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
> @@ -104,6 +104,8 @@ void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq);
>  struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
>  				   u16 *prod_idx);
>  
> +void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size);
> +
>  void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size);
>  
>  struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
> index bc73485483c59..9754d6ed5f4ac 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
> @@ -62,19 +62,33 @@
>  			(((val) >> HINIC_CMDQ_WQE_HEADER_##member##_SHIFT) \
>  			 & HINIC_CMDQ_WQE_HEADER_##member##_MASK)
>  
> -#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT    0
> -#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT        16
> -#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT         22
> -#define HINIC_SQ_CTRL_LEN_SHIFT                 29
> -
> -#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK     0xFF
> -#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK         0x1F
> -#define HINIC_SQ_CTRL_DATA_FORMAT_MASK          0x1
> -#define HINIC_SQ_CTRL_LEN_MASK                  0x3
> -
> -#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT      13
> -
> -#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK       0x3FFF
> +#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT           0
> +#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT               16
> +#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT                22
> +#define HINIC_SQ_CTRL_LEN_SHIFT                        29
> +
> +#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK            0xFF
> +#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK                0x1F
> +#define HINIC_SQ_CTRL_DATA_FORMAT_MASK                 0x1
> +#define HINIC_SQ_CTRL_LEN_MASK                         0x3
> +
> +#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT          2
> +#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_SHIFT             10
> +#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_SHIFT             11
> +#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT       12
> +#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT             13
> +#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_SHIFT            27
> +#define HINIC_SQ_CTRL_QUEUE_INFO_UC_SHIFT              28
> +#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_SHIFT             29
> +
> +#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_MASK           0xFF
> +#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_MASK              0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_MASK              0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	       0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK              0x3FFF
> +#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_MASK             0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_UC_MASK               0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_MASK              0x7
>  
>  #define HINIC_SQ_CTRL_SET(val, member)          \
>  		(((u32)(val) & HINIC_SQ_CTRL_##member##_MASK) \
> @@ -84,6 +98,10 @@
>  		(((val) >> HINIC_SQ_CTRL_##member##_SHIFT) \
>  		 & HINIC_SQ_CTRL_##member##_MASK)
>  
> +#define HINIC_SQ_CTRL_CLEAR(val, member)	\
> +		((u32)(val) & (~(HINIC_SQ_CTRL_##member##_MASK \
> +		 << HINIC_SQ_CTRL_##member##_SHIFT)))
> +
>  #define HINIC_SQ_TASK_INFO0_L2HDR_LEN_SHIFT     0
>  #define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_SHIFT    8
>  #define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_SHIFT  10
> @@ -108,28 +126,28 @@
>  
>  /* 8 bits reserved */
>  #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_SHIFT    8
> -#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_SHIFT  16
> -#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_SHIFT  24
> +#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_SHIFT   16
> +#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_SHIFT   24
>  
>  /* 8 bits reserved */
>  #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_MASK     0xFF
> -#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_MASK   0xFF
> -#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_MASK   0xFF
> +#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_MASK    0xFF
> +#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_MASK    0xFF
>  
>  #define HINIC_SQ_TASK_INFO1_SET(val, member)    \
>  		(((u32)(val) & HINIC_SQ_TASK_INFO1_##member##_MASK) <<  \
>  		 HINIC_SQ_TASK_INFO1_##member##_SHIFT)
>  
> -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_SHIFT 0
> -#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_SHIFT  12
> -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 19
> +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT  0
> +#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_SHIFT   8
> +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 16
>  /* 1 bit reserved */
> -#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT  22
> +#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT  24
>  /* 8 bits reserved */
>  
> -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_MASK  0xFFF
> -#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_MASK   0x7F
> -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK  0x3
> +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_MASK   0xFF
> +#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_MASK    0xFF
> +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK  0x7
>  /* 1 bit reserved */
>  #define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_MASK   0x3
>  /* 8 bits reserved */
> @@ -187,12 +205,15 @@
>  		 sizeof(struct hinic_sq_task) + \
>  		 (nr_sges) * sizeof(struct hinic_sq_bufdesc))
>  
> -#define HINIC_SCMD_DATA_LEN             16
> +#define HINIC_SCMD_DATA_LEN                     16
> +
> +#define HINIC_MAX_SQ_BUFDESCS                   17
>  
> -#define HINIC_MAX_SQ_BUFDESCS           17
> +#define HINIC_SQ_WQE_MAX_SIZE                   320
> +#define HINIC_RQ_WQE_SIZE                       32
>  
> -#define HINIC_SQ_WQE_MAX_SIZE           320
> -#define HINIC_RQ_WQE_SIZE               32
> +#define HINIC_MSS_DEFAULT		        0x3E00
> +#define HINIC_MSS_MIN		                0x50
>  
>  enum hinic_l4offload_type {
>  	HINIC_L4_OFF_DISABLE            = 0,
> @@ -211,6 +232,26 @@ enum hinic_pkt_parsed {
>  	HINIC_PKT_PARSED     = 1,
>  };
>  
> +enum hinic_l3_offload_type {
> +	L3TYPE_UNKNOWN = 0,
> +	IPV6_PKT = 1,
> +	IPV4_PKT_NO_CHKSUM_OFFLOAD = 2,
> +	IPV4_PKT_WITH_CHKSUM_OFFLOAD = 3,
> +};
> +
> +enum hinic_l4_offload_type {
> +	OFFLOAD_DISABLE     = 0,
> +	TCP_OFFLOAD_ENABLE  = 1,
> +	SCTP_OFFLOAD_ENABLE = 2,
> +	UDP_OFFLOAD_ENABLE  = 3,
> +};
> +
> +enum hinic_l4_tunnel_type {
> +	NOT_TUNNEL,
> +	TUNNEL_UDP_NO_CSUM,
> +	TUNNEL_UDP_CSUM,
> +};
> +
>  enum hinic_outer_l3type {
>  	HINIC_OUTER_L3TYPE_UNKNOWN              = 0,
>  	HINIC_OUTER_L3TYPE_IPV6                 = 1,
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
> index 4a8f82938ed5b..fdf2bdb6b0d06 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
> @@ -805,7 +805,8 @@ static const struct net_device_ops hinic_netdev_ops = {
>  
>  static void netdev_features_init(struct net_device *netdev)
>  {
> -	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA;
> +	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
> +			      NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
>  
>  	netdev->vlan_features = netdev->hw_features;
>  
> @@ -863,6 +864,20 @@ static void link_status_event_handler(void *handle, void *buf_in, u16 in_size,
>  	*out_size = sizeof(*ret_link_status);
>  }
>  
> +static int set_features(struct hinic_dev *nic_dev,
> +			netdev_features_t pre_features,
> +			netdev_features_t features, bool force_change)
> +{
> +	netdev_features_t changed = force_change ? ~0 : pre_features ^ features;
> +	int err = 0;
> +
> +	if (changed & NETIF_F_TSO)
> +		err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ?
> +					 HINIC_TSO_ENABLE : HINIC_TSO_DISABLE);
> +
> +	return err;
> +}
> +
>  /**
>   * nic_dev_init - Initialize the NIC device
>   * @pdev: the NIC pci device
> @@ -963,7 +978,12 @@ static int nic_dev_init(struct pci_dev *pdev)
>  	hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS,
>  				nic_dev, link_status_event_handler);
>  
> +	err = set_features(nic_dev, 0, nic_dev->netdev->features, true);
> +	if (err)
> +		goto err_set_features;
> +
>  	SET_NETDEV_DEV(netdev, &pdev->dev);
> +
>  	err = register_netdev(netdev);
>  	if (err) {
>  		dev_err(&pdev->dev, "Failed to register netdev\n");
> @@ -973,6 +993,7 @@ static int nic_dev_init(struct pci_dev *pdev)
>  	return 0;
>  
>  err_reg_netdev:
> +err_set_features:
>  	hinic_hwdev_cb_unregister(nic_dev->hwdev,
>  				  HINIC_MGMT_MSG_CMD_LINK_STATUS);
>  	cancel_work_sync(&rx_mode_work->work);
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
> index 4d4e3f05fb5fb..7575a7d3bd9f6 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
> @@ -377,3 +377,35 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev,
>  
>  	return 0;
>  }
> +
> +/**
> + * hinic_port_set_tso - set port tso configuration
> + * @nic_dev: nic device
> + * @state: the tso state to set
> + *
> + * Return 0 - Success, negative - Failure
> + **/
> +int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state)
> +{
> +	struct hinic_hwdev *hwdev = nic_dev->hwdev;
> +	struct hinic_hwif *hwif = hwdev->hwif;
> +	struct hinic_tso_config tso_cfg = {0};
> +	struct pci_dev *pdev = hwif->pdev;
> +	u16 out_size;
> +	int err;
> +
> +	tso_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
> +	tso_cfg.tso_en = state;
> +
> +	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_TSO,
> +				 &tso_cfg, sizeof(tso_cfg),
> +				 &tso_cfg, &out_size);
> +	if (err || out_size != sizeof(tso_cfg) || tso_cfg.status) {
> +		dev_err(&pdev->dev,
> +			"Failed to set port tso, ret = %d\n",
> +			tso_cfg.status);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
> index 9404365195ddf..f6e3220fe28fc 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
> @@ -72,6 +72,11 @@ enum hinic_speed {
>  	HINIC_SPEED_UNKNOWN = 0xFF,
>  };
>  
> +enum hinic_tso_state {
> +	HINIC_TSO_DISABLE = 0,
> +	HINIC_TSO_ENABLE  = 1,
> +};
> +
>  struct hinic_port_mac_cmd {
>  	u8              status;
>  	u8              version;
> @@ -167,6 +172,17 @@ struct hinic_port_cap {
>  	u8      rsvd2[3];
>  };
>  
> +struct hinic_tso_config {
> +	u8	status;
> +	u8	version;
> +	u8	rsvd0[6];
> +
> +	u16	func_id;
> +	u16	rsvd1;
> +	u8	tso_en;
> +	u8	resv2[3];
> +};
> +
>  int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
>  		       u16 vlan_id);
>  
> @@ -195,4 +211,6 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev,
>  int hinic_port_get_cap(struct hinic_dev *nic_dev,
>  		       struct hinic_port_cap *port_cap);
>  
> +int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state);
> +
>  #endif
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
> index c5fca0356c9c9..11e73e67358d1 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
> @@ -26,6 +26,13 @@
>  #include <linux/skbuff.h>
>  #include <linux/smp.h>
>  #include <asm/byteorder.h>
> +#include <linux/ip.h>
> +#include <linux/tcp.h>
> +#include <linux/sctp.h>
> +#include <linux/ipv6.h>
> +#include <net/ipv6.h>
> +#include <net/checksum.h>
> +#include <net/ip6_checksum.h>
>  
>  #include "hinic_common.h"
>  #include "hinic_hw_if.h"
> @@ -45,9 +52,31 @@
>  #define CI_UPDATE_NO_PENDING            0
>  #define CI_UPDATE_NO_COALESC            0
>  
> -#define HW_CONS_IDX(sq)         be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
> +#define HW_CONS_IDX(sq)                 be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
>  
> -#define MIN_SKB_LEN             64
> +#define MIN_SKB_LEN                     17
> +
> +#define	MAX_PAYLOAD_OFFSET	        221
> +#define TRANSPORT_OFFSET(l4_hdr, skb)	((u32)((l4_hdr) - (skb)->data))
> +
> +union hinic_l3 {
> +	struct iphdr *v4;
> +	struct ipv6hdr *v6;
> +	unsigned char *hdr;
> +};
> +
> +union hinic_l4 {
> +	struct tcphdr *tcp;
> +	struct udphdr *udp;
> +	unsigned char *hdr;
> +};
> +
> +enum hinic_offload_type {
> +	TX_OFFLOAD_TSO     = BIT(0),
> +	TX_OFFLOAD_CSUM    = BIT(1),
> +	TX_OFFLOAD_VLAN    = BIT(2),
> +	TX_OFFLOAD_INVALID = BIT(3),
> +};
>  
>  /**
>   * hinic_txq_clean_stats - Clean the statistics of specific queue
> @@ -175,18 +204,263 @@ static void tx_unmap_skb(struct hinic_dev *nic_dev, struct sk_buff *skb,
>  			 DMA_TO_DEVICE);
>  }
>  
> +static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic_l3 *ip,
> +				 union hinic_l4 *l4,
> +				 enum hinic_offload_type offload_type,
> +				 enum hinic_l3_offload_type *l3_type,
> +				 u8 *l4_proto)
> +{
> +	u8 *exthdr;
> +
> +	if (ip->v4->version == 4) {
> +		*l3_type = (offload_type == TX_OFFLOAD_CSUM) ?
> +			   IPV4_PKT_NO_CHKSUM_OFFLOAD :
> +			   IPV4_PKT_WITH_CHKSUM_OFFLOAD;
> +		*l4_proto = ip->v4->protocol;
> +	} else if (ip->v4->version == 6) {
> +		*l3_type = IPV6_PKT;
> +		exthdr = ip->hdr + sizeof(*ip->v6);
> +		*l4_proto = ip->v6->nexthdr;
> +		if (exthdr != l4->hdr) {
> +			int start = exthdr - skb->data;
> +			__be16 frag_off;
> +
> +			ipv6_skip_exthdr(skb, start, l4_proto, &frag_off);
> +		}
> +	} else {
> +		*l3_type = L3TYPE_UNKNOWN;
> +		*l4_proto = 0;
> +	}
> +}
> +
> +static void get_inner_l4_info(struct sk_buff *skb, union hinic_l4 *l4,
> +			      enum hinic_offload_type offload_type, u8 l4_proto,
> +			      enum hinic_l4_offload_type *l4_offload,
> +			      u32 *l4_len, u32 *offset)
> +{
> +	*l4_offload = OFFLOAD_DISABLE;
> +	*offset = 0;
> +	*l4_len = 0;
> +
> +	switch (l4_proto) {
> +	case IPPROTO_TCP:
> +		*l4_offload = TCP_OFFLOAD_ENABLE;
> +		/* doff in unit of 4B */
> +		*l4_len = l4->tcp->doff * 4;
> +		*offset = *l4_len + TRANSPORT_OFFSET(l4->hdr, skb);
> +		break;
> +
> +	case IPPROTO_UDP:
> +		*l4_offload = UDP_OFFLOAD_ENABLE;
> +		*l4_len = sizeof(struct udphdr);
> +		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
> +		break;
> +
> +	case IPPROTO_SCTP:
> +		/* only csum offload support sctp */
> +		if (offload_type != TX_OFFLOAD_CSUM)
> +			break;
> +
> +		*l4_offload = SCTP_OFFLOAD_ENABLE;
> +		*l4_len = sizeof(struct sctphdr);
> +		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
> +		break;
> +
> +	default:
> +		break;
> +	}
> +}
> +
> +static __sum16 csum_magic(union hinic_l3 *ip, unsigned short proto)
> +{
> +	return (ip->v4->version == 4) ?
> +		csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) :
> +		csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0);
> +}
> +
> +static int offload_tso(struct hinic_sq_task *task, u32 *queue_info,
> +		       struct sk_buff *skb)
> +{
> +	u32 offset, l4_len, ip_identify, network_hdr_len;
> +	enum hinic_l3_offload_type l3_offload;
> +	enum hinic_l4_offload_type l4_offload;
> +	union hinic_l3 ip;
> +	union hinic_l4 l4;
> +	u8 l4_proto;
> +
> +	if (!skb_is_gso(skb))
> +		return 0;
> +
> +	if (skb_cow_head(skb, 0) < 0)
> +		return -EPROTONOSUPPORT;
> +
> +	if (skb->encapsulation) {
> +		u32 gso_type = skb_shinfo(skb)->gso_type;
> +		u32 tunnel_type = 0;
> +		u32 l4_tunnel_len;
> +
> +		ip.hdr = skb_network_header(skb);
> +		l4.hdr = skb_transport_header(skb);
> +		network_hdr_len = skb_inner_network_header_len(skb);
> +
> +		if (ip.v4->version == 4) {
> +			ip.v4->tot_len = 0;
> +			l3_offload = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
> +		} else if (ip.v4->version == 6) {
> +			l3_offload = IPV6_PKT;
> +		} else {
> +			l3_offload = 0;
> +		}
> +
> +		hinic_task_set_outter_l3(task, l3_offload,
> +					 skb_network_header_len(skb));
> +
> +		if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
> +			l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP);
> +			tunnel_type = TUNNEL_UDP_CSUM;
> +		} else if (gso_type & SKB_GSO_UDP_TUNNEL) {
> +			tunnel_type = TUNNEL_UDP_NO_CSUM;
> +		}
> +
> +		l4_tunnel_len = skb_inner_network_offset(skb) -
> +				skb_transport_offset(skb);
> +		hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len);
> +
> +		ip.hdr = skb_inner_network_header(skb);
> +		l4.hdr = skb_inner_transport_header(skb);
> +	} else {
> +		ip.hdr = skb_network_header(skb);
> +		l4.hdr = skb_transport_header(skb);
> +		network_hdr_len = skb_network_header_len(skb);
> +	}
> +
> +	/* initialize inner IP header fields */
> +	if (ip.v4->version == 4)
> +		ip.v4->tot_len = 0;
> +	else
> +		ip.v6->payload_len = 0;
> +
> +	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_TSO, &l3_offload,
> +			     &l4_proto);
> +
> +	hinic_task_set_inner_l3(task, l3_offload, network_hdr_len);
> +
> +	ip_identify = 0;
> +	if (l4_proto == IPPROTO_TCP)
> +		l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP);
> +
> +	get_inner_l4_info(skb, &l4, TX_OFFLOAD_TSO, l4_proto, &l4_offload,
> +			  &l4_len, &offset);
> +
> +	hinic_set_tso_inner_l4(task, queue_info, l4_offload, l4_len, offset,
> +			       ip_identify, skb_shinfo(skb)->gso_size);
> +
> +	return 1;
> +}
> +
> +static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
> +			struct sk_buff *skb)
> +{
> +	enum hinic_l4_offload_type l4_offload;
> +	u32 offset, l4_len, network_hdr_len;
> +	enum hinic_l3_offload_type l3_type;
> +	union hinic_l3 ip;
> +	union hinic_l4 l4;
> +	u8 l4_proto;
> +
> +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> +		return 0;
> +
> +	if (skb->encapsulation) {
> +		u32 l4_tunnel_len;
> +
> +		ip.hdr = skb_network_header(skb);
> +
> +		if (ip.v4->version == 4)
> +			l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
> +		else if (ip.v4->version == 6)
> +			l3_type = IPV6_PKT;
> +		else
> +			l3_type = L3TYPE_UNKNOWN;
> +
> +		hinic_task_set_outter_l3(task, l3_type,
> +					 skb_network_header_len(skb));
> +
> +		l4_tunnel_len = skb_inner_network_offset(skb) -
> +				skb_transport_offset(skb);
> +
> +		hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM,
> +					 l4_tunnel_len);
> +
> +		ip.hdr = skb_inner_network_header(skb);
> +		l4.hdr = skb_inner_transport_header(skb);
> +		network_hdr_len = skb_inner_network_header_len(skb);
> +	} else {
> +		ip.hdr = skb_network_header(skb);
> +		l4.hdr = skb_transport_header(skb);
> +		network_hdr_len = skb_network_header_len(skb);
> +	}
> +
> +	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_CSUM, &l3_type,
> +			     &l4_proto);
> +
> +	hinic_task_set_inner_l3(task, l3_type, network_hdr_len);
> +
> +	get_inner_l4_info(skb, &l4, TX_OFFLOAD_CSUM, l4_proto, &l4_offload,
> +			  &l4_len, &offset);
> +
> +	hinic_set_cs_inner_l4(task, queue_info, l4_offload, l4_len, offset);
> +
> +	return 1;
> +}
> +
> +static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
> +			    u32 *queue_info)
> +{
> +	enum hinic_offload_type offload = 0;
> +	int enabled;
> +
> +	enabled = offload_tso(task, queue_info, skb);
> +	if (enabled > 0) {
> +		offload |= TX_OFFLOAD_TSO;
> +	} else if (enabled == 0) {
> +		enabled = offload_csum(task, queue_info, skb);
> +		if (enabled)
> +			offload |= TX_OFFLOAD_CSUM;
> +	} else {
> +		return -EPROTONOSUPPORT;
> +	}
> +
> +	if (offload)
> +		hinic_task_set_l2hdr(task, skb_network_offset(skb));
> +
> +	/* payload offset should not more than 221 */
> +	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_PLDOFF) >
> +	    MAX_PAYLOAD_OFFSET) {
> +		return -EPROTONOSUPPORT;
> +	}
> +
> +	/* mss should not less than 80 */
> +	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_MSS) < HINIC_MSS_MIN) {
> +		*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
> +		*queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS);
> +	}
> +
> +	return 0;
> +}
> +
>  netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  {
>  	struct hinic_dev *nic_dev = netdev_priv(netdev);
> +	u16 prod_idx, q_id = skb->queue_mapping;
>  	struct netdev_queue *netdev_txq;
>  	int nr_sges, err = NETDEV_TX_OK;
>  	struct hinic_sq_wqe *sq_wqe;
>  	unsigned int wqe_size;
>  	struct hinic_txq *txq;
>  	struct hinic_qp *qp;
> -	u16 prod_idx;
>  
> -	txq = &nic_dev->txqs[skb->queue_mapping];
> +	txq = &nic_dev->txqs[q_id];
>  	qp = container_of(txq->sq, struct hinic_qp, sq);
>  
>  	if (skb->len < MIN_SKB_LEN) {
> @@ -236,15 +510,23 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  process_sq_wqe:
>  	hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
>  
> +	err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info);
> +	if (err)
> +		goto offload_error;
> +
>  	hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
>  
>  flush_skbs:
> -	netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping);
> +	netdev_txq = netdev_get_tx_queue(netdev, q_id);
>  	if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq)))
>  		hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0);
>  
>  	return err;
>  
> +offload_error:
> +	hinic_sq_return_wqe(txq->sq, wqe_size);
> +	tx_unmap_skb(nic_dev, skb, txq->sges);
> +
>  skb_error:
>  	dev_kfree_skb_any(skb);
>  
> @@ -252,7 +534,8 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  	u64_stats_update_begin(&txq->txq_stats.syncp);
>  	txq->txq_stats.tx_dropped++;
>  	u64_stats_update_end(&txq->txq_stats.syncp);
> -	return err;
> +
> +	return NETDEV_TX_OK;
>  }
>  
>  /**
Stefan Bader Nov. 7, 2018, 8:21 a.m. UTC | #3
On 30.10.18 15:04, dann frazier wrote:
> From: Zhao Chen <zhaochen6@huawei.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1800664
> 
> This patch adds checksum offload and TSO support for the HiNIC
> driver. Performance testing (iperf) shows more than 100% improvement
> in TCP streams.
> 
> Signed-off-by: Zhao Chen <zhaochen6@huawei.com>
> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> (cherry picked from commit cc18a7543d2f63a2c93fc61cfa7fd8be5464f75e)
> Signed-off-by: dann frazier <dann.frazier@canonical.com>
> ---

While I am not totally opposed to this, since it is limited to a single
driver, I cannot say how widespread the supported HW is (or how many
models this driver covers). That, together with the fact that the
requested changes come directly from a very recent upstream source,
makes me want to wait at least one additional cycle before we pull this
all the way back into Bionic.
So possibly apply to Cosmic now and see how that goes, then follow up
with Bionic later?

-Stefan

> +			l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP);
> +			tunnel_type = TUNNEL_UDP_CSUM;
> +		} else if (gso_type & SKB_GSO_UDP_TUNNEL) {
> +			tunnel_type = TUNNEL_UDP_NO_CSUM;
> +		}
> +
> +		l4_tunnel_len = skb_inner_network_offset(skb) -
> +				skb_transport_offset(skb);
> +		hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len);
> +
> +		ip.hdr = skb_inner_network_header(skb);
> +		l4.hdr = skb_inner_transport_header(skb);
> +	} else {
> +		ip.hdr = skb_network_header(skb);
> +		l4.hdr = skb_transport_header(skb);
> +		network_hdr_len = skb_network_header_len(skb);
> +	}
> +
> +	/* initialize inner IP header fields */
> +	if (ip.v4->version == 4)
> +		ip.v4->tot_len = 0;
> +	else
> +		ip.v6->payload_len = 0;
> +
> +	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_TSO, &l3_offload,
> +			     &l4_proto);
> +
> +	hinic_task_set_inner_l3(task, l3_offload, network_hdr_len);
> +
> +	ip_identify = 0;
> +	if (l4_proto == IPPROTO_TCP)
> +		l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP);
> +
> +	get_inner_l4_info(skb, &l4, TX_OFFLOAD_TSO, l4_proto, &l4_offload,
> +			  &l4_len, &offset);
> +
> +	hinic_set_tso_inner_l4(task, queue_info, l4_offload, l4_len, offset,
> +			       ip_identify, skb_shinfo(skb)->gso_size);
> +
> +	return 1;
> +}
> +
> +static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
> +			struct sk_buff *skb)
> +{
> +	enum hinic_l4_offload_type l4_offload;
> +	u32 offset, l4_len, network_hdr_len;
> +	enum hinic_l3_offload_type l3_type;
> +	union hinic_l3 ip;
> +	union hinic_l4 l4;
> +	u8 l4_proto;
> +
> +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> +		return 0;
> +
> +	if (skb->encapsulation) {
> +		u32 l4_tunnel_len;
> +
> +		ip.hdr = skb_network_header(skb);
> +
> +		if (ip.v4->version == 4)
> +			l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
> +		else if (ip.v4->version == 6)
> +			l3_type = IPV6_PKT;
> +		else
> +			l3_type = L3TYPE_UNKNOWN;
> +
> +		hinic_task_set_outter_l3(task, l3_type,
> +					 skb_network_header_len(skb));
> +
> +		l4_tunnel_len = skb_inner_network_offset(skb) -
> +				skb_transport_offset(skb);
> +
> +		hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM,
> +					 l4_tunnel_len);
> +
> +		ip.hdr = skb_inner_network_header(skb);
> +		l4.hdr = skb_inner_transport_header(skb);
> +		network_hdr_len = skb_inner_network_header_len(skb);
> +	} else {
> +		ip.hdr = skb_network_header(skb);
> +		l4.hdr = skb_transport_header(skb);
> +		network_hdr_len = skb_network_header_len(skb);
> +	}
> +
> +	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_CSUM, &l3_type,
> +			     &l4_proto);
> +
> +	hinic_task_set_inner_l3(task, l3_type, network_hdr_len);
> +
> +	get_inner_l4_info(skb, &l4, TX_OFFLOAD_CSUM, l4_proto, &l4_offload,
> +			  &l4_len, &offset);
> +
> +	hinic_set_cs_inner_l4(task, queue_info, l4_offload, l4_len, offset);
> +
> +	return 1;
> +}
> +
> +static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
> +			    u32 *queue_info)
> +{
> +	enum hinic_offload_type offload = 0;
> +	int enabled;
> +
> +	enabled = offload_tso(task, queue_info, skb);
> +	if (enabled > 0) {
> +		offload |= TX_OFFLOAD_TSO;
> +	} else if (enabled == 0) {
> +		enabled = offload_csum(task, queue_info, skb);
> +		if (enabled)
> +			offload |= TX_OFFLOAD_CSUM;
> +	} else {
> +		return -EPROTONOSUPPORT;
> +	}
> +
> +	if (offload)
> +		hinic_task_set_l2hdr(task, skb_network_offset(skb));
> +
> +	/* payload offset should not more than 221 */
> +	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_PLDOFF) >
> +	    MAX_PAYLOAD_OFFSET) {
> +		return -EPROTONOSUPPORT;
> +	}
> +
> +	/* mss should not less than 80 */
> +	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_MSS) < HINIC_MSS_MIN) {
> +		*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
> +		*queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS);
> +	}
> +
> +	return 0;
> +}
> +
>  netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  {
>  	struct hinic_dev *nic_dev = netdev_priv(netdev);
> +	u16 prod_idx, q_id = skb->queue_mapping;
>  	struct netdev_queue *netdev_txq;
>  	int nr_sges, err = NETDEV_TX_OK;
>  	struct hinic_sq_wqe *sq_wqe;
>  	unsigned int wqe_size;
>  	struct hinic_txq *txq;
>  	struct hinic_qp *qp;
> -	u16 prod_idx;
>  
> -	txq = &nic_dev->txqs[skb->queue_mapping];
> +	txq = &nic_dev->txqs[q_id];
>  	qp = container_of(txq->sq, struct hinic_qp, sq);
>  
>  	if (skb->len < MIN_SKB_LEN) {
> @@ -236,15 +510,23 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  process_sq_wqe:
>  	hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
>  
> +	err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info);
> +	if (err)
> +		goto offload_error;
> +
>  	hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
>  
>  flush_skbs:
> -	netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping);
> +	netdev_txq = netdev_get_tx_queue(netdev, q_id);
>  	if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq)))
>  		hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0);
>  
>  	return err;
>  
> +offload_error:
> +	hinic_sq_return_wqe(txq->sq, wqe_size);
> +	tx_unmap_skb(nic_dev, skb, txq->sges);
> +
>  skb_error:
>  	dev_kfree_skb_any(skb);
>  
> @@ -252,7 +534,8 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  	u64_stats_update_begin(&txq->txq_stats.syncp);
>  	txq->txq_stats.tx_dropped++;
>  	u64_stats_update_end(&txq->txq_stats.syncp);
> -	return err;
> +
> +	return NETDEV_TX_OK;
>  }
>  
>  /**
>
dann frazier Nov. 7, 2018, 4:53 p.m. UTC | #4
On Wed, Nov 7, 2018 at 1:21 AM Stefan Bader <stefan.bader@canonical.com> wrote:
>
> On 30.10.18 15:04, dann frazier wrote:
> > From: Zhao Chen <zhaochen6@huawei.com>
> >
> > BugLink: https://bugs.launchpad.net/bugs/1800664
> >
> > This patch adds checksum offload and TSO support for the HiNIC
> driver. Performance test (Iperf) shows more than 100% improvement
> > in TCP streams.
> >
> > Signed-off-by: Zhao Chen <zhaochen6@huawei.com>
> > Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
> > Signed-off-by: David S. Miller <davem@davemloft.net>
> > (cherry picked from commit cc18a7543d2f63a2c93fc61cfa7fd8be5464f75e)
> > Signed-off-by: dann frazier <dann.frazier@canonical.com>
> > ---
>
> While I am not totally opposed to it due to the limitation to only one driver, I
> cannot say how widespread the HW is which is supported (and how many models this
> driver covers). This together with the fact that the requested changes come
> directly from a very recent upstream source, I would rather wait at least one
> additional cycle before we pull this all the way back into Bionic.
> So possibly apply to Cosmic now and we can see how that goes. Then follow up
> with Bionic later?

That seems fair to me, thanks Stefan. Should I resubmit next cycle, or
is that unnecessary?

  -dann
Stefan Bader Nov. 7, 2018, 5:34 p.m. UTC | #5
On 07.11.18 17:53, dann frazier wrote:
> On Wed, Nov 7, 2018 at 1:21 AM Stefan Bader <stefan.bader@canonical.com> wrote:
>>
>> On 30.10.18 15:04, dann frazier wrote:
>>> From: Zhao Chen <zhaochen6@huawei.com>
>>>
>>> BugLink: https://bugs.launchpad.net/bugs/1800664
>>>
>>> This patch adds checksum offload and TSO support for the HiNIC
> >>> driver. Performance test (Iperf) shows more than 100% improvement
>>> in TCP streams.
>>>
>>> Signed-off-by: Zhao Chen <zhaochen6@huawei.com>
>>> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
>>> Signed-off-by: David S. Miller <davem@davemloft.net>
>>> (cherry picked from commit cc18a7543d2f63a2c93fc61cfa7fd8be5464f75e)
>>> Signed-off-by: dann frazier <dann.frazier@canonical.com>
>>> ---
>>
>> While I am not totally opposed to it due to the limitation to only one driver, I
>> cannot say how widespread the HW is which is supported (and how many models this
>> driver covers). This together with the fact that the requested changes come
>> directly from a very recent upstream source, I would rather wait at least one
>> additional cycle before we pull this all the way back into Bionic.
>> So possibly apply to Cosmic now and we can see how that goes. Then follow up
>> with Bionic later?
> 
> That seems fair to me, thanks Stefan. Should I resubmit next cycle, or
> is that unnecessary?
> 
>   -dann
> 
We will keep it on the list, so there should be no need to re-submit. But if you
keep an eye on this and ping us if there is no update the week before starting
the next cycle, that might be helpful.

-Stefan
Stefan Bader Nov. 8, 2018, 12:20 p.m. UTC | #6
On 30.10.18 15:04, dann frazier wrote:
> From: Zhao Chen <zhaochen6@huawei.com>
> 
> BugLink: https://bugs.launchpad.net/bugs/1800664
> 
> This patch adds checksum offload and TSO support for the HiNIC
> driver. Performance test (Iperf) shows more than 100% improvement
> in TCP streams.
> 
> Signed-off-by: Zhao Chen <zhaochen6@huawei.com>
> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
> Signed-off-by: David S. Miller <davem@davemloft.net>
> (cherry picked from commit cc18a7543d2f63a2c93fc61cfa7fd8be5464f75e)
> Signed-off-by: dann frazier <dann.frazier@canonical.com>
> ---
>  .../net/ethernet/huawei/hinic/hinic_hw_dev.h  |   2 +
>  .../net/ethernet/huawei/hinic/hinic_hw_qp.c   | 121 +++++--
>  .../net/ethernet/huawei/hinic/hinic_hw_qp.h   |  27 ++
>  .../net/ethernet/huawei/hinic/hinic_hw_wq.c   |  14 +
>  .../net/ethernet/huawei/hinic/hinic_hw_wq.h   |   2 +
>  .../net/ethernet/huawei/hinic/hinic_hw_wqe.h  |  97 ++++--
>  .../net/ethernet/huawei/hinic/hinic_main.c    |  23 +-
>  .../net/ethernet/huawei/hinic/hinic_port.c    |  32 ++
>  .../net/ethernet/huawei/hinic/hinic_port.h    |  18 ++
>  drivers/net/ethernet/huawei/hinic/hinic_tx.c  | 295 +++++++++++++++++-
>  10 files changed, 571 insertions(+), 60 deletions(-)
> 
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
> index 0f5563f3b7798..097b5502603fc 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
> @@ -58,6 +58,8 @@ enum hinic_port_cmd {
>  
>  	HINIC_PORT_CMD_GET_GLOBAL_QPN   = 102,
>  
> +	HINIC_PORT_CMD_SET_TSO          = 112,
> +
>  	HINIC_PORT_CMD_GET_CAP          = 170,
>  };
>  
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
> index cb239627770f4..967c993d5303a 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
> @@ -70,8 +70,6 @@
>  #define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
>  #define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)
>  
> -#define TX_MAX_MSS_DEFAULT      0x3E00
> -
>  enum sq_wqe_type {
>  	SQ_NORMAL_WQE = 0,
>  };
> @@ -494,33 +492,16 @@ static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
>  			  HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
>  			  HINIC_SQ_CTRL_SET(ctrl_size, LEN);
>  
> -	ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT,
> -					     QUEUE_INFO_MSS);
> +	ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
> +					     QUEUE_INFO_MSS) |
> +			   HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
>  }
>  
>  static void sq_prepare_task(struct hinic_sq_task *task)
>  {
> -	task->pkt_info0 =
> -		HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) |
> -		HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) |
> -		HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
> -					INNER_L3TYPE) |
> -		HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE,
> -					VLAN_OFFLOAD) |
> -		HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG);
> -
> -	task->pkt_info1 =
> -		HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) |
> -		HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) |
> -		HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN);
> -
> -	task->pkt_info2 =
> -		HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) |
> -		HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN)  |
> -		HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN,
> -					TUNNEL_L4TYPE)    |
> -		HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
> -					OUTER_L3TYPE);
> +	task->pkt_info0 = 0;
> +	task->pkt_info1 = 0;
> +	task->pkt_info2 = 0;
>  
>  	task->ufo_v6_identify = 0;
>  
> @@ -529,6 +510,86 @@ static void sq_prepare_task(struct hinic_sq_task *task)
>  	task->zero_pad = 0;
>  }
>  
> +void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
> +{
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
> +}
> +
> +void hinic_task_set_outter_l3(struct hinic_sq_task *task,
> +			      enum hinic_l3_offload_type l3_type,
> +			      u32 network_len)
> +{
> +	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
> +			   HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
> +}
> +
> +void hinic_task_set_inner_l3(struct hinic_sq_task *task,
> +			     enum hinic_l3_offload_type l3_type,
> +			     u32 network_len)
> +{
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
> +	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
> +}
> +
> +void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
> +			      enum hinic_l4_offload_type l4_type,
> +			      u32 tunnel_len)
> +{
> +	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
> +			   HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
> +}
> +
> +void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
> +			   enum hinic_l4_offload_type l4_offload,
> +			   u32 l4_len, u32 offset)
> +{
> +	u32 tcp_udp_cs = 0, sctp = 0;
> +	u32 mss = HINIC_MSS_DEFAULT;
> +
> +	if (l4_offload == TCP_OFFLOAD_ENABLE ||
> +	    l4_offload == UDP_OFFLOAD_ENABLE)
> +		tcp_udp_cs = 1;
> +	else if (l4_offload == SCTP_OFFLOAD_ENABLE)
> +		sctp = 1;
> +
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
> +	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
> +
> +	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
> +		       HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
> +		       HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);
> +
> +	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
> +	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
> +}
> +
> +void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
> +			    enum hinic_l4_offload_type l4_offload,
> +			    u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
> +{
> +	u32 tso = 0, ufo = 0;
> +
> +	if (l4_offload == TCP_OFFLOAD_ENABLE)
> +		tso = 1;
> +	else if (l4_offload == UDP_OFFLOAD_ENABLE)
> +		ufo = 1;
> +
> +	task->ufo_v6_identify = ip_ident;
> +
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
> +	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
> +	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
> +
> +	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
> +		       HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
> +		       HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
> +		       HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);
> +
> +	/* set MSS value */
> +	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
> +	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
> +}
> +
>  /**
>   * hinic_sq_prepare_wqe - prepare wqe before insert to the queue
>   * @sq: send queue
> @@ -612,6 +673,16 @@ struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
>  	return &hw_wqe->sq_wqe;
>  }
>  
> +/**
> + * hinic_sq_return_wqe - return the wqe to the sq
> + * @sq: send queue
> + * @wqe_size: the size of the wqe
> + **/
> +void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
> +{
> +	hinic_return_wqe(sq->wq, wqe_size);
> +}
> +
>  /**
>   * hinic_sq_write_wqe - write the wqe to the sq
>   * @sq: send queue
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
> index 6c84f83ec2831..a0dc63a4bfc7a 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
> @@ -149,6 +149,31 @@ int hinic_get_sq_free_wqebbs(struct hinic_sq *sq);
>  
>  int hinic_get_rq_free_wqebbs(struct hinic_rq *rq);
>  
> +void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len);
> +
> +void hinic_task_set_outter_l3(struct hinic_sq_task *task,
> +			      enum hinic_l3_offload_type l3_type,
> +			      u32 network_len);
> +
> +void hinic_task_set_inner_l3(struct hinic_sq_task *task,
> +			     enum hinic_l3_offload_type l3_type,
> +			     u32 network_len);
> +
> +void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
> +			      enum hinic_l4_offload_type l4_type,
> +			      u32 tunnel_len);
> +
> +void hinic_set_cs_inner_l4(struct hinic_sq_task *task,
> +			   u32 *queue_info,
> +			   enum hinic_l4_offload_type l4_offload,
> +			   u32 l4_len, u32 offset);
> +
> +void hinic_set_tso_inner_l4(struct hinic_sq_task *task,
> +			    u32 *queue_info,
> +			    enum hinic_l4_offload_type l4_offload,
> +			    u32 l4_len,
> +			    u32 offset, u32 ip_ident, u32 mss);
> +
>  void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
>  			  struct hinic_sq_wqe *wqe, struct hinic_sge *sges,
>  			  int nr_sges);
> @@ -159,6 +184,8 @@ void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
>  struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
>  				      unsigned int wqe_size, u16 *prod_idx);
>  
> +void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size);
> +
>  void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
>  			struct hinic_sq_wqe *wqe, struct sk_buff *skb,
>  			unsigned int wqe_size);
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
> index 3e3181c089bdc..f92f1bf3901a7 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
> @@ -774,6 +774,20 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
>  	return WQ_PAGE_ADDR(wq, *prod_idx) + WQE_PAGE_OFF(wq, *prod_idx);
>  }
>  
> +/**
> + * hinic_return_wqe - return the wqe when transmit failed
> + * @wq: wq to return wqe
> + * @wqe_size: wqe size
> + **/
> +void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size)
> +{
> +	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
> +
> +	atomic_sub(num_wqebbs, &wq->prod_idx);
> +
> +	atomic_add(num_wqebbs, &wq->delta);
> +}
> +
>  /**
>   * hinic_put_wqe - return the wqe place to use for a new wqe
>   * @wq: wq to return wqe
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
> index 9c030a0f035e2..9b66545ba563c 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
> @@ -104,6 +104,8 @@ void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq);
>  struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
>  				   u16 *prod_idx);
>  
> +void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size);
> +
>  void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size);
>  
>  struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
> index bc73485483c59..9754d6ed5f4ac 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
> @@ -62,19 +62,33 @@
>  			(((val) >> HINIC_CMDQ_WQE_HEADER_##member##_SHIFT) \
>  			 & HINIC_CMDQ_WQE_HEADER_##member##_MASK)
>  
> -#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT    0
> -#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT        16
> -#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT         22
> -#define HINIC_SQ_CTRL_LEN_SHIFT                 29
> -
> -#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK     0xFF
> -#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK         0x1F
> -#define HINIC_SQ_CTRL_DATA_FORMAT_MASK          0x1
> -#define HINIC_SQ_CTRL_LEN_MASK                  0x3
> -
> -#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT      13
> -
> -#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK       0x3FFF
> +#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT           0
> +#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT               16
> +#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT                22
> +#define HINIC_SQ_CTRL_LEN_SHIFT                        29
> +
> +#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK            0xFF
> +#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK                0x1F
> +#define HINIC_SQ_CTRL_DATA_FORMAT_MASK                 0x1
> +#define HINIC_SQ_CTRL_LEN_MASK                         0x3
> +
> +#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT          2
> +#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_SHIFT             10
> +#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_SHIFT             11
> +#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT       12
> +#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT             13
> +#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_SHIFT            27
> +#define HINIC_SQ_CTRL_QUEUE_INFO_UC_SHIFT              28
> +#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_SHIFT             29
> +
> +#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_MASK           0xFF
> +#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_MASK              0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_MASK              0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	       0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK              0x3FFF
> +#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_MASK             0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_UC_MASK               0x1
> +#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_MASK              0x7
>  
>  #define HINIC_SQ_CTRL_SET(val, member)          \
>  		(((u32)(val) & HINIC_SQ_CTRL_##member##_MASK) \
> @@ -84,6 +98,10 @@
>  		(((val) >> HINIC_SQ_CTRL_##member##_SHIFT) \
>  		 & HINIC_SQ_CTRL_##member##_MASK)
>  
> +#define HINIC_SQ_CTRL_CLEAR(val, member)	\
> +		((u32)(val) & (~(HINIC_SQ_CTRL_##member##_MASK \
> +		 << HINIC_SQ_CTRL_##member##_SHIFT)))
> +
>  #define HINIC_SQ_TASK_INFO0_L2HDR_LEN_SHIFT     0
>  #define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_SHIFT    8
>  #define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_SHIFT  10
> @@ -108,28 +126,28 @@
>  
>  /* 8 bits reserved */
>  #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_SHIFT    8
> -#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_SHIFT  16
> -#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_SHIFT  24
> +#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_SHIFT   16
> +#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_SHIFT   24
>  
>  /* 8 bits reserved */
>  #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_MASK     0xFF
> -#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_MASK   0xFF
> -#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_MASK   0xFF
> +#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_MASK    0xFF
> +#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_MASK    0xFF
>  
>  #define HINIC_SQ_TASK_INFO1_SET(val, member)    \
>  		(((u32)(val) & HINIC_SQ_TASK_INFO1_##member##_MASK) <<  \
>  		 HINIC_SQ_TASK_INFO1_##member##_SHIFT)
>  
> -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_SHIFT 0
> -#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_SHIFT  12
> -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 19
> +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT  0
> +#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_SHIFT   8
> +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 16
>  /* 1 bit reserved */
> -#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT  22
> +#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT  24
>  /* 8 bits reserved */
>  
> -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_MASK  0xFFF
> -#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_MASK   0x7F
> -#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK  0x3
> +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_MASK   0xFF
> +#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_MASK    0xFF
> +#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK  0x7
>  /* 1 bit reserved */
>  #define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_MASK   0x3
>  /* 8 bits reserved */
> @@ -187,12 +205,15 @@
>  		 sizeof(struct hinic_sq_task) + \
>  		 (nr_sges) * sizeof(struct hinic_sq_bufdesc))
>  
> -#define HINIC_SCMD_DATA_LEN             16
> +#define HINIC_SCMD_DATA_LEN                     16
> +
> +#define HINIC_MAX_SQ_BUFDESCS                   17
>  
> -#define HINIC_MAX_SQ_BUFDESCS           17
> +#define HINIC_SQ_WQE_MAX_SIZE                   320
> +#define HINIC_RQ_WQE_SIZE                       32
>  
> -#define HINIC_SQ_WQE_MAX_SIZE           320
> -#define HINIC_RQ_WQE_SIZE               32
> +#define HINIC_MSS_DEFAULT		        0x3E00
> +#define HINIC_MSS_MIN		                0x50
>  
>  enum hinic_l4offload_type {
>  	HINIC_L4_OFF_DISABLE            = 0,
> @@ -211,6 +232,26 @@ enum hinic_pkt_parsed {
>  	HINIC_PKT_PARSED     = 1,
>  };
>  
> +enum hinic_l3_offload_type {
> +	L3TYPE_UNKNOWN = 0,
> +	IPV6_PKT = 1,
> +	IPV4_PKT_NO_CHKSUM_OFFLOAD = 2,
> +	IPV4_PKT_WITH_CHKSUM_OFFLOAD = 3,
> +};
> +
> +enum hinic_l4_offload_type {
> +	OFFLOAD_DISABLE     = 0,
> +	TCP_OFFLOAD_ENABLE  = 1,
> +	SCTP_OFFLOAD_ENABLE = 2,
> +	UDP_OFFLOAD_ENABLE  = 3,
> +};
> +
> +enum hinic_l4_tunnel_type {
> +	NOT_TUNNEL,
> +	TUNNEL_UDP_NO_CSUM,
> +	TUNNEL_UDP_CSUM,
> +};
> +
>  enum hinic_outer_l3type {
>  	HINIC_OUTER_L3TYPE_UNKNOWN              = 0,
>  	HINIC_OUTER_L3TYPE_IPV6                 = 1,
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
> index 4a8f82938ed5b..fdf2bdb6b0d06 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
> @@ -805,7 +805,8 @@ static const struct net_device_ops hinic_netdev_ops = {
>  
>  static void netdev_features_init(struct net_device *netdev)
>  {
> -	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA;
> +	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
> +			      NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
>  
>  	netdev->vlan_features = netdev->hw_features;
>  
> @@ -863,6 +864,20 @@ static void link_status_event_handler(void *handle, void *buf_in, u16 in_size,
>  	*out_size = sizeof(*ret_link_status);
>  }
>  
> +static int set_features(struct hinic_dev *nic_dev,
> +			netdev_features_t pre_features,
> +			netdev_features_t features, bool force_change)
> +{
> +	netdev_features_t changed = force_change ? ~0 : pre_features ^ features;
> +	int err = 0;
> +
> +	if (changed & NETIF_F_TSO)
> +		err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ?
> +					 HINIC_TSO_ENABLE : HINIC_TSO_DISABLE);
> +
> +	return err;
> +}
> +
>  /**
>   * nic_dev_init - Initialize the NIC device
>   * @pdev: the NIC pci device
> @@ -963,7 +978,12 @@ static int nic_dev_init(struct pci_dev *pdev)
>  	hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS,
>  				nic_dev, link_status_event_handler);
>  
> +	err = set_features(nic_dev, 0, nic_dev->netdev->features, true);
> +	if (err)
> +		goto err_set_features;
> +
>  	SET_NETDEV_DEV(netdev, &pdev->dev);
> +
>  	err = register_netdev(netdev);
>  	if (err) {
>  		dev_err(&pdev->dev, "Failed to register netdev\n");
> @@ -973,6 +993,7 @@ static int nic_dev_init(struct pci_dev *pdev)
>  	return 0;
>  
>  err_reg_netdev:
> +err_set_features:
>  	hinic_hwdev_cb_unregister(nic_dev->hwdev,
>  				  HINIC_MGMT_MSG_CMD_LINK_STATUS);
>  	cancel_work_sync(&rx_mode_work->work);
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
> index 4d4e3f05fb5fb..7575a7d3bd9f6 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
> @@ -377,3 +377,35 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev,
>  
>  	return 0;
>  }
> +
> +/**
> + * hinic_port_set_tso - set port tso configuration
> + * @nic_dev: nic device
> + * @state: the tso state to set
> + *
> + * Return 0 - Success, negative - Failure
> + **/
> +int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state)
> +{
> +	struct hinic_hwdev *hwdev = nic_dev->hwdev;
> +	struct hinic_hwif *hwif = hwdev->hwif;
> +	struct hinic_tso_config tso_cfg = {0};
> +	struct pci_dev *pdev = hwif->pdev;
> +	u16 out_size;
> +	int err;
> +
> +	tso_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
> +	tso_cfg.tso_en = state;
> +
> +	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_TSO,
> +				 &tso_cfg, sizeof(tso_cfg),
> +				 &tso_cfg, &out_size);
> +	if (err || out_size != sizeof(tso_cfg) || tso_cfg.status) {
> +		dev_err(&pdev->dev,
> +			"Failed to set port tso, ret = %d\n",
> +			tso_cfg.status);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
> index 9404365195ddf..f6e3220fe28fc 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
> @@ -72,6 +72,11 @@ enum hinic_speed {
>  	HINIC_SPEED_UNKNOWN = 0xFF,
>  };
>  
> +enum hinic_tso_state {
> +	HINIC_TSO_DISABLE = 0,
> +	HINIC_TSO_ENABLE  = 1,
> +};
> +
>  struct hinic_port_mac_cmd {
>  	u8              status;
>  	u8              version;
> @@ -167,6 +172,17 @@ struct hinic_port_cap {
>  	u8      rsvd2[3];
>  };
>  
> +struct hinic_tso_config {
> +	u8	status;
> +	u8	version;
> +	u8	rsvd0[6];
> +
> +	u16	func_id;
> +	u16	rsvd1;
> +	u8	tso_en;
> +	u8	resv2[3];
> +};
> +
>  int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
>  		       u16 vlan_id);
>  
> @@ -195,4 +211,6 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev,
>  int hinic_port_get_cap(struct hinic_dev *nic_dev,
>  		       struct hinic_port_cap *port_cap);
>  
> +int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state);
> +
>  #endif
> diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
> index c5fca0356c9c9..11e73e67358d1 100644
> --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
> +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
> @@ -26,6 +26,13 @@
>  #include <linux/skbuff.h>
>  #include <linux/smp.h>
>  #include <asm/byteorder.h>
> +#include <linux/ip.h>
> +#include <linux/tcp.h>
> +#include <linux/sctp.h>
> +#include <linux/ipv6.h>
> +#include <net/ipv6.h>
> +#include <net/checksum.h>
> +#include <net/ip6_checksum.h>
>  
>  #include "hinic_common.h"
>  #include "hinic_hw_if.h"
> @@ -45,9 +52,31 @@
>  #define CI_UPDATE_NO_PENDING            0
>  #define CI_UPDATE_NO_COALESC            0
>  
> -#define HW_CONS_IDX(sq)         be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
> +#define HW_CONS_IDX(sq)                 be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
>  
> -#define MIN_SKB_LEN             64
> +#define MIN_SKB_LEN                     17
> +
> +#define	MAX_PAYLOAD_OFFSET	        221
> +#define TRANSPORT_OFFSET(l4_hdr, skb)	((u32)((l4_hdr) - (skb)->data))
> +
> +union hinic_l3 {
> +	struct iphdr *v4;
> +	struct ipv6hdr *v6;
> +	unsigned char *hdr;
> +};
> +
> +union hinic_l4 {
> +	struct tcphdr *tcp;
> +	struct udphdr *udp;
> +	unsigned char *hdr;
> +};
> +
> +enum hinic_offload_type {
> +	TX_OFFLOAD_TSO     = BIT(0),
> +	TX_OFFLOAD_CSUM    = BIT(1),
> +	TX_OFFLOAD_VLAN    = BIT(2),
> +	TX_OFFLOAD_INVALID = BIT(3),
> +};
>  
>  /**
>   * hinic_txq_clean_stats - Clean the statistics of specific queue
> @@ -175,18 +204,263 @@ static void tx_unmap_skb(struct hinic_dev *nic_dev, struct sk_buff *skb,
>  			 DMA_TO_DEVICE);
>  }
>  
> +static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic_l3 *ip,
> +				 union hinic_l4 *l4,
> +				 enum hinic_offload_type offload_type,
> +				 enum hinic_l3_offload_type *l3_type,
> +				 u8 *l4_proto)
> +{
> +	u8 *exthdr;
> +
> +	if (ip->v4->version == 4) {
> +		*l3_type = (offload_type == TX_OFFLOAD_CSUM) ?
> +			   IPV4_PKT_NO_CHKSUM_OFFLOAD :
> +			   IPV4_PKT_WITH_CHKSUM_OFFLOAD;
> +		*l4_proto = ip->v4->protocol;
> +	} else if (ip->v4->version == 6) {
> +		*l3_type = IPV6_PKT;
> +		exthdr = ip->hdr + sizeof(*ip->v6);
> +		*l4_proto = ip->v6->nexthdr;
> +		if (exthdr != l4->hdr) {
> +			int start = exthdr - skb->data;
> +			__be16 frag_off;
> +
> +			ipv6_skip_exthdr(skb, start, l4_proto, &frag_off);
> +		}
> +	} else {
> +		*l3_type = L3TYPE_UNKNOWN;
> +		*l4_proto = 0;
> +	}
> +}
> +
> +static void get_inner_l4_info(struct sk_buff *skb, union hinic_l4 *l4,
> +			      enum hinic_offload_type offload_type, u8 l4_proto,
> +			      enum hinic_l4_offload_type *l4_offload,
> +			      u32 *l4_len, u32 *offset)
> +{
> +	*l4_offload = OFFLOAD_DISABLE;
> +	*offset = 0;
> +	*l4_len = 0;
> +
> +	switch (l4_proto) {
> +	case IPPROTO_TCP:
> +		*l4_offload = TCP_OFFLOAD_ENABLE;
> +		/* doff in unit of 4B */
> +		*l4_len = l4->tcp->doff * 4;
> +		*offset = *l4_len + TRANSPORT_OFFSET(l4->hdr, skb);
> +		break;
> +
> +	case IPPROTO_UDP:
> +		*l4_offload = UDP_OFFLOAD_ENABLE;
> +		*l4_len = sizeof(struct udphdr);
> +		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
> +		break;
> +
> +	case IPPROTO_SCTP:
> +		/* only csum offload support sctp */
> +		if (offload_type != TX_OFFLOAD_CSUM)
> +			break;
> +
> +		*l4_offload = SCTP_OFFLOAD_ENABLE;
> +		*l4_len = sizeof(struct sctphdr);
> +		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
> +		break;
> +
> +	default:
> +		break;
> +	}
> +}
> +
> +static __sum16 csum_magic(union hinic_l3 *ip, unsigned short proto)
> +{
> +	return (ip->v4->version == 4) ?
> +		csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) :
> +		csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0);
> +}
> +
> +static int offload_tso(struct hinic_sq_task *task, u32 *queue_info,
> +		       struct sk_buff *skb)
> +{
> +	u32 offset, l4_len, ip_identify, network_hdr_len;
> +	enum hinic_l3_offload_type l3_offload;
> +	enum hinic_l4_offload_type l4_offload;
> +	union hinic_l3 ip;
> +	union hinic_l4 l4;
> +	u8 l4_proto;
> +
> +	if (!skb_is_gso(skb))
> +		return 0;
> +
> +	if (skb_cow_head(skb, 0) < 0)
> +		return -EPROTONOSUPPORT;
> +
> +	if (skb->encapsulation) {
> +		u32 gso_type = skb_shinfo(skb)->gso_type;
> +		u32 tunnel_type = 0;
> +		u32 l4_tunnel_len;
> +
> +		ip.hdr = skb_network_header(skb);
> +		l4.hdr = skb_transport_header(skb);
> +		network_hdr_len = skb_inner_network_header_len(skb);
> +
> +		if (ip.v4->version == 4) {
> +			ip.v4->tot_len = 0;
> +			l3_offload = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
> +		} else if (ip.v4->version == 6) {
> +			l3_offload = IPV6_PKT;
> +		} else {
> +			l3_offload = 0;
> +		}
> +
> +		hinic_task_set_outter_l3(task, l3_offload,
> +					 skb_network_header_len(skb));
> +
> +		if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
> +			l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP);
> +			tunnel_type = TUNNEL_UDP_CSUM;
> +		} else if (gso_type & SKB_GSO_UDP_TUNNEL) {
> +			tunnel_type = TUNNEL_UDP_NO_CSUM;
> +		}
> +
> +		l4_tunnel_len = skb_inner_network_offset(skb) -
> +				skb_transport_offset(skb);
> +		hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len);
> +
> +		ip.hdr = skb_inner_network_header(skb);
> +		l4.hdr = skb_inner_transport_header(skb);
> +	} else {
> +		ip.hdr = skb_network_header(skb);
> +		l4.hdr = skb_transport_header(skb);
> +		network_hdr_len = skb_network_header_len(skb);
> +	}
> +
> +	/* initialize inner IP header fields */
> +	if (ip.v4->version == 4)
> +		ip.v4->tot_len = 0;
> +	else
> +		ip.v6->payload_len = 0;
> +
> +	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_TSO, &l3_offload,
> +			     &l4_proto);
> +
> +	hinic_task_set_inner_l3(task, l3_offload, network_hdr_len);
> +
> +	ip_identify = 0;
> +	if (l4_proto == IPPROTO_TCP)
> +		l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP);
> +
> +	get_inner_l4_info(skb, &l4, TX_OFFLOAD_TSO, l4_proto, &l4_offload,
> +			  &l4_len, &offset);
> +
> +	hinic_set_tso_inner_l4(task, queue_info, l4_offload, l4_len, offset,
> +			       ip_identify, skb_shinfo(skb)->gso_size);
> +
> +	return 1;
> +}
> +
> +static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
> +			struct sk_buff *skb)
> +{
> +	enum hinic_l4_offload_type l4_offload;
> +	u32 offset, l4_len, network_hdr_len;
> +	enum hinic_l3_offload_type l3_type;
> +	union hinic_l3 ip;
> +	union hinic_l4 l4;
> +	u8 l4_proto;
> +
> +	if (skb->ip_summed != CHECKSUM_PARTIAL)
> +		return 0;
> +
> +	if (skb->encapsulation) {
> +		u32 l4_tunnel_len;
> +
> +		ip.hdr = skb_network_header(skb);
> +
> +		if (ip.v4->version == 4)
> +			l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
> +		else if (ip.v4->version == 6)
> +			l3_type = IPV6_PKT;
> +		else
> +			l3_type = L3TYPE_UNKNOWN;
> +
> +		hinic_task_set_outter_l3(task, l3_type,
> +					 skb_network_header_len(skb));
> +
> +		l4_tunnel_len = skb_inner_network_offset(skb) -
> +				skb_transport_offset(skb);
> +
> +		hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM,
> +					 l4_tunnel_len);
> +
> +		ip.hdr = skb_inner_network_header(skb);
> +		l4.hdr = skb_inner_transport_header(skb);
> +		network_hdr_len = skb_inner_network_header_len(skb);
> +	} else {
> +		ip.hdr = skb_network_header(skb);
> +		l4.hdr = skb_transport_header(skb);
> +		network_hdr_len = skb_network_header_len(skb);
> +	}
> +
> +	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_CSUM, &l3_type,
> +			     &l4_proto);
> +
> +	hinic_task_set_inner_l3(task, l3_type, network_hdr_len);
> +
> +	get_inner_l4_info(skb, &l4, TX_OFFLOAD_CSUM, l4_proto, &l4_offload,
> +			  &l4_len, &offset);
> +
> +	hinic_set_cs_inner_l4(task, queue_info, l4_offload, l4_len, offset);
> +
> +	return 1;
> +}
> +
> +static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
> +			    u32 *queue_info)
> +{
> +	enum hinic_offload_type offload = 0;
> +	int enabled;
> +
> +	enabled = offload_tso(task, queue_info, skb);
> +	if (enabled > 0) {
> +		offload |= TX_OFFLOAD_TSO;
> +	} else if (enabled == 0) {
> +		enabled = offload_csum(task, queue_info, skb);
> +		if (enabled)
> +			offload |= TX_OFFLOAD_CSUM;
> +	} else {
> +		return -EPROTONOSUPPORT;
> +	}
> +
> +	if (offload)
> +		hinic_task_set_l2hdr(task, skb_network_offset(skb));
> +
> +	/* payload offset should not more than 221 */
> +	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_PLDOFF) >
> +	    MAX_PAYLOAD_OFFSET) {
> +		return -EPROTONOSUPPORT;
> +	}
> +
> +	/* mss should not less than 80 */
> +	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_MSS) < HINIC_MSS_MIN) {
> +		*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
> +		*queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS);
> +	}
> +
> +	return 0;
> +}
> +
>  netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  {
>  	struct hinic_dev *nic_dev = netdev_priv(netdev);
> +	u16 prod_idx, q_id = skb->queue_mapping;
>  	struct netdev_queue *netdev_txq;
>  	int nr_sges, err = NETDEV_TX_OK;
>  	struct hinic_sq_wqe *sq_wqe;
>  	unsigned int wqe_size;
>  	struct hinic_txq *txq;
>  	struct hinic_qp *qp;
> -	u16 prod_idx;
>  
> -	txq = &nic_dev->txqs[skb->queue_mapping];
> +	txq = &nic_dev->txqs[q_id];
>  	qp = container_of(txq->sq, struct hinic_qp, sq);
>  
>  	if (skb->len < MIN_SKB_LEN) {
> @@ -236,15 +510,23 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  process_sq_wqe:
>  	hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
>  
> +	err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info);
> +	if (err)
> +		goto offload_error;
> +
>  	hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
>  
>  flush_skbs:
> -	netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping);
> +	netdev_txq = netdev_get_tx_queue(netdev, q_id);
>  	if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq)))
>  		hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0);
>  
>  	return err;
>  
> +offload_error:
> +	hinic_sq_return_wqe(txq->sq, wqe_size);
> +	tx_unmap_skb(nic_dev, skb, txq->sges);
> +
>  skb_error:
>  	dev_kfree_skb_any(skb);
>  
> @@ -252,7 +534,8 @@ netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>  	u64_stats_update_begin(&txq->txq_stats.syncp);
>  	txq->txq_stats.tx_dropped++;
>  	u64_stats_update_end(&txq->txq_stats.syncp);
> -	return err;
> +
> +	return NETDEV_TX_OK;
>  }
>  
>  /**
> 
Applied to cosmic/master-next. Thanks. Waiting for Bionic.

-Stefan
dann frazier Nov. 20, 2018, 12:50 a.m. UTC | #7
On Wed, Nov 7, 2018 at 10:34 AM Stefan Bader <stefan.bader@canonical.com> wrote:
>
> On 07.11.18 17:53, dann frazier wrote:
> > On Wed, Nov 7, 2018 at 1:21 AM Stefan Bader <stefan.bader@canonical.com> wrote:
> >>
> >> On 30.10.18 15:04, dann frazier wrote:
> >>> From: Zhao Chen <zhaochen6@huawei.com>
> >>>
> >>> BugLink: https://bugs.launchpad.net/bugs/1800664
> >>>
> >>> This patch adds checksum offload and TSO support for the HiNIC
> >>> driver. Performance test (Iperf) shows more than 100% improvement
> >>> in TCP streams.
> >>>
> >>> Signed-off-by: Zhao Chen <zhaochen6@huawei.com>
> >>> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
> >>> Signed-off-by: David S. Miller <davem@davemloft.net>
> >>> (cherry picked from commit cc18a7543d2f63a2c93fc61cfa7fd8be5464f75e)
> >>> Signed-off-by: dann frazier <dann.frazier@canonical.com>
> >>> ---
> >>
> >> While I am not totally opposed to it due to the limitation to only one driver, I
> >> cannot say how widespread the HW is which is supported (and how many models this
> >> driver covers). This together with the fact that the requested changes come
> >> directly from a very recent upstream source, I would rather wait at least one
> >> additional cycle before we pull this all the way back into Bionic.
> >> So possibly apply to Cosmic now and we can see how that goes. Then follow up
> >> with Bionic later?
> >
> > That seems fair to me, thanks Stefan. Should I resubmit next cycle, or
> > is that unnecessary?
> >
> >   -dann
> >
> We will keep it on the list, so there should be no need to re-submit. But if you
> keep an eye on this and ping us if there is no update the week before starting
> the next cycle, that might be helpful.

Your delay request was prescient - we are seeing an issue w/ longer runs:
  https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1800664/comments/3

So, self NAK'ing this for bionic until we get to the bottom of that.

  -dann
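
A note on the checksum seeding in the tx hunks quoted above: for TSO,
offload_tso() stores the pseudo-header sum in tcp->check via
~csum_magic(&ip, IPPROTO_TCP), deliberately leaving the length out
because the NIC fills in per-segment lengths while it segments. A
minimal user-space sketch of the IPv4 seed computation (the example
addresses are assumptions; standard ones-complement folding):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>
#include <netinet/in.h>

/* Fold a 32-bit accumulator into 16 bits, ones-complement style. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xFFFF) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint32_t saddr = ntohl(inet_addr("192.0.2.1"));	/* example */
	uint32_t daddr = ntohl(inet_addr("192.0.2.2"));	/* example */
	uint32_t sum = 0;

	/* Pseudo-header without the length word, mirroring the
	 * len = 0 argument the driver passes to csum_magic(). */
	sum += (saddr >> 16) + (saddr & 0xFFFF);
	sum += (daddr >> 16) + (daddr & 0xFFFF);
	sum += IPPROTO_TCP;

	printf("pseudo-header seed = 0x%04x\n", csum_fold(sum));
	return 0;
}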
diff mbox series

Patch

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 0f5563f3b7798..097b5502603fc 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -58,6 +58,8 @@  enum hinic_port_cmd {
 
 	HINIC_PORT_CMD_GET_GLOBAL_QPN   = 102,
 
+	HINIC_PORT_CMD_SET_TSO          = 112,
+
 	HINIC_PORT_CMD_GET_CAP          = 170,
 };
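
The new HINIC_PORT_CMD_SET_TSO opcode is the firmware half of the TSO
toggle; the host half is hinic_port_set_tso(), driven by the
set_features() helper added in hinic_main.c. Note that this patch only
calls set_features() once from nic_dev_init(), so a runtime
"ethtool -K" flip would additionally need an ndo_set_features hook to
reach the firmware. A minimal user-space sketch for reading the
resulting TSO state (the interface name "eth0" and the use of the
legacy ETHTOOL_GTSO ioctl are assumptions, illustrative only):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_value eval = { .cmd = ETHTOOL_GTSO };
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);	/* assumption */
	ifr.ifr_data = (void *)&eval;
	if (ioctl(fd, SIOCETHTOOL, &ifr) == 0)
		printf("TSO is %s\n", eval.data ? "on" : "off");
	close(fd);
	return 0;
}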
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index cb239627770f4..967c993d5303a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -70,8 +70,6 @@ 
 #define SQ_MASKED_IDX(sq, idx)  ((idx) & (sq)->wq->mask)
 #define RQ_MASKED_IDX(rq, idx)  ((idx) & (rq)->wq->mask)
 
-#define TX_MAX_MSS_DEFAULT      0x3E00
-
 enum sq_wqe_type {
 	SQ_NORMAL_WQE = 0,
 };
@@ -494,33 +492,16 @@  static void sq_prepare_ctrl(struct hinic_sq_ctrl *ctrl, u16 prod_idx,
 			  HINIC_SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT)     |
 			  HINIC_SQ_CTRL_SET(ctrl_size, LEN);
 
-	ctrl->queue_info = HINIC_SQ_CTRL_SET(TX_MAX_MSS_DEFAULT,
-					     QUEUE_INFO_MSS);
+	ctrl->queue_info = HINIC_SQ_CTRL_SET(HINIC_MSS_DEFAULT,
+					     QUEUE_INFO_MSS) |
+			   HINIC_SQ_CTRL_SET(1, QUEUE_INFO_UC);
 }
 
 static void sq_prepare_task(struct hinic_sq_task *task)
 {
-	task->pkt_info0 =
-		HINIC_SQ_TASK_INFO0_SET(0, L2HDR_LEN) |
-		HINIC_SQ_TASK_INFO0_SET(HINIC_L4_OFF_DISABLE, L4_OFFLOAD) |
-		HINIC_SQ_TASK_INFO0_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
-					INNER_L3TYPE) |
-		HINIC_SQ_TASK_INFO0_SET(HINIC_VLAN_OFF_DISABLE,
-					VLAN_OFFLOAD) |
-		HINIC_SQ_TASK_INFO0_SET(HINIC_PKT_NOT_PARSED, PARSE_FLAG);
-
-	task->pkt_info1 =
-		HINIC_SQ_TASK_INFO1_SET(HINIC_MEDIA_UNKNOWN, MEDIA_TYPE) |
-		HINIC_SQ_TASK_INFO1_SET(0, INNER_L4_LEN) |
-		HINIC_SQ_TASK_INFO1_SET(0, INNER_L3_LEN);
-
-	task->pkt_info2 =
-		HINIC_SQ_TASK_INFO2_SET(0, TUNNEL_L4_LEN) |
-		HINIC_SQ_TASK_INFO2_SET(0, OUTER_L3_LEN)  |
-		HINIC_SQ_TASK_INFO2_SET(HINIC_TUNNEL_L4TYPE_UNKNOWN,
-					TUNNEL_L4TYPE)    |
-		HINIC_SQ_TASK_INFO2_SET(HINIC_OUTER_L3TYPE_UNKNOWN,
-					OUTER_L3TYPE);
+	task->pkt_info0 = 0;
+	task->pkt_info1 = 0;
+	task->pkt_info2 = 0;
 
 	task->ufo_v6_identify = 0;
 
@@ -529,6 +510,86 @@  static void sq_prepare_task(struct hinic_sq_task *task)
 	task->zero_pad = 0;
 }
 
+void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len)
+{
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(len, L2HDR_LEN);
+}
+
+void hinic_task_set_outter_l3(struct hinic_sq_task *task,
+			      enum hinic_l3_offload_type l3_type,
+			      u32 network_len)
+{
+	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l3_type, OUTER_L3TYPE) |
+			   HINIC_SQ_TASK_INFO2_SET(network_len, OUTER_L3LEN);
+}
+
+void hinic_task_set_inner_l3(struct hinic_sq_task *task,
+			     enum hinic_l3_offload_type l3_type,
+			     u32 network_len)
+{
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l3_type, INNER_L3TYPE);
+	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(network_len, INNER_L3LEN);
+}
+
+void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
+			      enum hinic_l4_offload_type l4_type,
+			      u32 tunnel_len)
+{
+	task->pkt_info2 |= HINIC_SQ_TASK_INFO2_SET(l4_type, TUNNEL_L4TYPE) |
+			   HINIC_SQ_TASK_INFO2_SET(tunnel_len, TUNNEL_L4LEN);
+}
+
+void hinic_set_cs_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
+			   enum hinic_l4_offload_type l4_offload,
+			   u32 l4_len, u32 offset)
+{
+	u32 tcp_udp_cs = 0, sctp = 0;
+	u32 mss = HINIC_MSS_DEFAULT;
+
+	if (l4_offload == TCP_OFFLOAD_ENABLE ||
+	    l4_offload == UDP_OFFLOAD_ENABLE)
+		tcp_udp_cs = 1;
+	else if (l4_offload == SCTP_OFFLOAD_ENABLE)
+		sctp = 1;
+
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
+	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
+
+	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
+		       HINIC_SQ_CTRL_SET(tcp_udp_cs, QUEUE_INFO_TCPUDP_CS) |
+		       HINIC_SQ_CTRL_SET(sctp, QUEUE_INFO_SCTP);
+
+	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
+}
+
+void hinic_set_tso_inner_l4(struct hinic_sq_task *task, u32 *queue_info,
+			    enum hinic_l4_offload_type l4_offload,
+			    u32 l4_len, u32 offset, u32 ip_ident, u32 mss)
+{
+	u32 tso = 0, ufo = 0;
+
+	if (l4_offload == TCP_OFFLOAD_ENABLE)
+		tso = 1;
+	else if (l4_offload == UDP_OFFLOAD_ENABLE)
+		ufo = 1;
+
+	task->ufo_v6_identify = ip_ident;
+
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(l4_offload, L4_OFFLOAD);
+	task->pkt_info0 |= HINIC_SQ_TASK_INFO0_SET(tso || ufo, TSO_FLAG);
+	task->pkt_info1 |= HINIC_SQ_TASK_INFO1_SET(l4_len, INNER_L4LEN);
+
+	*queue_info |= HINIC_SQ_CTRL_SET(offset, QUEUE_INFO_PLDOFF) |
+		       HINIC_SQ_CTRL_SET(tso, QUEUE_INFO_TSO) |
+		       HINIC_SQ_CTRL_SET(ufo, QUEUE_INFO_UFO) |
+		       HINIC_SQ_CTRL_SET(!!l4_offload, QUEUE_INFO_TCPUDP_CS);
+
+	/* set MSS value */
+	*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+	*queue_info |= HINIC_SQ_CTRL_SET(mss, QUEUE_INFO_MSS);
+}
+
 /**
  * hinic_sq_prepare_wqe - prepare wqe before insert to the queue
  * @sq: send queue
@@ -612,6 +673,16 @@  struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
 	return &hw_wqe->sq_wqe;
 }
 
+/**
+ * hinic_sq_return_wqe - return the wqe to the sq
+ * @sq: send queue
+ * @wqe_size: the size of the wqe
+ **/
+void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size)
+{
+	hinic_return_wqe(sq->wq, wqe_size);
+}
+
 /**
  * hinic_sq_write_wqe - write the wqe to the sq
  * @sq: send queue
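
A detail worth calling out in hinic_set_cs_inner_l4() and
hinic_set_tso_inner_l4() above: queue_info already carries
HINIC_MSS_DEFAULT from sq_prepare_ctrl(), so the MSS field must be
cleared before the per-packet value is OR-ed in; a plain OR would merge
the two bit patterns. A self-contained sketch of that read-modify-write
pattern (shift/mask values copied from hinic_hw_wqe.h, the main()
harness is illustrative only):

#include <stdio.h>
#include <stdint.h>

#define QUEUE_INFO_MSS_SHIFT	13
#define QUEUE_INFO_MSS_MASK	0x3FFF

#define CTRL_SET(val, f)	(((uint32_t)(val) & f##_MASK) << f##_SHIFT)
#define CTRL_CLEAR(val, f)	((uint32_t)(val) & ~(f##_MASK << f##_SHIFT))
#define CTRL_GET(val, f)	(((val) >> f##_SHIFT) & f##_MASK)

int main(void)
{
	uint32_t queue_info = CTRL_SET(0x3E00, QUEUE_INFO_MSS); /* default */

	/* Without the clear, OR-ing 1400 (0x578) over 0x3E00 would
	 * yield 0x3F78, a bogus MSS. */
	queue_info = CTRL_CLEAR(queue_info, QUEUE_INFO_MSS);
	queue_info |= CTRL_SET(1400, QUEUE_INFO_MSS);

	printf("mss = %u\n", CTRL_GET(queue_info, QUEUE_INFO_MSS)); /* 1400 */
	return 0;
}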
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index 6c84f83ec2831..a0dc63a4bfc7a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -149,6 +149,31 @@  int hinic_get_sq_free_wqebbs(struct hinic_sq *sq);
 
 int hinic_get_rq_free_wqebbs(struct hinic_rq *rq);
 
+void hinic_task_set_l2hdr(struct hinic_sq_task *task, u32 len);
+
+void hinic_task_set_outter_l3(struct hinic_sq_task *task,
+			      enum hinic_l3_offload_type l3_type,
+			      u32 network_len);
+
+void hinic_task_set_inner_l3(struct hinic_sq_task *task,
+			     enum hinic_l3_offload_type l3_type,
+			     u32 network_len);
+
+void hinic_task_set_tunnel_l4(struct hinic_sq_task *task,
+			      enum hinic_l4_offload_type l4_type,
+			      u32 tunnel_len);
+
+void hinic_set_cs_inner_l4(struct hinic_sq_task *task,
+			   u32 *queue_info,
+			   enum hinic_l4_offload_type l4_offload,
+			   u32 l4_len, u32 offset);
+
+void hinic_set_tso_inner_l4(struct hinic_sq_task *task,
+			    u32 *queue_info,
+			    enum hinic_l4_offload_type l4_offload,
+			    u32 l4_len,
+			    u32 offset, u32 ip_ident, u32 mss);
+
 void hinic_sq_prepare_wqe(struct hinic_sq *sq, u16 prod_idx,
 			  struct hinic_sq_wqe *wqe, struct hinic_sge *sges,
 			  int nr_sges);
@@ -159,6 +184,8 @@  void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
 struct hinic_sq_wqe *hinic_sq_get_wqe(struct hinic_sq *sq,
 				      unsigned int wqe_size, u16 *prod_idx);
 
+void hinic_sq_return_wqe(struct hinic_sq *sq, unsigned int wqe_size);
+
 void hinic_sq_write_wqe(struct hinic_sq *sq, u16 prod_idx,
 			struct hinic_sq_wqe *wqe, struct sk_buff *skb,
 			unsigned int wqe_size);
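
These helpers are consumed by hinic_tx_offload() in hinic_tx.c (quoted
in full earlier in the thread): TSO is attempted first, plain checksum
offload is the fallback, and the packet is rejected when the computed
payload offset exceeds the hardware limit. A simplified, self-contained
model of that selection logic (the stubs stand in for the real header
parsers and are assumptions, not driver code):

#include <stdio.h>
#include <errno.h>

#define MAX_PAYLOAD_OFFSET 221	/* hw limit, from hinic_tx.c */

/* Stubs: >0 means the offload was armed, 0 means not applicable,
 * <0 means the packet is unsupported. */
static int offload_tso(int is_gso) { return is_gso ? 1 : 0; }
static int offload_csum(int needs_csum) { return needs_csum ? 1 : 0; }

static int tx_offload(int is_gso, int needs_csum, unsigned int pld_off)
{
	int enabled = offload_tso(is_gso);

	if (enabled < 0)
		return -EPROTONOSUPPORT;
	if (enabled == 0)
		enabled = offload_csum(needs_csum);
	if (enabled && pld_off > MAX_PAYLOAD_OFFSET)
		return -EPROTONOSUPPORT;	/* headers too long for hw */
	return 0;
}

int main(void)
{
	/* A TSO frame with short headers passes; one past the
	 * 221-byte payload offset is rejected. */
	printf("%d %d\n", tx_offload(1, 0, 66), tx_offload(1, 0, 300));
	return 0;
}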
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
index 3e3181c089bdc..f92f1bf3901a7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
@@ -774,6 +774,20 @@  struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
 	return WQ_PAGE_ADDR(wq, *prod_idx) + WQE_PAGE_OFF(wq, *prod_idx);
 }
 
+/**
+ * hinic_return_wqe - return the wqe when transmit failed
+ * @wq: wq to return wqe
+ * @wqe_size: wqe size
+ **/
+void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size)
+{
+	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+
+	atomic_sub(num_wqebbs, &wq->prod_idx);
+
+	atomic_add(num_wqebbs, &wq->delta);
+}
+
 /**
  * hinic_put_wqe - return the wqe place to use for a new wqe
  * @wq: wq to return wqe
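
hinic_return_wqe() above is the exact inverse of the reservation done
in hinic_get_wqe(): prod_idx is rolled back and delta (the free-entry
count) is replenished, so a WQE claimed for a packet that later fails
offload validation can be handed out again. A simplified stdatomic
model of that pairing (the 64-entry queue size is an assumption, and
the driver's locking is omitted):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int prod_idx;		/* producer position */
static atomic_int delta = 64;		/* free wqebbs */

static void reserve(int n)		/* models hinic_get_wqe() */
{
	atomic_fetch_add(&prod_idx, n);
	atomic_fetch_sub(&delta, n);
}

static void give_back(int n)		/* models hinic_return_wqe() */
{
	atomic_fetch_sub(&prod_idx, n);
	atomic_fetch_add(&delta, n);
}

int main(void)
{
	reserve(2);		/* claim a two-wqebb WQE */
	give_back(2);		/* offload failed: undo the claim */
	printf("prod_idx=%d free=%d\n",
	       atomic_load(&prod_idx), atomic_load(&delta));
	return 0;
}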
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
index 9c030a0f035e2..9b66545ba563c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
@@ -104,6 +104,8 @@  void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq);
 struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
 				   u16 *prod_idx);
 
+void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size);
+
 void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size);
 
 struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
index bc73485483c59..9754d6ed5f4ac 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
@@ -62,19 +62,33 @@ 
 			(((val) >> HINIC_CMDQ_WQE_HEADER_##member##_SHIFT) \
 			 & HINIC_CMDQ_WQE_HEADER_##member##_MASK)
 
-#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT    0
-#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT        16
-#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT         22
-#define HINIC_SQ_CTRL_LEN_SHIFT                 29
-
-#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK     0xFF
-#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK         0x1F
-#define HINIC_SQ_CTRL_DATA_FORMAT_MASK          0x1
-#define HINIC_SQ_CTRL_LEN_MASK                  0x3
-
-#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT      13
-
-#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK       0x3FFF
+#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_SHIFT           0
+#define HINIC_SQ_CTRL_TASKSECT_LEN_SHIFT               16
+#define HINIC_SQ_CTRL_DATA_FORMAT_SHIFT                22
+#define HINIC_SQ_CTRL_LEN_SHIFT                        29
+
+#define HINIC_SQ_CTRL_BUFDESC_SECT_LEN_MASK            0xFF
+#define HINIC_SQ_CTRL_TASKSECT_LEN_MASK                0x1F
+#define HINIC_SQ_CTRL_DATA_FORMAT_MASK                 0x1
+#define HINIC_SQ_CTRL_LEN_MASK                         0x3
+
+#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_SHIFT          2
+#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_SHIFT             10
+#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_SHIFT             11
+#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_SHIFT       12
+#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_SHIFT             13
+#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_SHIFT            27
+#define HINIC_SQ_CTRL_QUEUE_INFO_UC_SHIFT              28
+#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_SHIFT             29
+
+#define HINIC_SQ_CTRL_QUEUE_INFO_PLDOFF_MASK           0xFF
+#define HINIC_SQ_CTRL_QUEUE_INFO_UFO_MASK              0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_TSO_MASK              0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_TCPUDP_CS_MASK	       0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_MSS_MASK              0x3FFF
+#define HINIC_SQ_CTRL_QUEUE_INFO_SCTP_MASK             0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_UC_MASK               0x1
+#define HINIC_SQ_CTRL_QUEUE_INFO_PRI_MASK              0x7
 
 #define HINIC_SQ_CTRL_SET(val, member)          \
 		(((u32)(val) & HINIC_SQ_CTRL_##member##_MASK) \
@@ -84,6 +98,10 @@ 
 		(((val) >> HINIC_SQ_CTRL_##member##_SHIFT) \
 		 & HINIC_SQ_CTRL_##member##_MASK)
 
+#define HINIC_SQ_CTRL_CLEAR(val, member)	\
+		((u32)(val) & (~(HINIC_SQ_CTRL_##member##_MASK \
+		 << HINIC_SQ_CTRL_##member##_SHIFT)))
+
 #define HINIC_SQ_TASK_INFO0_L2HDR_LEN_SHIFT     0
 #define HINIC_SQ_TASK_INFO0_L4_OFFLOAD_SHIFT    8
 #define HINIC_SQ_TASK_INFO0_INNER_L3TYPE_SHIFT  10
@@ -108,28 +126,28 @@ 
 
 /* 8 bits reserved */
 #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_SHIFT    8
-#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_SHIFT  16
-#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_SHIFT  24
+#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_SHIFT   16
+#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_SHIFT   24
 
 /* 8 bits reserved */
 #define HINIC_SQ_TASK_INFO1_MEDIA_TYPE_MASK     0xFF
-#define HINIC_SQ_TASK_INFO1_INNER_L4_LEN_MASK   0xFF
-#define HINIC_SQ_TASK_INFO1_INNER_L3_LEN_MASK   0xFF
+#define HINIC_SQ_TASK_INFO1_INNER_L4LEN_MASK    0xFF
+#define HINIC_SQ_TASK_INFO1_INNER_L3LEN_MASK    0xFF
 
 #define HINIC_SQ_TASK_INFO1_SET(val, member)    \
 		(((u32)(val) & HINIC_SQ_TASK_INFO1_##member##_MASK) <<  \
 		 HINIC_SQ_TASK_INFO1_##member##_SHIFT)
 
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_SHIFT 0
-#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_SHIFT  12
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 19
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_SHIFT  0
+#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_SHIFT   8
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_SHIFT 16
 /* 1 bit reserved */
-#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT  22
+#define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_SHIFT  24
 /* 8 bits reserved */
 
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4_LEN_MASK  0xFFF
-#define HINIC_SQ_TASK_INFO2_OUTER_L3_LEN_MASK   0x7F
-#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK  0x3
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4LEN_MASK   0xFF
+#define HINIC_SQ_TASK_INFO2_OUTER_L3LEN_MASK    0xFF
+#define HINIC_SQ_TASK_INFO2_TUNNEL_L4TYPE_MASK  0x7
 /* 1 bit reserved */
 #define HINIC_SQ_TASK_INFO2_OUTER_L3TYPE_MASK   0x3
 /* 8 bits reserved */
@@ -187,12 +205,15 @@ 
 		 sizeof(struct hinic_sq_task) + \
 		 (nr_sges) * sizeof(struct hinic_sq_bufdesc))
 
-#define HINIC_SCMD_DATA_LEN             16
+#define HINIC_SCMD_DATA_LEN                     16
+
+#define HINIC_MAX_SQ_BUFDESCS                   17
 
-#define HINIC_MAX_SQ_BUFDESCS           17
+#define HINIC_SQ_WQE_MAX_SIZE                   320
+#define HINIC_RQ_WQE_SIZE                       32
 
-#define HINIC_SQ_WQE_MAX_SIZE           320
-#define HINIC_RQ_WQE_SIZE               32
+#define HINIC_MSS_DEFAULT                       0x3E00
+#define HINIC_MSS_MIN                           0x50
 
 enum hinic_l4offload_type {
 	HINIC_L4_OFF_DISABLE            = 0,
@@ -211,6 +232,26 @@  enum hinic_pkt_parsed {
 	HINIC_PKT_PARSED     = 1,
 };
 
+enum hinic_l3_offload_type {
+	L3TYPE_UNKNOWN = 0,
+	IPV6_PKT = 1,
+	IPV4_PKT_NO_CHKSUM_OFFLOAD = 2,
+	IPV4_PKT_WITH_CHKSUM_OFFLOAD = 3,
+};
+
+enum hinic_l4_offload_type {
+	OFFLOAD_DISABLE     = 0,
+	TCP_OFFLOAD_ENABLE  = 1,
+	SCTP_OFFLOAD_ENABLE = 2,
+	UDP_OFFLOAD_ENABLE  = 3,
+};
+
+enum hinic_l4_tunnel_type {
+	NOT_TUNNEL,
+	TUNNEL_UDP_NO_CSUM,
+	TUNNEL_UDP_CSUM,
+};
+
 enum hinic_outer_l3type {
 	HINIC_OUTER_L3TYPE_UNKNOWN              = 0,
 	HINIC_OUTER_L3TYPE_IPV6                 = 1,
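
The SET/GET/CLEAR trio above composes as a read-modify-write on the 32-bit queue_info word. An illustrative sketch (the values are examples, not from the patch):

    u32 queue_info = 0;

    /* pack: mark the descriptor as TSO with a 1400-byte MSS */
    queue_info |= HINIC_SQ_CTRL_SET(1, QUEUE_INFO_TSO) |
                  HINIC_SQ_CTRL_SET(1400, QUEUE_INFO_MSS);

    /* unpack */
    u32 mss = HINIC_SQ_CTRL_GET(queue_info, QUEUE_INFO_MSS);

    /* rewrite: clear the old field before OR-ing in the new value */
    queue_info = HINIC_SQ_CTRL_CLEAR(queue_info, QUEUE_INFO_MSS);
    queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS);
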
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 4a8f82938ed5b..fdf2bdb6b0d06 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -805,7 +805,8 @@  static const struct net_device_ops hinic_netdev_ops = {
 
 static void netdev_features_init(struct net_device *netdev)
 {
-	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA;
+	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
+			      NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
 
 	netdev->vlan_features = netdev->hw_features;
 
@@ -863,6 +864,20 @@  static void link_status_event_handler(void *handle, void *buf_in, u16 in_size,
 	*out_size = sizeof(*ret_link_status);
 }
 
+static int set_features(struct hinic_dev *nic_dev,
+			netdev_features_t pre_features,
+			netdev_features_t features, bool force_change)
+{
+	netdev_features_t changed = force_change ? ~0 : pre_features ^ features;
+	int err = 0;
+
+	if (changed & NETIF_F_TSO)
+		err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ?
+					 HINIC_TSO_ENABLE : HINIC_TSO_DISABLE);
+
+	return err;
+}
+
 /**
  * nic_dev_init - Initialize the NIC device
  * @pdev: the NIC pci device
@@ -963,7 +978,12 @@  static int nic_dev_init(struct pci_dev *pdev)
 	hinic_hwdev_cb_register(nic_dev->hwdev, HINIC_MGMT_MSG_CMD_LINK_STATUS,
 				nic_dev, link_status_event_handler);
 
+	err = set_features(nic_dev, 0, nic_dev->netdev->features, true);
+	if (err)
+		goto err_set_features;
+
 	SET_NETDEV_DEV(netdev, &pdev->dev);
+
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to register netdev\n");
@@ -973,6 +993,7 @@  static int nic_dev_init(struct pci_dev *pdev)
 	return 0;
 
 err_reg_netdev:
+err_set_features:
 	hinic_hwdev_cb_unregister(nic_dev->hwdev,
 				  HINIC_MGMT_MSG_CMD_LINK_STATUS);
 	cancel_work_sync(&rx_mode_work->work);
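
set_features() only talks to firmware for bits that actually changed; force_change exists because the device state is unknown at probe time, so every bit is treated as changed. A sketch of the delta logic:

    netdev_features_t pre = NETIF_F_SG | NETIF_F_TSO;
    netdev_features_t now = NETIF_F_SG;        /* TSO switched off */

    netdev_features_t changed = pre ^ now;     /* only NETIF_F_TSO is set */

    /* with force_change, changed == (netdev_features_t)~0, which is how
     * nic_dev_init() pushes the initial TSO state regardless of 'pre' */
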
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 4d4e3f05fb5fb..7575a7d3bd9f6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -377,3 +377,35 @@  int hinic_port_get_cap(struct hinic_dev *nic_dev,
 
 	return 0;
 }
+
+/**
+ * hinic_port_set_tso - set port tso configuration
+ * @nic_dev: nic device
+ * @state: the tso state to set
+ *
+ * Return 0 - Success, negative - Failure
+ **/
+int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state)
+{
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct hinic_tso_config tso_cfg = {0};
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	tso_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	tso_cfg.tso_en = state;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_TSO,
+				 &tso_cfg, sizeof(tso_cfg),
+				 &tso_cfg, &out_size);
+	if (err || out_size != sizeof(tso_cfg) || tso_cfg.status) {
+		dev_err(&pdev->dev,
+			"Failed to set port tso, ret = %d\n",
+			tso_cfg.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
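
A hedged usage sketch, mirroring how set_features() in hinic_main.c drives the new helper:

    /* keep firmware in sync with the stack's view of NETIF_F_TSO */
    err = hinic_port_set_tso(nic_dev,
                             (features & NETIF_F_TSO) ?
                             HINIC_TSO_ENABLE : HINIC_TSO_DISABLE);
    if (err)
            return err;     /* firmware rejected the command */
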
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index 9404365195ddf..f6e3220fe28fc 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -72,6 +72,11 @@  enum hinic_speed {
 	HINIC_SPEED_UNKNOWN = 0xFF,
 };
 
+enum hinic_tso_state {
+	HINIC_TSO_DISABLE = 0,
+	HINIC_TSO_ENABLE  = 1,
+};
+
 struct hinic_port_mac_cmd {
 	u8              status;
 	u8              version;
@@ -167,6 +172,17 @@  struct hinic_port_cap {
 	u8      rsvd2[3];
 };
 
+struct hinic_tso_config {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u8	tso_en;
+	u8	resv2[3];
+};
+
 int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
 		       u16 vlan_id);
 
@@ -195,4 +211,6 @@  int hinic_port_set_func_state(struct hinic_dev *nic_dev,
 int hinic_port_get_cap(struct hinic_dev *nic_dev,
 		       struct hinic_port_cap *port_cap);
 
+int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state);
+
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index c5fca0356c9c9..11e73e67358d1 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -26,6 +26,13 @@ 
 #include <linux/skbuff.h>
 #include <linux/smp.h>
 #include <asm/byteorder.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
 
 #include "hinic_common.h"
 #include "hinic_hw_if.h"
@@ -45,9 +52,31 @@ 
 #define CI_UPDATE_NO_PENDING            0
 #define CI_UPDATE_NO_COALESC            0
 
-#define HW_CONS_IDX(sq)         be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
+#define HW_CONS_IDX(sq)                 be16_to_cpu(*(u16 *)((sq)->hw_ci_addr))
 
-#define MIN_SKB_LEN             64
+#define MIN_SKB_LEN                     17
+
+#define MAX_PAYLOAD_OFFSET		221
+#define TRANSPORT_OFFSET(l4_hdr, skb)	((u32)((l4_hdr) - (skb)->data))
+
+union hinic_l3 {
+	struct iphdr *v4;
+	struct ipv6hdr *v6;
+	unsigned char *hdr;
+};
+
+union hinic_l4 {
+	struct tcphdr *tcp;
+	struct udphdr *udp;
+	unsigned char *hdr;
+};
+
+enum hinic_offload_type {
+	TX_OFFLOAD_TSO     = BIT(0),
+	TX_OFFLOAD_CSUM    = BIT(1),
+	TX_OFFLOAD_VLAN    = BIT(2),
+	TX_OFFLOAD_INVALID = BIT(3),
+};
 
 /**
  * hinic_txq_clean_stats - Clean the statistics of specific queue
@@ -175,18 +204,263 @@  static void tx_unmap_skb(struct hinic_dev *nic_dev, struct sk_buff *skb,
 			 DMA_TO_DEVICE);
 }
 
+static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic_l3 *ip,
+				 union hinic_l4 *l4,
+				 enum hinic_offload_type offload_type,
+				 enum hinic_l3_offload_type *l3_type,
+				 u8 *l4_proto)
+{
+	u8 *exthdr;
+
+	if (ip->v4->version == 4) {
+		*l3_type = (offload_type == TX_OFFLOAD_CSUM) ?
+			   IPV4_PKT_NO_CHKSUM_OFFLOAD :
+			   IPV4_PKT_WITH_CHKSUM_OFFLOAD;
+		*l4_proto = ip->v4->protocol;
+	} else if (ip->v4->version == 6) {
+		*l3_type = IPV6_PKT;
+		exthdr = ip->hdr + sizeof(*ip->v6);
+		*l4_proto = ip->v6->nexthdr;
+		if (exthdr != l4->hdr) {
+			int start = exthdr - skb->data;
+			__be16 frag_off;
+
+			ipv6_skip_exthdr(skb, start, l4_proto, &frag_off);
+		}
+	} else {
+		*l3_type = L3TYPE_UNKNOWN;
+		*l4_proto = 0;
+	}
+}
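
One subtlety in get_inner_l3_l4_type(): for IPv6, nexthdr in the fixed header may name an extension header (hop-by-hop, routing, ...) rather than the transport protocol, which is why ipv6_skip_exthdr() is used to rewrite *l4_proto. A standalone sketch of that call:

    __be16 frag_off;
    u8 proto = ipv6_hdr(skb)->nexthdr;
    int start = skb_network_offset(skb) + sizeof(struct ipv6hdr);

    /* walk the extension-header chain; on return 'proto' holds the
     * final L4 protocol and the return value is its offset into the
     * packet (negative on a malformed chain) */
    int l4_off = ipv6_skip_exthdr(skb, start, &proto, &frag_off);
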
+
+static void get_inner_l4_info(struct sk_buff *skb, union hinic_l4 *l4,
+			      enum hinic_offload_type offload_type, u8 l4_proto,
+			      enum hinic_l4_offload_type *l4_offload,
+			      u32 *l4_len, u32 *offset)
+{
+	*l4_offload = OFFLOAD_DISABLE;
+	*offset = 0;
+	*l4_len = 0;
+
+	switch (l4_proto) {
+	case IPPROTO_TCP:
+		*l4_offload = TCP_OFFLOAD_ENABLE;
+		/* doff is in units of 4B */
+		*l4_len = l4->tcp->doff * 4;
+		*offset = *l4_len + TRANSPORT_OFFSET(l4->hdr, skb);
+		break;
+
+	case IPPROTO_UDP:
+		*l4_offload = UDP_OFFLOAD_ENABLE;
+		*l4_len = sizeof(struct udphdr);
+		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
+		break;
+
+	case IPPROTO_SCTP:
+		/* only csum offload supports sctp */
+		if (offload_type != TX_OFFLOAD_CSUM)
+			break;
+
+		*l4_offload = SCTP_OFFLOAD_ENABLE;
+		*l4_len = sizeof(struct sctphdr);
+		*offset = TRANSPORT_OFFSET(l4->hdr, skb);
+		break;
+
+	default:
+		break;
+	}
+}
+
+static __sum16 csum_magic(union hinic_l3 *ip, unsigned short proto)
+{
+	return (ip->v4->version == 4) ?
+		csum_tcpudp_magic(ip->v4->saddr, ip->v4->daddr, 0, proto, 0) :
+		csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0);
+}
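
csum_magic() deliberately passes a zero length: the usual contract for stateless TSO hardware is that the driver seeds l4->check with the negated pseudo-header sum, and the NIC folds in each segment's real length and payload. For IPv4 the call expands to roughly:

    /* len == 0 on purpose: the hardware re-adds the per-segment length
     * when it finalizes each segment's TCP checksum */
    l4.tcp->check = ~csum_tcpudp_magic(ip.v4->saddr, ip.v4->daddr,
                                       0, IPPROTO_TCP, 0);
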
+
+static int offload_tso(struct hinic_sq_task *task, u32 *queue_info,
+		       struct sk_buff *skb)
+{
+	u32 offset, l4_len, ip_identify, network_hdr_len;
+	enum hinic_l3_offload_type l3_offload;
+	enum hinic_l4_offload_type l4_offload;
+	union hinic_l3 ip;
+	union hinic_l4 l4;
+	u8 l4_proto;
+
+	if (!skb_is_gso(skb))
+		return 0;
+
+	if (skb_cow_head(skb, 0) < 0)
+		return -EPROTONOSUPPORT;
+
+	if (skb->encapsulation) {
+		u32 gso_type = skb_shinfo(skb)->gso_type;
+		u32 tunnel_type = 0;
+		u32 l4_tunnel_len;
+
+		ip.hdr = skb_network_header(skb);
+		l4.hdr = skb_transport_header(skb);
+		network_hdr_len = skb_inner_network_header_len(skb);
+
+		if (ip.v4->version == 4) {
+			ip.v4->tot_len = 0;
+			l3_offload = IPV4_PKT_WITH_CHKSUM_OFFLOAD;
+		} else if (ip.v4->version == 6) {
+			l3_offload = IPV6_PKT;
+		} else {
+			l3_offload = 0;
+		}
+
+		hinic_task_set_outter_l3(task, l3_offload,
+					 skb_network_header_len(skb));
+
+		if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) {
+			l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP);
+			tunnel_type = TUNNEL_UDP_CSUM;
+		} else if (gso_type & SKB_GSO_UDP_TUNNEL) {
+			tunnel_type = TUNNEL_UDP_NO_CSUM;
+		}
+
+		l4_tunnel_len = skb_inner_network_offset(skb) -
+				skb_transport_offset(skb);
+		hinic_task_set_tunnel_l4(task, tunnel_type, l4_tunnel_len);
+
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+	} else {
+		ip.hdr = skb_network_header(skb);
+		l4.hdr = skb_transport_header(skb);
+		network_hdr_len = skb_network_header_len(skb);
+	}
+
+	/* initialize inner IP header fields */
+	if (ip.v4->version == 4)
+		ip.v4->tot_len = 0;
+	else
+		ip.v6->payload_len = 0;
+
+	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_TSO, &l3_offload,
+			     &l4_proto);
+
+	hinic_task_set_inner_l3(task, l3_offload, network_hdr_len);
+
+	ip_identify = 0;
+	if (l4_proto == IPPROTO_TCP)
+		l4.tcp->check = ~csum_magic(&ip, IPPROTO_TCP);
+
+	get_inner_l4_info(skb, &l4, TX_OFFLOAD_TSO, l4_proto, &l4_offload,
+			  &l4_len, &offset);
+
+	hinic_set_tso_inner_l4(task, queue_info, l4_offload, l4_len, offset,
+			       ip_identify, skb_shinfo(skb)->gso_size);
+
+	return 1;
+}
+
+static int offload_csum(struct hinic_sq_task *task, u32 *queue_info,
+			struct sk_buff *skb)
+{
+	enum hinic_l4_offload_type l4_offload;
+	u32 offset, l4_len, network_hdr_len;
+	enum hinic_l3_offload_type l3_type;
+	union hinic_l3 ip;
+	union hinic_l4 l4;
+	u8 l4_proto;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	if (skb->encapsulation) {
+		u32 l4_tunnel_len;
+
+		ip.hdr = skb_network_header(skb);
+
+		if (ip.v4->version == 4)
+			l3_type = IPV4_PKT_NO_CHKSUM_OFFLOAD;
+		else if (ip.v4->version == 6)
+			l3_type = IPV6_PKT;
+		else
+			l3_type = L3TYPE_UNKNOWN;
+
+		hinic_task_set_outter_l3(task, l3_type,
+					 skb_network_header_len(skb));
+
+		l4_tunnel_len = skb_inner_network_offset(skb) -
+				skb_transport_offset(skb);
+
+		hinic_task_set_tunnel_l4(task, TUNNEL_UDP_NO_CSUM,
+					 l4_tunnel_len);
+
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+		network_hdr_len = skb_inner_network_header_len(skb);
+	} else {
+		ip.hdr = skb_network_header(skb);
+		l4.hdr = skb_transport_header(skb);
+		network_hdr_len = skb_network_header_len(skb);
+	}
+
+	get_inner_l3_l4_type(skb, &ip, &l4, TX_OFFLOAD_CSUM, &l3_type,
+			     &l4_proto);
+
+	hinic_task_set_inner_l3(task, l3_type, network_hdr_len);
+
+	get_inner_l4_info(skb, &l4, TX_OFFLOAD_CSUM, l4_proto, &l4_offload,
+			  &l4_len, &offset);
+
+	hinic_set_cs_inner_l4(task, queue_info, l4_offload, l4_len, offset);
+
+	return 1;
+}
+
+static int hinic_tx_offload(struct sk_buff *skb, struct hinic_sq_task *task,
+			    u32 *queue_info)
+{
+	enum hinic_offload_type offload = 0;
+	int enabled;
+
+	enabled = offload_tso(task, queue_info, skb);
+	if (enabled > 0) {
+		offload |= TX_OFFLOAD_TSO;
+	} else if (enabled == 0) {
+		enabled = offload_csum(task, queue_info, skb);
+		if (enabled)
+			offload |= TX_OFFLOAD_CSUM;
+	} else {
+		return -EPROTONOSUPPORT;
+	}
+
+	if (offload)
+		hinic_task_set_l2hdr(task, skb_network_offset(skb));
+
+	/* payload offset should not be more than 221 */
+	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_PLDOFF) >
+	    MAX_PAYLOAD_OFFSET) {
+		return -EPROTONOSUPPORT;
+	}
+
+	/* mss should not be less than 80 */
+	if (HINIC_SQ_CTRL_GET(*queue_info, QUEUE_INFO_MSS) < HINIC_MSS_MIN) {
+		*queue_info = HINIC_SQ_CTRL_CLEAR(*queue_info, QUEUE_INFO_MSS);
+		*queue_info |= HINIC_SQ_CTRL_SET(HINIC_MSS_MIN, QUEUE_INFO_MSS);
+	}
+
+	return 0;
+}
+
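
When hinic_tx_offload() fails, the caller must hand the already-reserved wqe back and drop the skb; condensed from the xmit path below:

    err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info);
    if (err) {
            hinic_sq_return_wqe(txq->sq, wqe_size);  /* undo reservation */
            tx_unmap_skb(nic_dev, skb, txq->sges);   /* release DMA maps */
            dev_kfree_skb_any(skb);                  /* counted as tx_dropped */
            return NETDEV_TX_OK;
    }
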
 netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	u16 prod_idx, q_id = skb->queue_mapping;
 	struct netdev_queue *netdev_txq;
 	int nr_sges, err = NETDEV_TX_OK;
 	struct hinic_sq_wqe *sq_wqe;
 	unsigned int wqe_size;
 	struct hinic_txq *txq;
 	struct hinic_qp *qp;
-	u16 prod_idx;
 
-	txq = &nic_dev->txqs[skb->queue_mapping];
+	txq = &nic_dev->txqs[q_id];
 	qp = container_of(txq->sq, struct hinic_qp, sq);
 
 	if (skb->len < MIN_SKB_LEN) {
@@ -236,15 +510,23 @@  netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 process_sq_wqe:
 	hinic_sq_prepare_wqe(txq->sq, prod_idx, sq_wqe, txq->sges, nr_sges);
 
+	err = hinic_tx_offload(skb, &sq_wqe->task, &sq_wqe->ctrl.queue_info);
+	if (err)
+		goto offload_error;
+
 	hinic_sq_write_wqe(txq->sq, prod_idx, sq_wqe, skb, wqe_size);
 
 flush_skbs:
-	netdev_txq = netdev_get_tx_queue(netdev, skb->queue_mapping);
+	netdev_txq = netdev_get_tx_queue(netdev, q_id);
 	if ((!skb->xmit_more) || (netif_xmit_stopped(netdev_txq)))
 		hinic_sq_write_db(txq->sq, prod_idx, wqe_size, 0);
 
 	return err;
 
+offload_error:
+	hinic_sq_return_wqe(txq->sq, wqe_size);
+	tx_unmap_skb(nic_dev, skb, txq->sges);
+
 skb_error:
 	dev_kfree_skb_any(skb);
 
@@ -252,7 +534,8 @@  netdev_tx_t hinic_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 	u64_stats_update_begin(&txq->txq_stats.syncp);
 	txq->txq_stats.tx_dropped++;
 	u64_stats_update_end(&txq->txq_stats.syncp);
-	return err;
+
+	return NETDEV_TX_OK;
 }
 
 /**