From patchwork Tue Aug 8 14:21:53 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Roi Dayan X-Patchwork-Id: 799211 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Authentication-Results: ozlabs.org; spf=pass (mailfrom) smtp.mailfrom=openvswitch.org (client-ip=140.211.169.12; helo=mail.linuxfoundation.org; envelope-from=ovs-dev-bounces@openvswitch.org; receiver=) Received: from mail.linuxfoundation.org (mail.linuxfoundation.org [140.211.169.12]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 3xRc9n2rkwz9s4s for ; Wed, 9 Aug 2017 00:25:01 +1000 (AEST) Received: from mail.linux-foundation.org (localhost [127.0.0.1]) by mail.linuxfoundation.org (Postfix) with ESMTP id 182CEB0A; Tue, 8 Aug 2017 14:23:11 +0000 (UTC) X-Original-To: dev@openvswitch.org Delivered-To: ovs-dev@mail.linuxfoundation.org Received: from smtp1.linuxfoundation.org (smtp1.linux-foundation.org [172.17.192.35]) by mail.linuxfoundation.org (Postfix) with ESMTPS id 5D836A86 for ; Tue, 8 Aug 2017 14:23:09 +0000 (UTC) X-Greylist: domain auto-whitelisted by SQLgrey-1.7.6 Received: from mellanox.co.il (mail-il-dmz.mellanox.com [193.47.165.129]) by smtp1.linuxfoundation.org (Postfix) with ESMTP id 29FAF1BB for ; Tue, 8 Aug 2017 14:22:10 +0000 (UTC) Received: from Internal Mail-Server by MTLPINE1 (envelope-from roid@mellanox.com) with ESMTPS (AES256-SHA encrypted); 8 Aug 2017 17:21:56 +0300 Received: from dev-r-vrt-189.mtr.labs.mlnx (dev-r-vrt-189.mtr.labs.mlnx [10.212.189.1]) by labmailer.mlnx (8.13.8/8.13.8) with ESMTP id v78ELtaC017970; Tue, 8 Aug 2017 17:21:55 +0300 From: Roi Dayan To: dev@openvswitch.org Date: Tue, 8 Aug 2017 17:21:53 +0300 Message-Id: <1502202114-57266-4-git-send-email-roid@mellanox.com> X-Mailer: git-send-email 2.8.0 In-Reply-To: 
<1502202114-57266-1-git-send-email-roid@mellanox.com> References: <1502202114-57266-1-git-send-email-roid@mellanox.com> X-Spam-Status: No, score=-1.9 required=5.0 tests=BAYES_00,RP_MATCHES_RCVD, UNPARSEABLE_RELAY autolearn=unavailable version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on smtp1.linux-foundation.org Cc: Simon Horman Subject: [ovs-dev] [PATCH 3/4] tc: Add header rewrite using tc pedit action X-BeenThere: ovs-dev@openvswitch.org X-Mailman-Version: 2.1.12 Precedence: list List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , MIME-Version: 1.0 Sender: ovs-dev-bounces@openvswitch.org Errors-To: ovs-dev-bounces@openvswitch.org From: Paul Blakey To be later used to implement ovs action set offloading. Signed-off-by: Paul Blakey Reviewed-by: Roi Dayan --- lib/tc.c | 372 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- lib/tc.h | 12 +++ 2 files changed, 381 insertions(+), 3 deletions(-) diff --git a/lib/tc.c b/lib/tc.c index 5c36d0d..4cece17 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -21,8 +21,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -33,11 +35,14 @@ #include "netlink-socket.h" #include "netlink.h" #include "openvswitch/ofpbuf.h" +#include "openvswitch/util.h" #include "openvswitch/vlog.h" #include "packets.h" #include "timeval.h" #include "unaligned.h" +#define MAX_PEDIT_OFFSETS 8 + VLOG_DEFINE_THIS_MODULE(tc); static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5); @@ -50,6 +55,80 @@ enum tc_offload_policy { static enum tc_offload_policy tc_policy = TC_POLICY_NONE; +/* Extended attributes of one pedit key: the header type the key applies to and the command to perform. nl_msg_put_act_pedit() emits them as TCA_PEDIT_KEY_EX_HTYPE and TCA_PEDIT_KEY_EX_CMD. */ +struct tc_pedit_key_ex { + enum pedit_header_type htype; + enum pedit_cmd cmd; +}; + +/* One flower_pedit_map entry: the pedit header type of a rewritable field plus the field's byte offset and size; the offset is used as the pedit key offset. */ +struct flower_key_to_pedit { + enum pedit_header_type htype; + int offset; + int size; +}; + +/* Sparse table indexed by the offsetof() of a member of struct tc_flower_key. Indices that are not listed are zero-initialized; the users of the table skip entries whose size is 0. */ +static struct flower_key_to_pedit flower_pedit_map[] = { + [offsetof(struct tc_flower_key, ipv4.ipv4_src)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4, + 12, + MEMBER_SIZEOF(struct 
tc_flower_key, ipv4.ipv4_src) + }, + [offsetof(struct tc_flower_key, ipv4.ipv4_dst)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4, + 16, + MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst) + }, + [offsetof(struct tc_flower_key, ipv4.rewrite_ttl)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_IP4, + 8, + MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl) + }, + [offsetof(struct tc_flower_key, ipv6.ipv6_src)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_IP6, + 8, + MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src) + }, + [offsetof(struct tc_flower_key, ipv6.ipv6_dst)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_IP6, + 24, + MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst) + }, + [offsetof(struct tc_flower_key, src_mac)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH, + 6, + MEMBER_SIZEOF(struct tc_flower_key, src_mac) + }, + [offsetof(struct tc_flower_key, dst_mac)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH, + 0, + MEMBER_SIZEOF(struct tc_flower_key, dst_mac) + }, + [offsetof(struct tc_flower_key, eth_type)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_ETH, + 12, + MEMBER_SIZEOF(struct tc_flower_key, eth_type) + }, + [offsetof(struct tc_flower_key, tcp_src)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_TCP, + 0, + MEMBER_SIZEOF(struct tc_flower_key, tcp_src) + }, + [offsetof(struct tc_flower_key, tcp_dst)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_TCP, + 2, + MEMBER_SIZEOF(struct tc_flower_key, tcp_dst) + }, + [offsetof(struct tc_flower_key, udp_src)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP, + 0, + MEMBER_SIZEOF(struct tc_flower_key, udp_src) + }, + [offsetof(struct tc_flower_key, udp_dst)] = { + TCA_PEDIT_KEY_EX_HDR_TYPE_UDP, + 2, + MEMBER_SIZEOF(struct tc_flower_key, udp_dst) + }, +}; + struct tcmsg * tc_make_request(int ifindex, int type, unsigned int flags, struct ofpbuf *request) @@ -346,6 +425,96 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower *flower) { } } +/* Netlink policy for the options of a "pedit" action: the extended parameters (at least one struct tc_pedit) and the nested extended keys are both mandatory. */ +static const struct nl_policy pedit_policy[] = { + [TCA_PEDIT_PARMS_EX] = { .type = NL_A_UNSPEC, + .min_len = sizeof(struct tc_pedit), + .optional = false, }, + [TCA_PEDIT_KEYS_EX] = { 
.type = NL_A_NESTED,
+                            .optional = false, },
+};
+
+/* Parses the netlink options of a tc "pedit" action into flower->rewrite.
+ *
+ * 'options' must carry TCA_PEDIT_PARMS_EX (a struct tc_pedit followed by its
+ * 'nkeys' keys) and TCA_PEDIT_KEYS_EX (one nested TCA_PEDIT_KEY_EX per key).
+ * Every 4-byte pedit key is matched against the entries of flower_pedit_map
+ * that have the same header type; the overlapping part of the key's value
+ * and (inverted) mask is OR-ed into flower->rewrite.key and
+ * flower->rewrite.mask.  On success flower->rewrite.rewrite is set to true.
+ *
+ * Returns 0 on success, EPROTO if the options are malformed (policy
+ * violation, parameters shorter than the advertised number of keys, or an
+ * extended key without a header type) and EOPNOTSUPP if a key is not of the
+ * extended TCA_PEDIT_KEY_EX kind. */
+static int
+nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
+{
+    struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
+    const struct tc_pedit *pe;
+    const struct tc_pedit_key *keys;
+    const struct nlattr *nla, *keys_ex, *ex_type;
+    const void *keys_attr;
+    char *rewrite_key = (void *) &flower->rewrite.key;
+    char *rewrite_mask = (void *) &flower->rewrite.mask;
+    size_t keys_ex_size, left;
+    int type, i = 0;
+
+    if (!nl_parse_nested(options, pedit_policy, pe_attrs,
+                         ARRAY_SIZE(pedit_policy))) {
+        VLOG_ERR_RL(&error_rl, "failed to parse pedit action options");
+        return EPROTO;
+    }
+
+    pe = nl_attr_get_unspec(pe_attrs[TCA_PEDIT_PARMS_EX], sizeof *pe);
+    keys = pe->keys;
+
+    /* The policy only guarantees the fixed part of struct tc_pedit.  Make
+     * sure the attribute really holds the 'nkeys' keys that are read through
+     * 'keys' below, so a short message cannot cause an out-of-bounds read. */
+    if (nl_attr_get_size(pe_attrs[TCA_PEDIT_PARMS_EX])
+        < sizeof *pe + pe->nkeys * sizeof *keys) {
+        VLOG_ERR_RL(&error_rl, "pedit action parameters are truncated");
+        return EPROTO;
+    }
+
+    keys_attr = pe_attrs[TCA_PEDIT_KEYS_EX];
+    keys_ex = nl_attr_get(keys_attr);
+    keys_ex_size = nl_attr_get_size(keys_attr);
+
+    NL_ATTR_FOR_EACH(nla, left, keys_ex, keys_ex_size) {
+        if (i >= pe->nkeys) {
+            break;
+        }
+
+        if (nl_attr_type(nla) == TCA_PEDIT_KEY_EX) {
+            ex_type = nl_attr_find_nested(nla, TCA_PEDIT_KEY_EX_HTYPE);
+            if (!ex_type) {
+                /* nl_attr_find_nested() returns NULL when the attribute is
+                 * absent; it must not be handed to nl_attr_get_u16(). */
+                VLOG_ERR_RL(&error_rl,
+                            "pedit extended key is missing its header type");
+                return EPROTO;
+            }
+            type = nl_attr_get_u16(ex_type);
+
+            for (int j = 0; j < ARRAY_SIZE(flower_pedit_map); j++) {
+                struct flower_key_to_pedit *m = &flower_pedit_map[j];
+                int flower_off = j;
+                int sz = m->size;
+                int mf = m->offset;
+
+                /* Skip the unused slots of the sparse map and the entries
+                 * that belong to a different header. */
+                if (!sz || m->htype != type) {
+                    continue;
+                }
+
+                /* check overlap between current pedit key, which is always
+                 * 4 bytes (range [off, off + 3]), and a map entry in
+                 * flower_pedit_map (range [mf, mf + sz - 1]) */
+                if ((keys->off >= mf && keys->off < mf + sz)
+                    || (keys->off + 3 >= mf && keys->off + 3 < mf + sz)) {
+                    int diff = flower_off + (keys->off - mf);
+                    uint32_t *dst = (void *) (rewrite_key + diff);
+                    uint32_t *dst_m = (void *) (rewrite_mask + diff);
+                    /* pedit masks mark the bits to keep; flower->rewrite
+                     * masks mark the bits to set, hence the inversion. */
+                    uint32_t mask = ~(keys->mask);
+                    uint32_t zero_bits;
+
+                    /* Drop the bytes of the word that fall outside of the
+                     * mapped field. */
+                    if (keys->off < mf) {
+                        zero_bits = 8 * (mf - keys->off);
+                        mask &= UINT32_MAX << zero_bits;
+                    } else if (keys->off + 4 > mf + m->size) {
+                        zero_bits = 8 * (keys->off + 4 - mf - m->size);
+                        mask &= UINT32_MAX >> zero_bits;
+                    }
+
+                    *dst_m |= mask;
+                    *dst |= keys->val & mask;
+                }
+            }
+        } else {
+            VLOG_ERR_RL(&error_rl, "unable to parse legacy pedit type: %d",
+                        nl_attr_type(nla));
+            return EOPNOTSUPP;
+        }
+
+        keys++;
+        i++;
+    }
+
+    flower->rewrite.rewrite = true;
+
+    return 0;
+}
+
 static const struct nl_policy tunnel_key_policy[] = {
     [TCA_TUNNEL_KEY_PARMS] = { .type = NL_A_UNSPEC,
                                .min_len = sizeof(struct tc_tunnel_key),
@@ -589,6 +758,10 @@ nl_parse_single_action(struct nlattr *action, struct tc_flower *flower)
         nl_parse_act_vlan(act_options, flower);
     } else if (!strcmp(act_kind, "tunnel_key")) {
         nl_parse_act_tunnel_key(act_options, flower);
+    } else if (!strcmp(act_kind, "pedit")) {
+        nl_parse_act_pedit(act_options, flower);
+    } else if (!strcmp(act_kind, "csum")) {
+        /* not doing anything for now */
     } else {
         VLOG_ERR_RL(&error_rl, "unknown tc action kind: %s", act_kind);
         return EINVAL;
@@ -790,6 +963,48 @@ tc_get_tc_cls_policy(enum tc_offload_policy policy)
 }
 
 static void
+/* Appends a "csum" action to 'request': a struct tc_csum with action
+ * TC_ACT_PIPE and update_flags set to 'flags', sent as TCA_CSUM_PARMS. */
+nl_msg_put_act_csum(struct ofpbuf *request, uint32_t flags)
+{
+    size_t offset;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "csum");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        struct tc_csum parm = { .action = TC_ACT_PIPE,
+                                .update_flags = flags };
+
+        nl_msg_put_unspec(request, TCA_CSUM_PARMS, &parm, sizeof parm);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+/* Appends a "pedit" action to 'request'.
+ *
+ * 'parm' is sent as TCA_PEDIT_PARMS_EX together with the parm->nkeys
+ * struct tc_pedit_key entries that follow it in memory, so the caller must
+ * lay the keys out directly after '*parm'.  parm->action is set to
+ * TC_ACT_PIPE.  'ex' points to parm->nkeys entries; each one is emitted as a
+ * TCA_PEDIT_KEY_EX nest (header type and command) under TCA_PEDIT_KEYS_EX. */
+static void
+nl_msg_put_act_pedit(struct ofpbuf *request, struct tc_pedit *parm,
+                     struct tc_pedit_key_ex *ex)
+{
+    size_t ksize = sizeof *parm + (parm->nkeys * sizeof(struct tc_pedit_key));
+    size_t offset, offset_keys_ex, offset_key;
+    int i;
+
+    nl_msg_put_string(request, TCA_ACT_KIND, "pedit");
+    offset = nl_msg_start_nested(request, TCA_ACT_OPTIONS);
+    {
+        parm->action = TC_ACT_PIPE;
+
+        nl_msg_put_unspec(request, TCA_PEDIT_PARMS_EX, parm, ksize);
+        offset_keys_ex = nl_msg_start_nested(request, TCA_PEDIT_KEYS_EX);
+        for (i = 0; i < parm->nkeys; i++, ex++) {
+            offset_key = 
nl_msg_start_nested(request, TCA_PEDIT_KEY_EX);
+            nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_HTYPE, ex->htype);
+            nl_msg_put_u16(request, TCA_PEDIT_KEY_EX_CMD, ex->cmd);
+            nl_msg_end_nested(request, offset_key);
+        }
+        nl_msg_end_nested(request, offset_keys_ex);
+    }
+    nl_msg_end_nested(request, offset);
+}
+
+static void
 nl_msg_put_act_push_vlan(struct ofpbuf *request, uint16_t vid, uint8_t prio)
 {
     size_t offset;
@@ -911,7 +1126,129 @@ nl_msg_put_act_cookie(struct ofpbuf *request, struct tc_cookie *ck) {
     }
 }
 
+/* Given 'flower', the flower_pedit_map entry 'm' and 'src_offset', works out
+ * how the field has to be read as a sequence of 4-byte-aligned words:
+ *
+ * src_offset - offset in struct tc_flower_key of where to read the value.
+ * mask, data - set to where to read the first word of
+ *              flower->rewrite.mask / flower->rewrite.key.  They point
+ *              (m->offset - *cur_offset) bytes before the field itself.
+ * cur_offset - set to the offset to use for the first pedit key, i.e.
+ *              m->offset rounded down to a multiple of 4.
+ * cnt - set to the number of words (max pedit keys) covering the field.
+ * first_word_mask/last_word_mask - set to the mask to apply to the
+ *              first/last word read (as we read entire words), clearing the
+ *              bytes that lie before/after the field.
+ *
+ * NOTE(review): the two masks are built with plain shifts of host-order
+ * values, which matches the byte layout of a little-endian host only --
+ * confirm before relying on this on big-endian targets. */
 static void
+calc_offsets(struct tc_flower *flower, struct flower_key_to_pedit *m,
+             int src_offset, int *cur_offset, int *cnt,
+             uint32_t *last_word_mask, uint32_t *first_word_mask,
+             uint32_t **mask, uint32_t **data)
+{
+    int start_offset, max_offset, total_size;
+    int diff, tail_bytes, right_zero_bits, left_zero_bits;
+    char *rewrite_key = (void *) &flower->rewrite.key;
+    char *rewrite_mask = (void *) &flower->rewrite.mask;
+
+    max_offset = m->offset + m->size;
+    start_offset = ROUND_DOWN(m->offset, 4);
+    diff = m->offset - start_offset;
+    total_size = max_offset - start_offset;
+
+    /* Number of field bytes in the last word; 0 means the field ends exactly
+     * on a word boundary, in which case nothing has to be cleared.  Using
+     * 8 * (4 - 0) == 32 as a shift count for a uint32_t would be undefined
+     * behavior, so that case must map to a shift of 0. */
+    tail_bytes = max_offset % 4;
+    right_zero_bits = tail_bytes ? 8 * (4 - tail_bytes) : 0;
+    left_zero_bits = 8 * diff;
+
+    *cur_offset = start_offset;
+    *cnt = (total_size / 4) + (total_size % 4 ? 1 : 0);
+    *last_word_mask = UINT32_MAX >> right_zero_bits;
+    *first_word_mask = UINT32_MAX << left_zero_bits;
+    *data = (void *) (rewrite_key + src_offset - diff);
+    *mask = (void *) (rewrite_mask + src_offset - diff);
+}
+
+/* Records in flower->csum_update_flags which checksums have to be
+ * recalculated after rewriting a header of type 'htype': the IPv4 header
+ * checksum for IP4, plus the TCP, UDP or ICMP checksum selected by
+ * flower->key.ip_proto for IP4, IP6, TCP and UDP rewrites.  Whenever a flag
+ * is derived from ip_proto, flower->mask.ip_proto is set to UINT8_MAX. */
+static inline void
+csum_update_flag(struct tc_flower *flower,
+                 enum pedit_header_type htype) {
+    if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4) {
+        flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
+    }
+    if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4
+        || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6
+        || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_TCP
+        || htype == TCA_PEDIT_KEY_EX_HDR_TYPE_UDP) {
+        if (flower->key.ip_proto == IPPROTO_TCP) {
+            flower->mask.ip_proto = UINT8_MAX;
+            flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
+        } else if (flower->key.ip_proto == IPPROTO_UDP) {
+            flower->mask.ip_proto = UINT8_MAX;
+            flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
+        } else if (flower->key.ip_proto == IPPROTO_ICMP
+                   || flower->key.ip_proto == IPPROTO_ICMPV6) {
+            flower->mask.ip_proto = UINT8_MAX;
+            flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_ICMP;
+        }
+    }
+}
+
+/* Translates flower->rewrite into a "pedit" action appended to 'request'.
+ *
+ * Walks flower_pedit_map and, for every 4-byte word of a mapped field whose
+ * rewrite mask is non-zero, adds one TCA_PEDIT_KEY_EX_CMD_SET key and calls
+ * csum_update_flag() for the field's header type.
+ *
+ * Returns 0 on success, or EOPNOTSUPP if more than MAX_PEDIT_OFFSETS keys
+ * would be required. */
+static int
+nl_msg_put_flower_rewrite_pedits(struct ofpbuf *request,
+                                 struct tc_flower *flower)
+{
+    struct {
+        struct tc_pedit sel;
+        struct tc_pedit_key keys[MAX_PEDIT_OFFSETS];
+        struct tc_pedit_key_ex keys_ex[MAX_PEDIT_OFFSETS];
+    } sel = {
+        .sel = {
+            .nkeys = 0
+        }
+    };
+    int i, j;
+
+    for (i = 0; i < ARRAY_SIZE(flower_pedit_map); i++) {
+        struct flower_key_to_pedit *m = &flower_pedit_map[i];
+        struct tc_pedit_key *pedit_key = NULL;
+        struct tc_pedit_key_ex *pedit_key_ex = NULL;
+        uint32_t *mask, *data, first_word_mask, last_word_mask;
+        int cnt = 0, cur_offset = 0;
+
+        if (!m->size) {
+            continue;
+        }
+
+        calc_offsets(flower, m, i, &cur_offset, &cnt, &last_word_mask,
+                     &first_word_mask, &mask, &data);
+
+        for (j = 0; j < cnt; j++,  mask++, data++, cur_offset += 4) {
+            uint32_t mask_word = *mask;
+
+            if (j == 0) {
+                mask_word &= first_word_mask;
+            }
+            if (j == cnt - 1) {
+                
mask_word &= last_word_mask; + } + if (!mask_word) { + continue; + } + if (sel.sel.nkeys == MAX_PEDIT_OFFSETS) { + VLOG_ERR_RL(&error_rl, "reached too many pedit offsets: %d", + MAX_PEDIT_OFFSETS); + return EOPNOTSUPP; + } + + pedit_key = &sel.keys[sel.sel.nkeys]; + pedit_key_ex = &sel.keys_ex[sel.sel.nkeys]; + pedit_key_ex->cmd = TCA_PEDIT_KEY_EX_CMD_SET; + pedit_key_ex->htype = m->htype; + pedit_key->off = cur_offset; + pedit_key->mask = ~mask_word; + pedit_key->val = *data & mask_word; + sel.sel.nkeys++; + csum_update_flag(flower, m->htype); + } + } + nl_msg_put_act_pedit(request, &sel.sel, sel.keys_ex); + + return 0; +} + +static int nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) { size_t offset; @@ -920,7 +1257,20 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) offset = nl_msg_start_nested(request, TCA_FLOWER_ACT); { uint16_t act_index = 1; + int error; + + if (flower->rewrite.rewrite) { + act_offset = nl_msg_start_nested(request, act_index++); + error = nl_msg_put_flower_rewrite_pedits(request, flower); + if (error) { + return error; + } + nl_msg_end_nested(request, act_offset); + act_offset = nl_msg_start_nested(request, act_index++); + nl_msg_put_act_csum(request, flower->csum_update_flags); + nl_msg_end_nested(request, act_offset); + } if (flower->set.set) { act_offset = nl_msg_start_nested(request, act_index++); nl_msg_put_act_tunnel_key_set(request, flower->set.id, @@ -961,6 +1311,8 @@ nl_msg_put_flower_acts(struct ofpbuf *request, struct tc_flower *flower) } } nl_msg_end_nested(request, offset); + + return 0; } static void @@ -1002,11 +1354,19 @@ nl_msg_put_flower_tunnel(struct ofpbuf *request, struct tc_flower *flower) nl_msg_put_masked_value(request, type, type##_MASK, &flower->key.member, \ &flower->mask.member, sizeof flower->key.member) -static void +static int nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) { + uint16_t host_eth_type = ntohs(flower->key.eth_type); 
bool is_vlan = (host_eth_type == ETH_TYPE_VLAN); + int err; + + /* need to parse acts first as some acts require changing the matching */ + err = nl_msg_put_flower_acts(request, flower); + if (err) { + return err; + } if (is_vlan) { host_eth_type = ntohs(flower->key.encap_eth_type); @@ -1062,7 +1422,7 @@ nl_msg_put_flower_options(struct ofpbuf *request, struct tc_flower *flower) nl_msg_put_flower_tunnel(request, flower); } - nl_msg_put_flower_acts(request, flower); + return 0; } int @@ -1085,11 +1445,17 @@ tc_replace_flower(int ifindex, uint16_t prio, uint32_t handle, nl_msg_put_string(&request, TCA_KIND, "flower"); basic_offset = nl_msg_start_nested(&request, TCA_OPTIONS); { - nl_msg_put_flower_options(&request, flower); + error = nl_msg_put_flower_options(&request, flower); + + if (error) { + ofpbuf_uninit(&request); + return error; + } } nl_msg_end_nested(&request, basic_offset); error = tc_transact(&request, &reply); + if (!error) { struct tcmsg *tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc); diff --git a/lib/tc.h b/lib/tc.h index 5f363d0..2269a22 100644 --- a/lib/tc.h +++ b/lib/tc.h @@ -93,6 +93,7 @@ struct tc_flower_key { struct { ovs_be32 ipv4_src; ovs_be32 ipv4_dst; + uint8_t rewrite_ttl; } ipv4; struct { struct in6_addr ipv6_src; @@ -117,6 +118,17 @@ struct tc_flower { uint64_t lastused; struct { + bool rewrite; + uint8_t pad1[3]; + struct tc_flower_key key; + uint8_t pad2[3]; + struct tc_flower_key mask; + uint8_t pad3[3]; + } rewrite; + + uint32_t csum_update_flags; + + struct { bool set; ovs_be64 id; ovs_be16 tp_src;