@@ -620,9 +620,7 @@ bool mlx5e_tc_tun_encap_info_equal_options(struct mlx5e_encap_key *a,
b_info = container_of(b->ip_tun_key, struct ip_tunnel_info, key);
return a_info->options_len == b_info->options_len &&
- !memcmp(ip_tunnel_info_opts(a_info),
- ip_tunnel_info_opts(b_info),
- a_info->options_len);
+ !memcmp(a_info->options, b_info->options, a_info->options_len);
}
static int cmp_decap_info(struct mlx5e_decap_key *a,
@@ -100,7 +100,7 @@ static int mlx5e_gen_ip_tunnel_header_vxlan(char buf[],
vxh->vx_flags = VXLAN_HF_VNI;
vxh->vx_vni = vxlan_vni_field(tun_id);
if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, tun_key->tun_flags)) {
- md = ip_tunnel_info_opts(e->tun_info);
+ md = (struct vxlan_metadata *)e->tun_info->options;
vxlan_build_gbp_hdr(vxh, md);
}
@@ -333,7 +333,7 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
{
struct ip_tunnel_info *ip_tun = (struct ip_tunnel_info *)act->tunnel;
int opt_len, opt_cnt, act_start, tot_push_len;
- u8 *src = ip_tunnel_info_opts(ip_tun);
+ u8 *src = ip_tun->options;
/* We need to populate the options in reverse order for HW.
* Therefore we go through the options, calculating the
@@ -370,7 +370,7 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
act_start = *list_len;
*list_len += tot_push_len;
- src = ip_tunnel_info_opts(ip_tun);
+ src = ip_tun->options;
while (opt_cnt) {
struct geneve_opt *opt = (struct geneve_opt *)src;
struct nfp_fl_push_geneve *push;
@@ -71,7 +71,7 @@ static int pfcp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (unlikely(!tun_dst))
goto drop;
- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ md = (struct pfcp_metadata *)tun_dst->u.tun_info.options;
if (unlikely(!md))
goto drop;
@@ -1756,7 +1756,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
goto drop;
}
- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ md = (struct vxlan_metadata *)tun_dst->u.tun_info.options;
skb_dst_set(skb, (struct dst_entry *)tun_dst);
} else {
@@ -2459,7 +2459,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (test_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags)) {
if (info->options_len < sizeof(*md))
goto drop;
- md = ip_tunnel_info_opts(info);
+ md = (struct vxlan_metadata *)info->options;
}
ttl = info->key.ttl;
tos = info->key.tos;
@@ -163,11 +163,8 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
if (!new_md)
return ERR_PTR(-ENOMEM);
- unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
- sizeof(struct ip_tunnel_info) + md_size,
- /* metadata_dst_alloc() reserves room (md_size bytes) for
- * options right after the ip_tunnel_info struct.
- */);
+ memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
+ sizeof(struct ip_tunnel_info) + md_size);
#ifdef CONFIG_DST_CACHE
/* Unclone the dst cache if there is one */
if (new_md->u.tun_info.dst_cache.cache) {
@@ -93,12 +93,6 @@ struct ip_tunnel_encap {
GENMASK((sizeof_field(struct ip_tunnel_info, \
options_len) * BITS_PER_BYTE) - 1, 0)
-#define ip_tunnel_info_opts(info) \
- _Generic(info, \
- const struct ip_tunnel_info * : ((const void *)((info) + 1)),\
- struct ip_tunnel_info * : ((void *)((info) + 1))\
- )
-
struct ip_tunnel_info {
struct ip_tunnel_key key;
struct ip_tunnel_encap encap;
@@ -107,6 +101,7 @@ struct ip_tunnel_info {
#endif
u8 options_len;
u8 mode;
+ u8 options[] __aligned(sizeof(void *)) __counted_by(options_len);
};
/* 6rd prefix/relay information */
@@ -650,7 +645,7 @@ static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len)
static inline void ip_tunnel_info_opts_get(void *to,
const struct ip_tunnel_info *info)
{
- memcpy(to, info + 1, info->options_len);
+ memcpy(to, info->options, info->options_len);
}
static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
@@ -659,7 +654,7 @@ static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info,
{
info->options_len = len;
if (len > 0) {
- memcpy(ip_tunnel_info_opts(info), from, len);
+ memcpy(info->options, from, len);
ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags,
flags);
}
@@ -286,7 +286,8 @@ struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type,
{
struct metadata_dst *md_dst;
- md_dst = kmalloc(sizeof(*md_dst) + optslen, flags);
+ md_dst = kmalloc(struct_size(md_dst, u.tun_info.options, optslen),
+ flags);
if (!md_dst)
return NULL;
@@ -314,7 +315,8 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags)
int cpu;
struct metadata_dst __percpu *md_dst;
- md_dst = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen,
+ md_dst = __alloc_percpu_gfp(struct_size(md_dst, u.tun_info.options,
+ optslen),
__alignof__(struct metadata_dst), flags);
if (!md_dst)
return NULL;
@@ -334,7 +334,7 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
skb_network_header_len(skb);
pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
sizeof(*ershdr));
- md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ md = (struct erspan_metadata *)tun_dst->u.tun_info.options;
md->version = ver;
md2 = &md->u.md2;
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
@@ -556,7 +556,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
if (tun_info->options_len < sizeof(*md))
goto err_free_skb;
- md = ip_tunnel_info_opts(tun_info);
+ md = (struct erspan_metadata *)tun_info->options;
/* ERSPAN has fixed 8 byte GRE header */
version = md->version;
@@ -147,8 +147,7 @@ struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
dst->key.u.ipv4.dst = src->key.u.ipv4.src;
ip_tunnel_flags_copy(dst->key.tun_flags, src->key.tun_flags);
dst->mode = src->mode | IP_TUNNEL_INFO_TX;
- ip_tunnel_info_opts_set(dst, ip_tunnel_info_opts(src),
- src->options_len, tun_flags);
+ ip_tunnel_info_opts_set(dst, src->options, src->options_len, tun_flags);
return res;
}
@@ -490,7 +489,8 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr,
return -EINVAL;
if (info) {
- struct geneve_opt *opt = ip_tunnel_info_opts(info) + opts_len;
+ struct geneve_opt *opt =
+ (struct geneve_opt *)(info->options + opts_len);
memcpy(opt->opt_data, nla_data(attr), data_len);
opt->length = data_len / 4;
@@ -521,7 +521,7 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
if (info) {
struct vxlan_metadata *md =
- ip_tunnel_info_opts(info) + opts_len;
+ (struct vxlan_metadata *)(info->options + opts_len);
attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
md->gbp = nla_get_u32(attr);
@@ -562,7 +562,7 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr,
if (info) {
struct erspan_metadata *md =
- ip_tunnel_info_opts(info) + opts_len;
+ (struct erspan_metadata *)(info->options + opts_len);
md->version = ver;
if (ver == 1) {
@@ -746,7 +746,7 @@ static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb,
return -ENOMEM;
while (tun_info->options_len > offset) {
- opt = ip_tunnel_info_opts(tun_info) + offset;
+ opt = (struct geneve_opt *)(tun_info->options + offset);
if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS,
opt->opt_class) ||
nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) ||
@@ -772,7 +772,7 @@ static int ip_tun_fill_encap_opts_vxlan(struct sk_buff *skb,
if (!nest)
return -ENOMEM;
- md = ip_tunnel_info_opts(tun_info);
+ md = (struct vxlan_metadata *)tun_info->options;
if (nla_put_u32(skb, LWTUNNEL_IP_OPT_VXLAN_GBP, md->gbp)) {
nla_nest_cancel(skb, nest);
return -ENOMEM;
@@ -792,7 +792,7 @@ static int ip_tun_fill_encap_opts_erspan(struct sk_buff *skb,
if (!nest)
return -ENOMEM;
- md = ip_tunnel_info_opts(tun_info);
+ md = (struct erspan_metadata *)tun_info->options;
if (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version))
goto err;
@@ -875,7 +875,7 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
opt_len += nla_total_size(0); /* LWTUNNEL_IP_OPTS_GENEVE */
while (info->options_len > offset) {
- opt = ip_tunnel_info_opts(info) + offset;
+ opt = (struct geneve_opt *)(info->options + offset);
opt_len += nla_total_size(2) /* OPT_GENEVE_CLASS */
+ nla_total_size(1) /* OPT_GENEVE_TYPE */
+ nla_total_size(opt->length * 4);
@@ -886,7 +886,8 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */
+ nla_total_size(4); /* OPT_VXLAN_GBP */
} else if (test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags)) {
- struct erspan_metadata *md = ip_tunnel_info_opts(info);
+ struct erspan_metadata *md =
+ (struct erspan_metadata *)info->options;
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_ERSPAN */
+ nla_total_size(1) /* OPT_ERSPAN_VER */
@@ -920,8 +921,7 @@ static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
return memcmp(info_a, info_b, sizeof(info_a->key)) ||
info_a->mode != info_b->mode ||
info_a->options_len != info_b->options_len ||
- memcmp(ip_tunnel_info_opts(info_a),
- ip_tunnel_info_opts(info_b), info_a->options_len);
+ memcmp(info_a->options, info_b->options, info_a->options_len);
}
static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
@@ -575,7 +575,7 @@ static int ip6erspan_rcv(struct sk_buff *skb,
pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
sizeof(*ershdr));
info = &tun_dst->u.tun_info;
- md = ip_tunnel_info_opts(info);
+ md = (struct erspan_metadata *)info->options;
md->version = ver;
md2 = &md->u.md2;
memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
@@ -1022,7 +1022,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
if (tun_info->options_len < sizeof(*md))
goto tx_err;
- md = ip_tunnel_info_opts(tun_info);
+ md = (struct erspan_metadata *)tun_info->options;
tun_id = tunnel_id_to_key32(key->tun_id);
if (md->version == 1) {
@@ -980,7 +980,7 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb,
struct ip_tunnel_info *tun_info)
{
return __ip_tun_to_nlattr(skb, &tun_info->key,
- ip_tunnel_info_opts(tun_info),
+ tun_info->options,
tun_info->options_len,
ip_tunnel_info_af(tun_info), tun_info->mode);
}
@@ -3753,7 +3753,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
return -EMSGSIZE;
err = ip_tun_to_nlattr(skb, &tun_info->key,
- ip_tunnel_info_opts(tun_info),
+ tun_info->options,
tun_info->options_len,
ip_tunnel_info_af(tun_info), tun_info->mode);
if (err)
@@ -217,7 +217,7 @@ static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
struct ip_tunnel_info *tun_info)
{
unsigned short tun_proto = ip_tunnel_info_af(tun_info);
- const void *tun_opts = ip_tunnel_info_opts(tun_info);
+ const void *tun_opts = tun_info->options;
const struct ip_tunnel_key *tun_key = &tun_info->key;
int tun_opts_len = tun_info->options_len;
@@ -303,7 +303,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
#if IS_ENABLED(CONFIG_INET)
__set_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags);
- return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+ return tunnel_key_copy_opts(nla, info->options,
opts_len, extack);
#else
return -EAFNOSUPPORT;
@@ -311,7 +311,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
#if IS_ENABLED(CONFIG_INET)
__set_bit(IP_TUNNEL_VXLAN_OPT_BIT, info->key.tun_flags);
- return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+ return tunnel_key_copy_opts(nla, info->options,
opts_len, extack);
#else
return -EAFNOSUPPORT;
@@ -319,7 +319,7 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
#if IS_ENABLED(CONFIG_INET)
__set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, info->key.tun_flags);
- return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+ return tunnel_key_copy_opts(nla, info->options,
opts_len, extack);
#else
return -EAFNOSUPPORT;
@@ -572,7 +572,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
int len = info->options_len;
- u8 *src = (u8 *)(info + 1);
+ u8 *src = (u8 *)info->options;
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_GENEVE);
@@ -603,7 +603,7 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
- struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1);
+ struct vxlan_metadata *md = (struct vxlan_metadata *)info->options;
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN);
@@ -622,7 +622,7 @@ static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb,
static int tunnel_key_erspan_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
- struct erspan_metadata *md = (struct erspan_metadata *)(info + 1);
+ struct erspan_metadata *md = (struct erspan_metadata *)info->options;
struct nlattr *start;
start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN);
Remove the hidden assumption that options are allocated at the end of
the struct, and teach the compiler about them using a flexible array.

With this, we can revert the unsafe_memcpy() call we have in
tun_dst_unclone() [1], and resolve the false field-spanning write
warning caused by the memcpy() in ip_tunnel_info_opts_set().

The layout of struct ip_tunnel_info remains the same with this patch.
Before this patch, there was implicit padding at the end of the struct,
and options were written at 'info + 1', which is after the padding.
This will remain the same as this patch explicitly aligns 'options'.
The alignment is needed because the options are later cast to different
structs; without it, those casts might result in unaligned memory
accesses.

Pahole output before this patch:
struct ip_tunnel_info {
    struct ip_tunnel_key key;            /*  0 64 */

    /* XXX last struct has 1 byte of padding */

    /* --- cacheline 1 boundary (64 bytes) --- */
    struct ip_tunnel_encap encap;        /* 64  8 */
    struct dst_cache dst_cache;          /* 72 16 */
    u8 options_len;                      /* 88  1 */
    u8 mode;                             /* 89  1 */

    /* size: 96, cachelines: 2, members: 5 */
    /* padding: 6 */
    /* paddings: 1, sum paddings: 1 */
    /* last cacheline: 32 bytes */
};

Pahole output after this patch:
struct ip_tunnel_info {
    struct ip_tunnel_key key;            /*  0 64 */

    /* XXX last struct has 1 byte of padding */

    /* --- cacheline 1 boundary (64 bytes) --- */
    struct ip_tunnel_encap encap;        /* 64  8 */
    struct dst_cache dst_cache;          /* 72 16 */
    u8 options_len;                      /* 88  1 */
    u8 mode;                             /* 89  1 */

    /* XXX 6 bytes hole, try to pack */

    u8 options[] __attribute__((__aligned__(8))); /* 96 0 */

    /* size: 96, cachelines: 2, members: 6 */
    /* sum members: 90, holes: 1, sum holes: 6 */
    /* paddings: 1, sum paddings: 1 */
    /* forced alignments: 1, forced holes: 1, sum forced holes: 6 */
    /* last cacheline: 32 bytes */
} __attribute__((__aligned__(8)));

[1] Commit 13cfd6a6d7ac ("net: Silence false field-spanning write warning in metadata_dst memcpy")

Link: https://lore.kernel.org/all/53D1D353-B8F6-4ADC-8F29-8C48A7C9C6F1@kernel.org/
Suggested-by: Kees Cook <kees@kernel.org>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Gal Pressman <gal@nvidia.com>
---
Changelog -
v1->v2: https://lore.kernel.org/netdev/20250209101853.15828-1-gal@nvidia.com/
* Remove change in struct layout, align 'options' field explicitly (Ilya, Kees, Jakub).
* Change allocation I missed in v1 in metadata_dst_alloc_percpu().
---
 .../mellanox/mlx5/core/en/tc_tun_encap.c | 4 +---
 .../mellanox/mlx5/core/en/tc_tun_vxlan.c | 2 +-
 .../ethernet/netronome/nfp/flower/action.c | 4 ++--
 drivers/net/pfcp.c | 2 +-
 drivers/net/vxlan/vxlan_core.c | 4 ++--
 include/net/dst_metadata.h | 7 ++-----
 include/net/ip_tunnels.h | 11 +++--------
 net/core/dst.c | 6 ++++--
 net/ipv4/ip_gre.c | 4 ++--
 net/ipv4/ip_tunnel_core.c | 24 +++++++++----------
 net/ipv6/ip6_gre.c | 4 ++--
 net/openvswitch/flow_netlink.c | 4 ++--
 net/psample/psample.c | 2 +-
 net/sched/act_tunnel_key.c | 12 +++++-----
 14 files changed, 41 insertions(+), 49 deletions(-)