From patchwork Fri Mar  1 14:12:30 2019
Subject: [PATCH net-next v3 1/3] xdp: Refactor devmap code in preparation for subsequent additions
From: Toke Høiland-Jørgensen
To: David Miller
Cc: netdev@vger.kernel.org, Jesper Dangaard Brouer, Daniel Borkmann,
 Alexei Starovoitov, Jakub Kicinski
Date: Fri, 01 Mar 2019 15:12:30 +0100
Message-ID: <155144955040.28287.1075106871059918653.stgit@alrua-x1>
In-Reply-To: <155144955030.28287.14029975169967438162.stgit@alrua-x1>
References: <155144955030.28287.14029975169967438162.stgit@alrua-x1>
User-Agent: StGit/unknown-version

The subsequent commits introducing default maps and a hash-based ifindex
devmap require a bit of refactoring of the devmap code. Perform this
refactoring first, so that the subsequent commits become easier to read.
Signed-off-by: Toke Høiland-Jørgensen
---
 kernel/bpf/devmap.c | 177 +++++++++++++++++++++++++++++++--------------------
 1 file changed, 109 insertions(+), 68 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 191b79948424..1037fc08c504 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,6 +75,7 @@ struct bpf_dtab {
 	struct bpf_dtab_netdev **netdev_map;
 	unsigned long __percpu *flush_needed;
 	struct list_head list;
+	struct rcu_head rcu;
 };
 
 static DEFINE_SPINLOCK(dev_map_lock);
@@ -85,23 +86,11 @@ static u64 dev_map_bitmap_size(const union bpf_attr *attr)
 	return BITS_TO_LONGS((u64) attr->max_entries) * sizeof(unsigned long);
 }
 
-static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr,
+			    bool check_memlock)
 {
-	struct bpf_dtab *dtab;
-	int err = -EINVAL;
 	u64 cost;
-
-	if (!capable(CAP_NET_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	/* check sanity of attributes */
-	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
-		return ERR_PTR(-EINVAL);
-
-	dtab = kzalloc(sizeof(*dtab), GFP_USER);
-	if (!dtab)
-		return ERR_PTR(-ENOMEM);
+	int err;
 
 	bpf_map_init_from_attr(&dtab->map, attr);
 
@@ -109,60 +98,72 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 	cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
 	cost += dev_map_bitmap_size(attr) * num_possible_cpus();
 	if (cost >= U32_MAX - PAGE_SIZE)
-		goto free_dtab;
+		return -EINVAL;
 
 	dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;
 
-	/* if map size is larger than memlock limit, reject it early */
-	err = bpf_map_precharge_memlock(dtab->map.pages);
-	if (err)
-		goto free_dtab;
-
-	err = -ENOMEM;
+	if (check_memlock) {
+		/* if map size is larger than memlock limit, reject it early */
+		err = bpf_map_precharge_memlock(dtab->map.pages);
+		if (err)
+			return -EINVAL;
+	}
 
 	/* A per cpu bitfield with a bit per possible net device */
 	dtab->flush_needed = __alloc_percpu_gfp(dev_map_bitmap_size(attr),
 						__alignof__(unsigned long),
 						GFP_KERNEL | __GFP_NOWARN);
 	if (!dtab->flush_needed)
-		goto free_dtab;
+		goto err_alloc;
 
 	dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
 					      sizeof(struct bpf_dtab_netdev *),
 					      dtab->map.numa_node);
 	if (!dtab->netdev_map)
-		goto free_dtab;
+		goto err_map;
 
-	spin_lock(&dev_map_lock);
-	list_add_tail_rcu(&dtab->list, &dev_map_list);
-	spin_unlock(&dev_map_lock);
+	return 0;
 
-	return &dtab->map;
-free_dtab:
+err_map:
 	free_percpu(dtab->flush_needed);
-	kfree(dtab);
-	return ERR_PTR(err);
+err_alloc:
+	return -ENOMEM;
 }
 
-static void dev_map_free(struct bpf_map *map)
+static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
 {
-	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	int i, cpu;
+	struct bpf_dtab *dtab;
+	int err;
 
-	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
-	 * so the programs (can be more than one that used this map) were
-	 * disconnected from events. Wait for outstanding critical sections in
-	 * these programs to complete. The rcu critical section only guarantees
-	 * no further reads against netdev_map. It does __not__ ensure pending
-	 * flush operations (if any) are complete.
-	 */
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	/* check sanity of attributes */
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+
+	dtab = kzalloc(sizeof(*dtab), GFP_USER);
+	if (!dtab)
+		return ERR_PTR(-ENOMEM);
+
+	err = dev_map_init_map(dtab, attr, true);
+	if (err) {
+		kfree(dtab);
+		return ERR_PTR(err);
+	}
 
 	spin_lock(&dev_map_lock);
-	list_del_rcu(&dtab->list);
+	list_add_tail_rcu(&dtab->list, &dev_map_list);
 	spin_unlock(&dev_map_lock);
 
-	bpf_clear_redirect_map(map);
-	synchronize_rcu();
+	return &dtab->map;
+}
+
+static void __dev_map_free(struct rcu_head *rcu)
+{
+	struct bpf_dtab *dtab = container_of(rcu, struct bpf_dtab, rcu);
+	int i, cpu;
 
 	/* To ensure all pending flush operations have completed wait for flush
 	 * bitmap to indicate all flush_needed bits to be zero on _all_ cpus.
@@ -192,6 +193,26 @@ static void dev_map_free(struct bpf_map *map)
 	kfree(dtab);
 }
 
+static void dev_map_free(struct bpf_map *map)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+
+	/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete. The rcu critical section only guarantees
+	 * no further reads against netdev_map. It does __not__ ensure pending
+	 * flush operations (if any) are complete.
+	 */
+
+	spin_lock(&dev_map_lock);
+	list_del_rcu(&dtab->list);
+	spin_unlock(&dev_map_lock);
+
+	bpf_clear_redirect_map(map);
+	call_rcu(&dtab->rcu, __dev_map_free);
+}
+
 static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
@@ -429,12 +450,42 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key)
 	return 0;
 }
 
-static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
-			       u64 map_flags)
+static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
+						    struct bpf_dtab *dtab,
+						    u32 ifindex,
+						    unsigned int bit)
 {
-	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
-	struct net *net = current->nsproxy->net_ns;
 	gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
+	struct bpf_dtab_netdev *dev;
+
+	dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
+					sizeof(void *), gfp);
+	if (!dev->bulkq) {
+		kfree(dev);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dev->dev = dev_get_by_index(net, ifindex);
+	if (!dev->dev) {
+		free_percpu(dev->bulkq);
+		kfree(dev);
+		return ERR_PTR(-EINVAL);
+	}
+
+	dev->bit = bit;
+	dev->dtab = dtab;
+
+	return dev;
+}
+
+static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
+				 void *key, void *value, u64 map_flags)
+{
+	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
 	u32 i = *(u32 *)key;
 	u32 ifindex = *(u32 *)value;
@@ -449,26 +500,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (!ifindex) {
 		dev = NULL;
 	} else {
-		dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
-		if (!dev)
-			return -ENOMEM;
-
-		dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
-						sizeof(void *), gfp);
-		if (!dev->bulkq) {
-			kfree(dev);
-			return -ENOMEM;
-		}
-
-		dev->dev = dev_get_by_index(net, ifindex);
-		if (!dev->dev) {
-			free_percpu(dev->bulkq);
-			kfree(dev);
-			return -EINVAL;
-		}
-
-		dev->bit = i;
-		dev->dtab = dtab;
+		dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+		if (IS_ERR(dev))
+			return PTR_ERR(dev);
 	}
 
 	/* Use call_rcu() here to ensure rcu critical sections have completed
@@ -482,6 +516,13 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
 	return 0;
 }
 
+static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
+			       u64 map_flags)
+{
+	return __dev_map_update_elem(current->nsproxy->net_ns,
+				     map, key, value, map_flags);
+}
+
 const struct bpf_map_ops dev_map_ops = {
 	.map_alloc = dev_map_alloc,
 	.map_free = dev_map_free,
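
The one behavioural change buried in this refactor is that dev_map_free() no
longer blocks in synchronize_rcu(); it queues the teardown through
call_rcu(&dtab->rcu, __dev_map_free) instead. As an illustration only, here is
a minimal stand-alone module sketching that deferred-free pattern in
isolation. All demo_* names are hypothetical; call_rcu(), rcu_barrier() and
container_of() are the real kernel APIs the patch relies on.

// SPDX-License-Identifier: GPL-2.0
/* Sketch of the call_rcu() deferred-free pattern used by dev_map_free()
 * and __dev_map_free() above. The demo_* names are made up for this demo.
 */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct demo_obj {
	int value;
	struct rcu_head rcu;	/* embedded callback head, like bpf_dtab::rcu */
};

static struct demo_obj *demo;

/* RCU callback: invoked only after every read-side critical section that
 * could still hold a reference to the object has completed, so it is safe
 * to free the memory here. */
static void demo_free_rcu(struct rcu_head *rcu)
{
	struct demo_obj *obj = container_of(rcu, struct demo_obj, rcu);

	kfree(obj);
}

static int __init demo_init(void)
{
	demo = kzalloc(sizeof(*demo), GFP_KERNEL);
	return demo ? 0 : -ENOMEM;
}

static void __exit demo_exit(void)
{
	/* Queue the free instead of blocking in synchronize_rcu(); this is
	 * the same shape as call_rcu(&dtab->rcu, __dev_map_free). */
	call_rcu(&demo->rcu, demo_free_rcu);

	/* Make sure the callback has run before the module text goes away. */
	rcu_barrier();
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The net effect in the patch is the same shape: the caller of dev_map_free()
returns without sleeping for a grace period, and the flush-bitmap wait plus
the final kfree(dtab) now run from the RCU callback once no reader can still
observe the map.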