diff mbox

Re: [PATCH][RFC] qemu:virtio-net: Use TUNSETTXFILTER for MAC filteringlogin register about

Message ID 20110112175326.GC1242@redhat.com
State New
Headers show

Commit Message

Michael S. Tsirkin Jan. 12, 2011, 5:53 p.m. UTC
On Wed, Jan 12, 2011 at 06:26:55PM +0100, Dragos Tatulea wrote:
> Hi,
> 
> Trying to stir up  a year old conversation [1] about mac filtering.
> The patch below is Alex Williamson's work updated for the current qemu
> taking into account some comments. An extra check for multiple nics on
> the same vlan has been added as well. Now, I know it's not ideal but
> I'm looking for feedback here.
> 
> The original thread contained some ideas that weren't discussed extensively:
> 
> * Alex Williamson's patch that makes the nic call the associated tap
> vlan client to set the mac filters. This is possible only for 1 nix x
> 1 tap setup. It's a bit hackish (even more visible in the current
> version) but it's straightforward.
> 
> * Paul Brook considered that the nic shouldn't know anything about
> what else is connected to the vlan. He proposed a model that is more
> generic and would allow us to filter stuff from device to vlan and the
> other way around.
> 
> * Anthony Liguori proposed moving all the mac filtering to the vlan
> side. This would be nice, but it would imply removing & rewriting all
> the emulated nics. Don't know how acceptable this is since, I suppose,
> the emulated behavior will change.
> 
> The bottom line here is that neither of these 3 approaches is ok for
> what we want to achieve: basically tap & macvtap filtering for virtio.
> What do we want? The simple change for virtio tun tx fiiltering or
> filtering MACs in both directions taking into account how nics are
> connected and if hw filtering is possible? The latter being quite a
> change.
> 
> [1] - http://thread.gmane.org/gmane.comp.emulators.qemu/37714/focus=37719
> 
> -- Dragos Tatulea
> 
> Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
> Signed-off-by: Dragos Tatulea <dragos.tatulea@gmail.com>

BTW the comments above are mostly addressed by using
a peer and not a vlan. Here's the patch I used for
testing. As I mentioned, this all needs to be optional
to avoid filtering performance hit.

So - don't apply this as is, but it does work.

-->

tap: use kernel filtering support

Use kernel filtering for tap where available.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>

---
diff mbox

Patch

diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 1d61f19..7892ca9 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -343,6 +343,15 @@  static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
             n->mac_table.multi_overflow = 1;
         }
     }
+    if (n->nic->nc.peer && n->nic->nc.peer->info->set_rx_filter) {
+        n->nic->nc.peer->info->set_rx_filter(n->nic->nc.peer,
+                                             n->promisc,
+                                             n->allmulti,
+                                             n->nobcast,
+                                             n->mac,
+                                             n->mac_table.in_use,
+                                             n->mac_table.macs);
+    }
 
     return VIRTIO_NET_OK;
 }
diff --git a/net.h b/net.h
index 44c31a9..9e164b9 100644
--- a/net.h
+++ b/net.h
@@ -42,6 +42,8 @@  typedef ssize_t (NetReceive)(VLANClientState *, const uint8_t *, size_t);
 typedef ssize_t (NetReceiveIOV)(VLANClientState *, const struct iovec *, int);
 typedef void (NetCleanup) (VLANClientState *);
 typedef void (LinkStatusChanged)(VLANClientState *);
+typedef int (NetRXFilter)(VLANClientState *, bool allmulti, bool promisc,
+                          bool nobcast, uint8_t *, int , uint8_t *);
 
 typedef struct NetClientInfo {
     net_client_type type;
@@ -50,6 +52,7 @@  typedef struct NetClientInfo {
     NetReceive *receive_raw;
     NetReceiveIOV *receive_iov;
     NetCanReceive *can_receive;
+    NetRXFilter *set_rx_filter;
     NetCleanup *cleanup;
     LinkStatusChanged *link_status_changed;
     NetPoll *poll;
diff --git a/net/tap-aix.c b/net/tap-aix.c
index e19aaba..4cf5ad9 100644
--- a/net/tap-aix.c
+++ b/net/tap-aix.c
@@ -59,3 +59,9 @@  void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast,
+                         uint8_t *mac, int count, uint8_t *macs)
+{
+    return -1;
+}
diff --git a/net/tap-bsd.c b/net/tap-bsd.c
index efccfe0..d092c6f 100644
--- a/net/tap-bsd.c
+++ b/net/tap-bsd.c
@@ -125,3 +125,9 @@  void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast,
+                         uint8_t *mac, int count, uint8_t *macs)
+{
+    return -1;
+}
diff --git a/net/tap-haiku.c b/net/tap-haiku.c
index 91dda8e..8117c03 100644
--- a/net/tap-haiku.c
+++ b/net/tap-haiku.c
@@ -59,3 +59,9 @@  void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast,
+                         uint8_t *mac, int count, uint8_t *macs)
+{
+    return -1;
+}
diff --git a/net/tap-linux.c b/net/tap-linux.c
index f7aa904..19d05c6 100644
--- a/net/tap-linux.c
+++ b/net/tap-linux.c
@@ -188,3 +188,41 @@  void tap_fd_set_offload(int fd, int csum, int tso4,
         }
     }
 }
+
+int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast,
+                         uint8_t *mac, int count, uint8_t *list)
+{
+    struct tun_filter *filter;
+    int ret;
+    int len = TAP_LINUX_ETH_ALEN * (promisc ? 0 : (count + 2));
+
+    filter = qemu_mallocz(offsetof(struct tun_filter, addr) + len);
+    if (!filter) {
+        return -1;
+    }
+
+    if (!promisc) {
+        memcpy(filter->addr, mac, TAP_LINUX_ETH_ALEN);
+        filter->count += 1;
+
+        memcpy(&filter->addr[filter->count], list, count * TAP_LINUX_ETH_ALEN);
+        filter->count += count;
+    }
+
+    if (!promisc && !allmulti && !nobcast) {
+        /* If enabled and not included implicitly by promisc or all multicast
+         * mode, add all ones - the broadcast address. */
+        memset(&filter->addr[filter->count], 0xff, TAP_LINUX_ETH_ALEN);
+        filter->count += 1;
+    }
+
+    if (allmulti) {
+        filter->flags |= TUN_FLT_ALLMULTI;
+    }
+
+    ret = ioctl(fd, TUNSETTXFILTER, filter);
+
+    qemu_free(filter);
+
+    return ret;
+}
diff --git a/net/tap-linux.h b/net/tap-linux.h
index 659e981..6e53562 100644
--- a/net/tap-linux.h
+++ b/net/tap-linux.h
@@ -25,6 +25,7 @@ 
 #define TUNSETIFF     _IOW('T', 202, int)
 #define TUNGETFEATURES _IOR('T', 207, unsigned int)
 #define TUNSETOFFLOAD  _IOW('T', 208, unsigned int)
+#define TUNSETTXFILTER _IOW('T', 209, unsigned int)
 #define TUNGETIFF      _IOR('T', 210, unsigned int)
 #define TUNSETSNDBUF   _IOW('T', 212, int)
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
@@ -44,6 +45,24 @@ 
 #define TUN_F_TSO_ECN	0x08	/* I can handle TSO with ECN bits. */
 #define TUN_F_UFO	0x10	/* I can handle UFO packets */
 
+/*
+ * Filter spec (used for SETXXFILTER ioctls)
+ * This stuff is applicable only to the TAP (Ethernet) devices.
+ * If the count is zero the filter is disabled and the driver accepts
+ * all packets (promisc mode).
+ * If the filter is enabled in order to accept broadcast packets
+ * broadcast addr must be explicitly included in the addr list.
+ */
+#define TUN_FLT_ALLMULTI 0x0001 /* Accept all multicast packets */
+
+#define TAP_LINUX_ETH_ALEN 6
+
+struct tun_filter {
+	uint16_t  flags; /* TUN_FLT_ flags see above */
+	uint16_t  count; /* Number of addresses */
+	uint8_t   addr[0][TAP_LINUX_ETH_ALEN];
+};
+
 struct virtio_net_hdr
 {
     uint8_t flags;
diff --git a/net/tap-solaris.c b/net/tap-solaris.c
index c216d28..2a1d065 100644
--- a/net/tap-solaris.c
+++ b/net/tap-solaris.c
@@ -225,3 +225,9 @@  void tap_fd_set_offload(int fd, int csum, int tso4,
                         int tso6, int ecn, int ufo)
 {
 }
+
+int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast,
+                         uint8_t *mac, int count, uint8_t *macs)
+{
+    return -1;
+}
diff --git a/net/tap-win32.c b/net/tap-win32.c
index 081904e..333846b 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -749,3 +749,9 @@  struct vhost_net *tap_get_vhost_net(VLANClientState *nc)
 {
     return NULL;
 }
+
+int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast,
+                         uint8_t *mac, int count, uint8_t *macs)
+{
+    return -1;
+}
diff --git a/net/tap.c b/net/tap.c
index eada34a..7bc47f8 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -310,6 +310,13 @@  int tap_get_fd(VLANClientState *nc)
     return s->fd;
 }
 
+static int tap_set_rx_filter(VLANClientState *nc, bool allmulti, bool promisc,
+                             bool nobcast, uint8_t *mac, int count, uint8_t *list)
+{
+    TAPState *s = DO_UPCAST(TAPState, nc, nc);
+    return tap_fd_set_rx_filter(s->fd, allmulti, promisc, nobcast, mac, count, list);
+}
+
 /* fd support */
 
 static NetClientInfo net_tap_info = {
@@ -318,6 +325,7 @@  static NetClientInfo net_tap_info = {
     .receive = tap_receive,
     .receive_raw = tap_receive_raw,
     .receive_iov = tap_receive_iov,
+    .set_rx_filter = tap_set_rx_filter,
     .poll = tap_poll,
     .cleanup = tap_cleanup,
 };
diff --git a/net/tap.h b/net/tap.h
index e44bd2b..0cf820c 100644
--- a/net/tap.h
+++ b/net/tap.h
@@ -51,6 +51,8 @@  int tap_probe_vnet_hdr_len(int fd, int len);
 int tap_probe_has_ufo(int fd);
 void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo);
 void tap_fd_set_vnet_hdr_len(int fd, int len);
+int tap_fd_set_rx_filter(int fd, bool allmulti, bool promisc, bool nobcast,
+                         uint8_t *, int , uint8_t *);
 
 int tap_get_fd(VLANClientState *vc);