diff mbox

linux-user: manage SOCK_PACKET socket type.

Message ID 1444151509-5047-1-git-send-email-laurent@vivier.eu
State New
Headers show

Commit Message

Laurent Vivier Oct. 6, 2015, 5:11 p.m. UTC
This is obsolete, but if we want to use dhcp with some distros (like debian
ppc 8.2 jessie), we need it.

At the bind level, we are not able to know the socket type so we try to
guess it by analyzing the name. We manage only the case "ethX",
"ethX" in spk_device is similar to set htons(0x6574) in sll_protocol in the
normal case, and as this protocol does not exist, it's ok.

SOCK_PACKET uses network endian to encode protocol in socket()

in PACKET(7) :
                                 protocol is the  IEEE  802.3  protocol
number in network order.  See the <linux/if_ether.h> include file for a
list of allowed protocols.  When protocol is  set  to  htons(ETH_P_ALL)
then all protocols are received.  All incoming packets of that protocol
type will be passed to the packet socket before they are passed to  the
protocols implemented in the kernel.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
---
This patch is a remix of an old patch sent in 2012:
https://patchwork.ozlabs.org/patch/208892/

 linux-user/syscall.c | 33 +++++++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 6 deletions(-)

Comments

Peter Maydell Oct. 26, 2015, 2:40 p.m. UTC | #1
On 6 October 2015 at 18:11, Laurent Vivier <laurent@vivier.eu> wrote:
> This is obsolete, but if we want to use dhcp with some distros (like debian
> ppc 8.2 jessie), we need it.
>
> At the bind level, we are not able to know the socket type so we try to
> guess it by analyzing the name. We manage only the case "ethX",
> "ethX" in spk_device is similar to set htons(0x6574) in sll_protocol in the
> normal case, and as this protocol does not exist, it's ok.
>
> SOCK_PACKET uses network endian to encode protocol in socket()
>
> in PACKET(7) :
>                                  protocol is the  IEEE  802.3  protocol
> number in network order.  See the <linux/if_ether.h> include file for a
> list of allowed protocols.  When protocol is  set  to  htons(ETH_P_ALL)
> then all protocols are received.  All incoming packets of that protocol
> type will be passed to the packet socket before they are passed to  the
> protocols implemented in the kernel.
>
> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
> ---
> This patch is a remix of an old patch sent in 2012:
> https://patchwork.ozlabs.org/patch/208892/
>
>  linux-user/syscall.c | 33 +++++++++++++++++++++++++++------
>  1 file changed, 27 insertions(+), 6 deletions(-)
>
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 64be431..71cc1e2 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -111,6 +111,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>  #include <linux/route.h>
>  #include <linux/filter.h>
>  #include <linux/blkpg.h>
> +#include <linux/if_packet.h>
>  #include "linux_loop.h"
>  #include "uname.h"
>
> @@ -1198,11 +1199,20 @@ static inline abi_long target_to_host_sockaddr(struct sockaddr *addr,
>      memcpy(addr, target_saddr, len);
>      addr->sa_family = sa_family;
>      if (sa_family == AF_PACKET) {
> -       struct target_sockaddr_ll *lladdr;
> +        /* Manage an obsolete case :
> +         * if socket type is SOCK_PACKET, bind by name otherwise by index
> +         * but we are not able to know socket type, so check if the name
> +         * is usable...
> +         * see linux/net/packet/af_packet.c: packet_bind_spkt()
> +         */
> +        if (strncmp((char *)((struct sockaddr_pkt *)addr)->spkt_device,
> +                    "eth", 3) != 0) {
> +            struct target_sockaddr_ll *lladdr;

This confuses me. The packet(7) manpage suggests there are two flavours
of packet socket:
 (1) legacy AF_INET + SOCK_PACKET
 (2) new style AF_PACKET + SOCK_RAW / SOCK_DGRAM

but this comment suggests it's trying to handle AF_PACKET + SOCK_PACKET ?

If AF_PACKET was introduced as the new way of doing things, it's not
clear why it would be the family type used in the legacy approach's
sockaddr_pkt (though there seems to be code around that does this).
I suspect that 2.0 kernels just didn't check af_family here at all.

The code in the kernel's packet_recvmsg fills in the spkt_family
field with the netdevice's type field, which is an ARPHRD_* constant
as far as I can tell (I could well be wrong here).

Odd to have code in target_to_host_sockaddr and not in
host_to_target_sockaddr.

> -       lladdr = (struct target_sockaddr_ll *)addr;
> -       lladdr->sll_ifindex = tswap32(lladdr->sll_ifindex);
> -       lladdr->sll_hatype = tswap16(lladdr->sll_hatype);
> +            lladdr = (struct target_sockaddr_ll *)addr;
> +            lladdr->sll_ifindex = tswap32(lladdr->sll_ifindex);
> +            lladdr->sll_hatype = tswap16(lladdr->sll_hatype);
> +        }
>      }
>      unlock_user(target_saddr, target_addr, 0);
>
> @@ -2509,7 +2519,12 @@ static abi_long do_socketcall(int num, abi_ulong vptr)
>      /* now when we have the args, actually handle the call */
>      switch (num) {
>      case SOCKOP_socket: /* domain, type, protocol */
> -        return do_socket(a[0], a[1], a[2]);
> +        if (a[0] == AF_PACKET ||
> +            a[1] == TARGET_SOCK_PACKET) {
> +            return do_socket(a[0], a[1], tswap16(a[2]));
> +        } else {
> +            return do_socket(a[0], a[1], a[2]);
> +        }
>      case SOCKOP_bind: /* sockfd, addr, addrlen */
>          return do_bind(a[0], a[1], a[2]);
>      case SOCKOP_connect: /* sockfd, addr, addrlen */
> @@ -7500,7 +7515,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
>  #endif
>  #ifdef TARGET_NR_socket
>      case TARGET_NR_socket:
> -        ret = do_socket(arg1, arg2, arg3);
> +        if (arg1 == AF_PACKET ||
> +            arg2 == TARGET_SOCK_PACKET) {
> +            /* in this case, socket() needs a network endian short */
> +            ret = do_socket(arg1, arg2, tswap16(arg3));
> +        } else {
> +            ret = do_socket(arg1, arg2, arg3);
> +        }

This doesn't make sense to me. The argument to socket()
is passed in via a register; so if the guest code correctly
passes the protocol value as htons(whatever) then that will
be the value in arg3 and we do not need to swap anything.

I see we had this discussion about the previous version of the
patch too... and my argument that we don't need the tswap16
in the socketcall code path either still makes sense to me.

>          fd_trans_unregister(ret);
>          break;
>  #endif
> --
> 2.4.3

thanks
-- PMM
Laurent Vivier Oct. 27, 2015, 3:09 a.m. UTC | #2
Le 26/10/2015 15:40, Peter Maydell a écrit :
> On 6 October 2015 at 18:11, Laurent Vivier <laurent@vivier.eu> wrote:
>> This is obsolete, but if we want to use dhcp with some distros (like debian
>> ppc 8.2 jessie), we need it.
>>
>> At the bind level, we are not able to know the socket type so we try to
>> guess it by analyzing the name. We manage only the case "ethX",
>> "ethX" in spk_device is similar to set htons(0x6574) in sll_protocol in the
>> normal case, and as this protocol does not exist, it's ok.
>>
>> SOCK_PACKET uses network endian to encode protocol in socket()
>>
>> in PACKET(7) :
>>                                  protocol is the  IEEE  802.3  protocol
>> number in network order.  See the <linux/if_ether.h> include file for a
>> list of allowed protocols.  When protocol is  set  to  htons(ETH_P_ALL)
>> then all protocols are received.  All incoming packets of that protocol
>> type will be passed to the packet socket before they are passed to  the
>> protocols implemented in the kernel.
>>
>> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
>> ---
>> This patch is a remix of an old patch sent in 2012:
>> https://patchwork.ozlabs.org/patch/208892/
>>
>>  linux-user/syscall.c | 33 +++++++++++++++++++++++++++------
>>  1 file changed, 27 insertions(+), 6 deletions(-)
>>
>> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
>> index 64be431..71cc1e2 100644
>> --- a/linux-user/syscall.c
>> +++ b/linux-user/syscall.c
>> @@ -111,6 +111,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>>  #include <linux/route.h>
>>  #include <linux/filter.h>
>>  #include <linux/blkpg.h>
>> +#include <linux/if_packet.h>
>>  #include "linux_loop.h"
>>  #include "uname.h"
>>
>> @@ -1198,11 +1199,20 @@ static inline abi_long target_to_host_sockaddr(struct sockaddr *addr,
>>      memcpy(addr, target_saddr, len);
>>      addr->sa_family = sa_family;
>>      if (sa_family == AF_PACKET) {
>> -       struct target_sockaddr_ll *lladdr;
>> +        /* Manage an obsolete case :
>> +         * if socket type is SOCK_PACKET, bind by name otherwise by index
>> +         * but we are not able to know socket type, so check if the name
>> +         * is usable...
>> +         * see linux/net/packet/af_packet.c: packet_bind_spkt()
>> +         */
>> +        if (strncmp((char *)((struct sockaddr_pkt *)addr)->spkt_device,
>> +                    "eth", 3) != 0) {
>> +            struct target_sockaddr_ll *lladdr;
> 
> This confuses me. The packet(7) manpage suggests there are two flavours
> of packet socket:
>  (1) legacy AF_INET + SOCK_PACKET
>  (2) new style AF_PACKET + SOCK_RAW / SOCK_DGRAM
> 
> but this comment suggests it's trying to handle AF_PACKET + SOCK_PACKET ?

In fact, I've started not from the man page, but from a non working dhcp
client, originally with a m68k target and etch-m68k distro, and I've met
again this problem on a ppc target and jessie distro.

> 
> If AF_PACKET was introduced as the new way of doing things, it's not
> clear why it would be the family type used in the legacy approach's
> sockaddr_pkt (though there seems to be code around that does this).
> I suspect that 2.0 kernels just didn't check af_family here at all.

by digging into the code, I have found:

$ apt-get source isc-dhcp-client
$ vi isc-dhcp-4.3.1/common/lpf.c

...
 72 int if_register_lpf (info)
 73         struct interface_info *info;
 74 {
...
 79         if ((sock = socket(PF_PACKET, SOCK_PACKET,
 80                            htons((short)ETH_P_ALL))) < 0) {
...

So we can see socket() is used with domain PF_PACKET and type SOCK_PACKET.

Next, the interface name is put into the sa_data of sockaddr, and bind()
is used with AF_PACKET:

 94         /* Bind to the interface name */
 95         memset (&sa, 0, sizeof sa);
 96         sa.sa_family = AF_PACKET;
 97         strncpy (sa.sa_data, (const char *)info -> ifp, sizeof
sa.sa_data);
 98         if (bind (sock, &sa, sizeof sa)) {

ifp is initialized from a list of all discovered interfaces in
common/discover.c:
...
 238 int
 239 begin_iface_scan(struct iface_conf_list *ifaces) {
...
 283         if (ioctl(ifaces->sock, SIOCGLIFCONF, &ifaces->conf) < 0) {
...
 918 void
 919 discover_interfaces(int state) {
...
 924         struct interface_info *tmp;
...
 939         if (!begin_iface_scan(&ifaces)) {
...
 955         while (next_iface(&info, &err, &ifaces)) {
 956
 957                 /* See if we've seen an interface that matches this
one. */
 958                 for (tmp = interfaces; tmp; tmp = tmp->next) {
 959                         if (!strcmp(tmp->name, info.name))
 960                                 break;
 961                 }

...
 987                         strcpy(tmp->name, info.name);
...
1050         }
...
1063         for (tmp = interfaces ; tmp != NULL ; tmp = tmp->next) {
1064                 if (tmp->ifp == NULL) {
1065                         struct ifreq *tif;
1066
1067                         tif = (struct ifreq *)dmalloc(sizeof(struct
ifreq),
1068                                                       MDL);
1069                         if (tif == NULL)
1070                                 log_fatal("no space for ifp mockup.");
1071                         strcpy(tif->ifr_name, tmp->name);
1072                         tmp->ifp = tif;
1073                 }
1074         }

> 
> The code in the kernel's packet_recvmsg fills in the spkt_family
> field with the netdevice's type field, which is an ARPHRD_* constant
> as far as I can tell (I could well be wrong here).

kernel 4.3.0-rc3, net/packet/af_packet.c:

   2961
   2962 static int packet_bind_spkt(struct socket *sock, struct sockaddr
*uaddr,
   2963                             int addr_len)
   2964 {
   2965         struct sock *sk = sock->sk;
   2966         char name[15];
   2967         struct net_device *dev;
   2968         int err = -ENODEV;
   2969
   2970         /*
   2971          *      Check legality
   2972          */
   2973
   2974         if (addr_len != sizeof(struct sockaddr))
   2975                 return -EINVAL;
   2976         strlcpy(name, uaddr->sa_data, sizeof(name));
   2977
   2978         dev = dev_get_by_name(sock_net(sk), name);
   2979         if (dev)
   2980                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
   2981         return err;
   2982 }
...
   4246 static const struct proto_ops packet_ops_spkt = {
   4247         .family =       PF_PACKET,
...
   4250         .bind =         packet_bind_spkt,
...
   3022
   3023 static int packet_create(struct net *net, struct socket *sock,
int protocol,
   3024                          int kern)
...
   3045         if (sock->type == SOCK_PACKET)
   3046                 sock->ops = &packet_ops_spkt;
...

> Odd to have code in target_to_host_sockaddr and not in
> host_to_target_sockaddr.

In the case of host_to_target_sockaddr(), there is no "if (sa_family ==
AF_PACKET) {" as it was in target_to_host_sockaddr(), it's why I didn't
add it (and I don't like to add code I don't test).

> 
>> -       lladdr = (struct target_sockaddr_ll *)addr;
>> -       lladdr->sll_ifindex = tswap32(lladdr->sll_ifindex);
>> -       lladdr->sll_hatype = tswap16(lladdr->sll_hatype);
>> +            lladdr = (struct target_sockaddr_ll *)addr;
>> +            lladdr->sll_ifindex = tswap32(lladdr->sll_ifindex);
>> +            lladdr->sll_hatype = tswap16(lladdr->sll_hatype);
>> +        }
>>      }
>>      unlock_user(target_saddr, target_addr, 0);
>>
>> @@ -2509,7 +2519,12 @@ static abi_long do_socketcall(int num, abi_ulong vptr)
>>      /* now when we have the args, actually handle the call */
>>      switch (num) {
>>      case SOCKOP_socket: /* domain, type, protocol */
>> -        return do_socket(a[0], a[1], a[2]);
>> +        if (a[0] == AF_PACKET ||
>> +            a[1] == TARGET_SOCK_PACKET) {
>> +            return do_socket(a[0], a[1], tswap16(a[2]));
>> +        } else {
>> +            return do_socket(a[0], a[1], a[2]);
>> +        }
>>      case SOCKOP_bind: /* sockfd, addr, addrlen */
>>          return do_bind(a[0], a[1], a[2]);
>>      case SOCKOP_connect: /* sockfd, addr, addrlen */
>> @@ -7500,7 +7515,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
>>  #endif
>>  #ifdef TARGET_NR_socket
>>      case TARGET_NR_socket:
>> -        ret = do_socket(arg1, arg2, arg3);
>> +        if (arg1 == AF_PACKET ||
>> +            arg2 == TARGET_SOCK_PACKET) {
>> +            /* in this case, socket() needs a network endian short */
>> +            ret = do_socket(arg1, arg2, tswap16(arg3));
>> +        } else {
>> +            ret = do_socket(arg1, arg2, arg3);
>> +        }
> 
> This doesn't make sense to me. The argument to socket()
> is passed in via a register; so if the guest code correctly
> passes the protocol value as htons(whatever) then that will
> be the value in arg3 and we do not need to swap anything.
> 
> I see we had this discussion about the previous version of the
> patch too... and my argument that we don't need the tswap16
> in the socketcall code path either still makes sense to me.

I agree with you, I think I have confused socketcall() that passes
parameters by memory, and socket() that passes parameters by registers.
I will remove this part and resend a patch.

> 
>>          fd_trans_unregister(ret);
>>          break;
>>  #endif
>> --
>> 2.4.3
> 
> thanks
> -- PMM
> 

Thank you for your comments,
Laurent
Laurent Vivier Oct. 27, 2015, 10:47 a.m. UTC | #3
Le 27/10/2015 04:09, Laurent Vivier a écrit :
> 
> 
> Le 26/10/2015 15:40, Peter Maydell a écrit :
>> On 6 October 2015 at 18:11, Laurent Vivier <laurent@vivier.eu> wrote:
>>> This is obsolete, but if we want to use dhcp with some distros (like debian
>>> ppc 8.2 jessie), we need it.
>>>
>>> At the bind level, we are not able to know the socket type so we try to
>>> guess it by analyzing the name. We manage only the case "ethX",
>>> "ethX" in spk_device is similar to set htons(0x6574) in sll_protocol in the
>>> normal case, and as this protocol does not exist, it's ok.
>>>
>>> SOCK_PACKET uses network endian to encode protocol in socket()
>>>
>>> in PACKET(7) :
>>>                                  protocol is the  IEEE  802.3  protocol
>>> number in network order.  See the <linux/if_ether.h> include file for a
>>> list of allowed protocols.  When protocol is  set  to  htons(ETH_P_ALL)
>>> then all protocols are received.  All incoming packets of that protocol
>>> type will be passed to the packet socket before they are passed to  the
>>> protocols implemented in the kernel.
>>>
>>> Signed-off-by: Laurent Vivier <laurent@vivier.eu>
>>> ---
>>> This patch is a remix of an old patch sent in 2012:
>>> https://patchwork.ozlabs.org/patch/208892/
>>>
>>>  linux-user/syscall.c | 33 +++++++++++++++++++++++++++------
>>>  1 file changed, 27 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
>>> index 64be431..71cc1e2 100644
>>> --- a/linux-user/syscall.c
>>> +++ b/linux-user/syscall.c
>>> @@ -111,6 +111,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
>>>  #include <linux/route.h>
>>>  #include <linux/filter.h>
>>>  #include <linux/blkpg.h>
>>> +#include <linux/if_packet.h>
>>>  #include "linux_loop.h"
>>>  #include "uname.h"
>>>
>>> @@ -1198,11 +1199,20 @@ static inline abi_long target_to_host_sockaddr(struct sockaddr *addr,
>>>      memcpy(addr, target_saddr, len);
>>>      addr->sa_family = sa_family;
>>>      if (sa_family == AF_PACKET) {
>>> -       struct target_sockaddr_ll *lladdr;
>>> +        /* Manage an obsolete case :
>>> +         * if socket type is SOCK_PACKET, bind by name otherwise by index
>>> +         * but we are not able to know socket type, so check if the name
>>> +         * is usable...
>>> +         * see linux/net/packet/af_packet.c: packet_bind_spkt()
>>> +         */
>>> +        if (strncmp((char *)((struct sockaddr_pkt *)addr)->spkt_device,
>>> +                    "eth", 3) != 0) {
>>> +            struct target_sockaddr_ll *lladdr;
>>
>> This confuses me. The packet(7) manpage suggests there are two flavours
>> of packet socket:
>>  (1) legacy AF_INET + SOCK_PACKET
>>  (2) new style AF_PACKET + SOCK_RAW / SOCK_DGRAM
>>
>> but this comment suggests it's trying to handle AF_PACKET + SOCK_PACKET ?
> 
> In fact, I've started not from the man page, but from a non working dhcp
> client, originally with a m68k target and etch-m68k distro, and I've met
> again this problem on a ppc target and jessie distro.
> 
>>
>> If AF_PACKET was introduced as the new way of doing things, it's not
>> clear why it would be the family type used in the legacy approach's
>> sockaddr_pkt (though there seems to be code around that does this).
>> I suspect that 2.0 kernels just didn't check af_family here at all.
> 
> by digging into the code, I have found:
> 
> $ apt-get source isc-dhcp-client
> $ vi isc-dhcp-4.3.1/common/lpf.c
> 
> ...
>  72 int if_register_lpf (info)
>  73         struct interface_info *info;
>  74 {
> ...
>  79         if ((sock = socket(PF_PACKET, SOCK_PACKET,
>  80                            htons((short)ETH_P_ALL))) < 0) {
> ...
> 
> So we can see socket() is used with domain PF_PACKET and type SOCK_PACKET.
> 
> Next, the interface name is put into the sa_data of sockaddr, and bind()
> is used with AF_PACKET:
> 
>  94         /* Bind to the interface name */
>  95         memset (&sa, 0, sizeof sa);
>  96         sa.sa_family = AF_PACKET;
>  97         strncpy (sa.sa_data, (const char *)info -> ifp, sizeof
> sa.sa_data);
>  98         if (bind (sock, &sa, sizeof sa)) {
> 
> ifp is initialized from a list of all discovered interfaces in
> common/discover.c:
> ...
>  238 int
>  239 begin_iface_scan(struct iface_conf_list *ifaces) {
> ...
>  283         if (ioctl(ifaces->sock, SIOCGLIFCONF, &ifaces->conf) < 0) {
> ...
>  918 void
>  919 discover_interfaces(int state) {
> ...
>  924         struct interface_info *tmp;
> ...
>  939         if (!begin_iface_scan(&ifaces)) {
> ...
>  955         while (next_iface(&info, &err, &ifaces)) {
>  956
>  957                 /* See if we've seen an interface that matches this
> one. */
>  958                 for (tmp = interfaces; tmp; tmp = tmp->next) {
>  959                         if (!strcmp(tmp->name, info.name))
>  960                                 break;
>  961                 }
> 
> ...
>  987                         strcpy(tmp->name, info.name);
> ...
> 1050         }
> ...
> 1063         for (tmp = interfaces ; tmp != NULL ; tmp = tmp->next) {
> 1064                 if (tmp->ifp == NULL) {
> 1065                         struct ifreq *tif;
> 1066
> 1067                         tif = (struct ifreq *)dmalloc(sizeof(struct
> ifreq),
> 1068                                                       MDL);
> 1069                         if (tif == NULL)
> 1070                                 log_fatal("no space for ifp mockup.");
> 1071                         strcpy(tif->ifr_name, tmp->name);
> 1072                         tmp->ifp = tif;
> 1073                 }
> 1074         }
> 
>>
>> The code in the kernel's packet_recvmsg fills in the spkt_family
>> field with the netdevice's type field, which is an ARPHRD_* constant
>> as far as I can tell (I could well be wrong here).
> 
> kernel 4.3.0-rc3, net/packet/af_packet.c:
> 
>    2961
>    2962 static int packet_bind_spkt(struct socket *sock, struct sockaddr
> *uaddr,
>    2963                             int addr_len)
>    2964 {
>    2965         struct sock *sk = sock->sk;
>    2966         char name[15];
>    2967         struct net_device *dev;
>    2968         int err = -ENODEV;
>    2969
>    2970         /*
>    2971          *      Check legality
>    2972          */
>    2973
>    2974         if (addr_len != sizeof(struct sockaddr))
>    2975                 return -EINVAL;
>    2976         strlcpy(name, uaddr->sa_data, sizeof(name));
>    2977
>    2978         dev = dev_get_by_name(sock_net(sk), name);
>    2979         if (dev)
>    2980                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
>    2981         return err;
>    2982 }
> ...
>    4246 static const struct proto_ops packet_ops_spkt = {
>    4247         .family =       PF_PACKET,
> ...
>    4250         .bind =         packet_bind_spkt,
> ...
>    3022
>    3023 static int packet_create(struct net *net, struct socket *sock,
> int protocol,
>    3024                          int kern)
> ...
>    3045         if (sock->type == SOCK_PACKET)
>    3046                 sock->ops = &packet_ops_spkt;
> ...
> 
>> Odd to have code in target_to_host_sockaddr and not in
>> host_to_target_sockaddr.
> 
> In the case of host_to_target_sockaddr(), there is no "if (sa_family ==
> AF_PACKET) {" as it was in target_to_host_sockaddr(), it's why I didn't
> add it (and I don't like to add code I don't test).
> 
>>
>>> -       lladdr = (struct target_sockaddr_ll *)addr;
>>> -       lladdr->sll_ifindex = tswap32(lladdr->sll_ifindex);
>>> -       lladdr->sll_hatype = tswap16(lladdr->sll_hatype);
>>> +            lladdr = (struct target_sockaddr_ll *)addr;
>>> +            lladdr->sll_ifindex = tswap32(lladdr->sll_ifindex);
>>> +            lladdr->sll_hatype = tswap16(lladdr->sll_hatype);
>>> +        }
>>>      }
>>>      unlock_user(target_saddr, target_addr, 0);
>>>
>>> @@ -2509,7 +2519,12 @@ static abi_long do_socketcall(int num, abi_ulong vptr)
>>>      /* now when we have the args, actually handle the call */
>>>      switch (num) {
>>>      case SOCKOP_socket: /* domain, type, protocol */
>>> -        return do_socket(a[0], a[1], a[2]);
>>> +        if (a[0] == AF_PACKET ||
>>> +            a[1] == TARGET_SOCK_PACKET) {
>>> +            return do_socket(a[0], a[1], tswap16(a[2]));
>>> +        } else {
>>> +            return do_socket(a[0], a[1], a[2]);
>>> +        }
>>>      case SOCKOP_bind: /* sockfd, addr, addrlen */
>>>          return do_bind(a[0], a[1], a[2]);
>>>      case SOCKOP_connect: /* sockfd, addr, addrlen */
>>> @@ -7500,7 +7515,13 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
>>>  #endif
>>>  #ifdef TARGET_NR_socket
>>>      case TARGET_NR_socket:
>>> -        ret = do_socket(arg1, arg2, arg3);
>>> +        if (arg1 == AF_PACKET ||
>>> +            arg2 == TARGET_SOCK_PACKET) {
>>> +            /* in this case, socket() needs a network endian short */
>>> +            ret = do_socket(arg1, arg2, tswap16(arg3));
>>> +        } else {
>>> +            ret = do_socket(arg1, arg2, arg3);
>>> +        }
>>
>> This doesn't make sense to me. The argument to socket()
>> is passed in via a register; so if the guest code correctly
>> passes the protocol value as htons(whatever) then that will
>> be the value in arg3 and we do not need to swap anything.
>>
>> I see we had this discussion about the previous version of the
>> patch too... and my argument that we don't need the tswap16
>> in the socketcall code path either still makes sense to me.
> 
> I agree with you, I think I have confused socketcall() that passes
> parameters by memory, and socket() that passes parameters by registers.
> I will remove this part and resend a patch.

And for the socketcall part, we need the tswap16():

for instance,

    int a = htons(0x0003);

On a LE host:

    a = 0x00000300

On a BE host:

    a = 0x00000003

If the guest is BE, it will put in memory:

    0x00 0x00 0x00 0x03

Then a LE host, will read:

    int b = 0x03000000

but get_user_ual() in do_socketcall() will byte-swap it and put
0x00000003 in a[2].

so without the byte-swap, we call do_socket(..., 0x0003),
whereas the syscall is waiting for htons(0x0003) -> 0x0300 as we are on
LE host.

I'm sorry, I can't explain that better...

>>
>>>          fd_trans_unregister(ret);
>>>          break;
>>>  #endif
>>> --
>>> 2.4.3
>>
>> thanks
>> -- PMM
>>
> 
> Thank you for your comments,
> Laurent
>
Peter Maydell Oct. 27, 2015, 11:35 a.m. UTC | #4
On 27 October 2015 at 10:47, Laurent Vivier <laurent@vivier.eu> wrote:
> And for the socketcall part, we need the tswap16():
>
> for instance,
>
>     int a = htons(0x0003);
>
> On a LE host:
>
>     a = 0x00000300
>
> On a BE host:
>
>     a = 0x00000003
>
> If the guest is BE, it will put in memory:
>
>     0x00 0x00 0x00 0x03
>
> Then a LE host, will read:
>
>     int b = 0x03000000
>
> but get_user_ual() in do_socketcall() will byte-swap it and put
> 0x00000003 in a[2].
>
> so without the byte-swap, we call do_socket(..., 0x0003),
> whereas the syscall is waiting for htons(0x0003) -> 0x0300 as we are on
> LE host.

So, I thought through this this morning, and I think the swapping
issues here are not specific to socketcall. If the socket syscall
ABI requires an argument of "htons(3)", then this is actually
a *different* ABI for BE vs LE systems. On a BE system this is
asking for "3", but on LE it is asking for "0x300". (Argument
is generally passed in a register.) So we need to be able to tell
when the host kernel wants this sort of difference and fix it up.

For socketcall, the current swapping we have will correctly pass
the value the user wrote into the array-of-longs into the syscall,
because if the value to be passed is 0x11223344 (assume 32-bit long),
for BE guest LE host we have:
 in register 0x11223344
 in memory 0x11 0x22 0x33 0x44
 byteswapped back by get_user_ual: 0x11223344
and for LE guest LE host:
 in register 0x11223344
 in memory 0x44 0x33 0x22 0x11
 read back by get_user_ual: 0x11223344
But we still have the same issue that if the guest believes the
kernel wants a value of 0x3 but in fact it wants 0x300 we need to
fix things up.

So the fix needs to go into do_socket(), and it needs to be
specific to the PF*/SOCK* values that indicate socket types
that want a network-order-16-bit value, which I think is
 (domain == AF_PACKET || (domain == AF_INET && type == SOCK_PACKET))

(this is pretty close to what your patch had to start with,
so apologies for taking a while to work through it. Endianness
always confuses me...)

Still thinking about the other part of your patch, because
"does this start with 'eth'" is not very pretty...

thanks
-- PMM
Peter Maydell Oct. 27, 2015, 11:39 a.m. UTC | #5
On 27 October 2015 at 11:35, Peter Maydell <peter.maydell@linaro.org> wrote:
> Still thinking about the other part of your patch, because
> "does this start with 'eth'" is not very pretty...

...can we use the TargetFdTrans machinery from your signalfd
patch to associate custom sockaddr conversion functions with
a file descriptor if it's created via socket(PF_PACKET, SOCK_PACKET, ...)?

thanks
-- PMM
Laurent Vivier Oct. 27, 2015, 11:49 a.m. UTC | #6
Le 27/10/2015 12:39, Peter Maydell a écrit :
> On 27 October 2015 at 11:35, Peter Maydell <peter.maydell@linaro.org> wrote:
>> Still thinking about the other part of your patch, because
>> "does this start with 'eth'" is not very pretty...

I agree with you, but I didn't find better.

> ...can we use the TargetFdTrans machinery from your signalfd
> patch to associate custom sockaddr conversion functions with
> a file descriptor if it's created via socket(PF_PACKET, SOCK_PACKET, ...)?

It was my first idea, but it didn't fit well: this machinery is to
translate data (recv/send), not the parameters, but as you say we can
add some functions in the structure. I'll have a look at this.

Laurent
Peter Maydell Oct. 27, 2015, 11:50 a.m. UTC | #7
On 27 October 2015 at 03:09, Laurent Vivier <laurent@vivier.eu> wrote:
> Le 26/10/2015 15:40, Peter Maydell a écrit :
>> This confuses me. The packet(7) manpage suggests there are two flavours
>> of packet socket:
>>  (1) legacy AF_INET + SOCK_PACKET
>>  (2) new style AF_PACKET + SOCK_RAW / SOCK_DGRAM
>>
>> but this comment suggests it's trying to handle AF_PACKET + SOCK_PACKET ?
>
> In fact, I've started not from the man page, but from a non working dhcp
> client, originally with a m68k target and etch-m68k distro, and I've met
> again this problem on a ppc target and jessie distro.

> kernel 4.3.0-rc3, net/packet/af_packet.c:
>
>    2961
>    2962 static int packet_bind_spkt(struct socket *sock, struct sockaddr
> *uaddr,
>    2963                             int addr_len)
>    2964 {
>    2965         struct sock *sk = sock->sk;
>    2966         char name[15];
>    2967         struct net_device *dev;
>    2968         int err = -ENODEV;
>    2969
>    2970         /*
>    2971          *      Check legality
>    2972          */
>    2973
>    2974         if (addr_len != sizeof(struct sockaddr))
>    2975                 return -EINVAL;
>    2976         strlcpy(name, uaddr->sa_data, sizeof(name));
>    2977
>    2978         dev = dev_get_by_name(sock_net(sk), name);
>    2979         if (dev)
>    2980                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
>    2981         return err;
>    2982 }
> ...
>    4246 static const struct proto_ops packet_ops_spkt = {
>    4247         .family =       PF_PACKET,
> ...
>    4250         .bind =         packet_bind_spkt,
> ...
>    3022
>    3023 static int packet_create(struct net *net, struct socket *sock,
> int protocol,
>    3024                          int kern)
> ...
>    3045         if (sock->type == SOCK_PACKET)
>    3046                 sock->ops = &packet_ops_spkt;
> ...

Yes, but also:
http://lxr.free-electrons.com/source/net/socket.c#L1109

1114         if (family == PF_INET && type == SOCK_PACKET) {
1115                 static int warned;
1116                 if (!warned) {
1117                         warned = 1;
1118                         pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
1119                                 current->comm);
1120                 }
1121                 family = PF_PACKET;
1122         }

So I think my conclusion is:
 * Original 2.0 kernels used PF_INET + SOCK_PACKET
 * When the non-legacy stuff was added and this compat warning
   came in, the (accidental?) result was that PF_PACKET + SOCK_PACKET
   gave a warning, PF_PACKET + SOCK_PACKET gave legacy behaviour
   without a warning, and PF_PACKET + SOCK_RAW/SOCK_DGRAM gave
   the new interface
 * Some userspace programs responded not by updating to the new
   non-legacy interface, but by moving to PF_PACKET + SOCK_PACKET
   which just suppresses the warning

So we should special case both PF_INET + SOCK_PACKET and
PF_PACKET + SOCK_PACKET (but not any other PF_* + SOCK_PACKET).

thanks
-- PMM
Peter Maydell Oct. 27, 2015, 11:52 a.m. UTC | #8
On 27 October 2015 at 11:49, Laurent Vivier <laurent@vivier.eu> wrote:
>
>
> Le 27/10/2015 12:39, Peter Maydell a écrit :
>> On 27 October 2015 at 11:35, Peter Maydell <peter.maydell@linaro.org> wrote:
>>> Still thinking about the other part of your patch, because
>>> "does this start with 'eth'" is not very pretty...
>
> I agree with you, but I didn't find better.
>
>> ...can we use the TargetFdTrans machinery from your signalfd
>> patch to associate custom sockaddr conversion functions with
>> a file descriptor if it's created via socket(PF_PACKET, SOCK_PACKET, ...)?
>
> It was my first idea, but it didn't fit well: this machinery is to
> translate data (recv/send), not the parameters, but as you say we can
> add some functions in the structure. I'll have a look at this.

Yes, now we have a generic "special case magic for an fd" we can
start to use it for a wider set of things.

You might want to separate the "fix the protocol value in socket()
calls" change into a different patch from the "fix the sockaddr
conversion" change (since the former is easy to review now) --
up to you.

thanks
-- PMM
Laurent Vivier Oct. 27, 2015, 11:54 a.m. UTC | #9
Le 27/10/2015 12:35, Peter Maydell a écrit :
> On 27 October 2015 at 10:47, Laurent Vivier <laurent@vivier.eu> wrote:
>> And for the socketcall part, we need the tswap16():
>>
>> for instance,
>>
>>     int a = htons(0x0003);
>>
>> On a LE host:
>>
>>     a = 0x00000300
>>
>> On a BE host:
>>
>>     a = 0x00000003
>>
>> If the guest is BE, it will put in memory:
>>
>>     0x00 0x00 0x00 0x03
>>
>> Then a LE host, will read:
>>
>>     int b = 0x03000000
>>
>> but get_user_ual() in do_socketcall() will byte-swap it and put
>> 0x00000003 in a[2].
>>
>> so without the byte-swap, we call do_socket(..., 0x0003),
>> whereas the syscall is waiting for htons(0x0003) -> 0x0300 as we are on
>> LE host.
> 
> So, I thought through this this morning, and I think the swapping
> issues here are not specific to socketcall. If the socket syscall
> ABI requires an argument of "htons(3)", then this is actually
> a *different* ABI for BE vs LE systems. On a BE system this is
> asking for "3", but on LE it is asking for "0x300". (Argument
> is generally passed in a register.) So we need to be able to tell
> when the host kernel wants this sort of difference and fix it up.
> 
> For socketcall, the current swapping we have will correctly pass
> the value the user wrote into the array-of-longs into the syscall,
> because if the value to be passed is 0x11223344 (assume 32-bit long),
> for BE guest LE host we have:
>  in register 0x11223344
>  in memory 0x11 0x22 0x33 0x44
>  byteswapped back by get_user_ual: 0x11223344
> and for LE guest LE host:
>  in register 0x11223344
>  in memory 0x44 0x33 0x22 0x11
>  read back by get_user_ual: 0x11223344
> But we still have the same issue that if the guest believes the
> kernel wants a value of 0x3 but in fact it wants 0x300 we need to
> fix things up.
> 
> So the fix needs to go into do_socket(), and it needs to be
> specific to the PF*/SOCK* values that indicate socket types
> that want a network-order-16-bit value, which I think is
>  (domain == AF_PACKET || (domain == AF_INET && type == SOCK_PACKET))

OK, I will try with my use case.

> 
> (this is pretty close to what your patch had to start with,
> so apologies for taking a while to work through it. Endianness
> always confuses me...)

No problem, It tooks me 3 years to explain that correctly :) ...

> Still thinking about the other part of your patch, because
> "does this start with 'eth'" is not very pretty...

I agree but I didn't find a better way...

Laurent
Laurent Vivier Oct. 27, 2015, 11:54 a.m. UTC | #10
Le 27/10/2015 12:50, Peter Maydell a écrit :
> On 27 October 2015 at 03:09, Laurent Vivier <laurent@vivier.eu> wrote:
>> Le 26/10/2015 15:40, Peter Maydell a écrit :
>>> This confuses me. The packet(7) manpage suggests there are two flavours
>>> of packet socket:
>>>  (1) legacy AF_INET + SOCK_PACKET
>>>  (2) new style AF_PACKET + SOCK_RAW / SOCK_DGRAM
>>>
>>> but this comment suggests it's trying to handle AF_PACKET + SOCK_PACKET ?
>>
>> In fact, I've started not from the man page, but from a non working dhcp
>> client, originally with a m68k target and etch-m68k distro, and I've met
>> again this problem on a ppc target and jessie distro.
> 
>> kernel 4.3.0-rc3, net/packet/af_packet.c:
>>
>>    2961
>>    2962 static int packet_bind_spkt(struct socket *sock, struct sockaddr
>> *uaddr,
>>    2963                             int addr_len)
>>    2964 {
>>    2965         struct sock *sk = sock->sk;
>>    2966         char name[15];
>>    2967         struct net_device *dev;
>>    2968         int err = -ENODEV;
>>    2969
>>    2970         /*
>>    2971          *      Check legality
>>    2972          */
>>    2973
>>    2974         if (addr_len != sizeof(struct sockaddr))
>>    2975                 return -EINVAL;
>>    2976         strlcpy(name, uaddr->sa_data, sizeof(name));
>>    2977
>>    2978         dev = dev_get_by_name(sock_net(sk), name);
>>    2979         if (dev)
>>    2980                 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
>>    2981         return err;
>>    2982 }
>> ...
>>    4246 static const struct proto_ops packet_ops_spkt = {
>>    4247         .family =       PF_PACKET,
>> ...
>>    4250         .bind =         packet_bind_spkt,
>> ...
>>    3022
>>    3023 static int packet_create(struct net *net, struct socket *sock,
>> int protocol,
>>    3024                          int kern)
>> ...
>>    3045         if (sock->type == SOCK_PACKET)
>>    3046                 sock->ops = &packet_ops_spkt;
>> ...
> 
> Yes, but also:
> http://lxr.free-electrons.com/source/net/socket.c#L1109
> 
> 1114         if (family == PF_INET && type == SOCK_PACKET) {
> 1115                 static int warned;
> 1116                 if (!warned) {
> 1117                         warned = 1;
> 1118                         pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n",
> 1119                                 current->comm);
> 1120                 }
> 1121                 family = PF_PACKET;
> 1122         }
> 
> So I think my conclusion is:
>  * Original 2.0 kernels used PF_INET + SOCK_PACKET
>  * When the non-legacy stuff was added and this compat warning
>    came in, the (accidental?) result was that PF_PACKET + SOCK_PACKET
>    gave a warning, PF_PACKET + SOCK_PACKET gave legacy behaviour
>    without a warning, and PF_PACKET + SOCK_RAW/SOCK_DGRAM gave
>    the new interface
>  * Some userspace programs responded not by updating to the new
>    non-legacy interface, but by moving to PF_PACKET + SOCK_PACKET
>    which just suppresses the warning
> 
> So we should special case both PF_INET + SOCK_PACKET and
> PF_PACKET + SOCK_PACKET (but not any other PF_* + SOCK_PACKET).

I'll try that too...

Laurent
Laurent Vivier Oct. 27, 2015, 11:56 a.m. UTC | #11
Le 27/10/2015 12:52, Peter Maydell a écrit :
> On 27 October 2015 at 11:49, Laurent Vivier <laurent@vivier.eu> wrote:
>>
>>
>> Le 27/10/2015 12:39, Peter Maydell a écrit :
>>> On 27 October 2015 at 11:35, Peter Maydell <peter.maydell@linaro.org> wrote:
>>>> Still thinking about the other part of your patch, because
>>>> "does this start with 'eth'" is not very pretty...
>>
>> I agree with you, but I didn't find better.
>>
>>> ...can we use the TargetFdTrans machinery from your signalfd
>>> patch to associate custom sockaddr conversion functions with
>>> a file descriptor if it's created via socket(PF_PACKET, SOCK_PACKET, ...)?
>>
>> It was my first idea, but it didn't fit well: this machinery is to
>> translate data (recv/send), not the parameters, but as you say we can
>> add some functions in the structure. I'll have a look at this.
> 
> Yes, now we have a generic "special case magic for an fd" we can
> start to use it for a wider set of things.
> 
> You might want to separate the "fix the protocol value in socket()
> calls" change into a different patch from the "fix the sockaddr
> conversion" change (since the former is easy to review now) --
> up to you.

Lets go for two patches...

Laurent
diff mbox

Patch

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 64be431..71cc1e2 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -111,6 +111,7 @@  int __clone2(int (*fn)(void *), void *child_stack_base,
 #include <linux/route.h>
 #include <linux/filter.h>
 #include <linux/blkpg.h>
+#include <linux/if_packet.h>
 #include "linux_loop.h"
 #include "uname.h"
 
@@ -1198,11 +1199,20 @@  static inline abi_long target_to_host_sockaddr(struct sockaddr *addr,
     memcpy(addr, target_saddr, len);
     addr->sa_family = sa_family;
     if (sa_family == AF_PACKET) {
-	struct target_sockaddr_ll *lladdr;
+        /* Manage an obsolete case :
+         * if socket type is SOCK_PACKET, bind by name otherwise by index
+         * but we are not able to know socket type, so check if the name
+         * is usable...
+         * see linux/net/packet/af_packet.c: packet_bind_spkt()
+         */
+        if (strncmp((char *)((struct sockaddr_pkt *)addr)->spkt_device,
+                    "eth", 3) != 0) {
+            struct target_sockaddr_ll *lladdr;
 
-	lladdr = (struct target_sockaddr_ll *)addr;
-	lladdr->sll_ifindex = tswap32(lladdr->sll_ifindex);
-	lladdr->sll_hatype = tswap16(lladdr->sll_hatype);
+            lladdr = (struct target_sockaddr_ll *)addr;
+            lladdr->sll_ifindex = tswap32(lladdr->sll_ifindex);
+            lladdr->sll_hatype = tswap16(lladdr->sll_hatype);
+        }
     }
     unlock_user(target_saddr, target_addr, 0);
 
@@ -2509,7 +2519,12 @@  static abi_long do_socketcall(int num, abi_ulong vptr)
     /* now when we have the args, actually handle the call */
     switch (num) {
     case SOCKOP_socket: /* domain, type, protocol */
-        return do_socket(a[0], a[1], a[2]);
+        if (a[0] == AF_PACKET ||
+            a[1] == TARGET_SOCK_PACKET) {
+            return do_socket(a[0], a[1], tswap16(a[2]));
+        } else {
+            return do_socket(a[0], a[1], a[2]);
+        }
     case SOCKOP_bind: /* sockfd, addr, addrlen */
         return do_bind(a[0], a[1], a[2]);
     case SOCKOP_connect: /* sockfd, addr, addrlen */
@@ -7500,7 +7515,13 @@  abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
 #endif
 #ifdef TARGET_NR_socket
     case TARGET_NR_socket:
-        ret = do_socket(arg1, arg2, arg3);
+        if (arg1 == AF_PACKET ||
+            arg2 == TARGET_SOCK_PACKET) {
+            /* in this case, socket() needs a network endian short */
+            ret = do_socket(arg1, arg2, tswap16(arg3));
+        } else {
+            ret = do_socket(arg1, arg2, arg3);
+        }
         fd_trans_unregister(ret);
         break;
 #endif