diff mbox series

[ovs-dev,no-slow,2/6] ofproto-dpif: Reorganize upcall handling.

Message ID 1513895115-35879-2-git-send-email-jpettit@ovn.org
State Superseded
Headers show
Series [ovs-dev,no-slow,1/6] ofproto-dpif: Add ability to look up an ofproto by UUID. | expand

Commit Message

Justin Pettit Dec. 21, 2017, 10:25 p.m. UTC
- This reduces the number of times upcall cookies are processed.
    - It separate true miss calls from slow-path actions.

The reorganization will also be useful for a future commit.

Signed-off-by: Justin Pettit <jpettit@ovn.org>
---
 ofproto/ofproto-dpif-upcall.c | 91 +++++++++++++++++++++----------------------
 1 file changed, 45 insertions(+), 46 deletions(-)

Comments

Gregory Rose Dec. 28, 2017, 11:22 p.m. UTC | #1
On 12/21/2017 2:25 PM, Justin Pettit wrote:
>      - This reduces the number of times upcall cookies are processed.
>      - It separate true miss calls from slow-path actions.
>
> The reorganization will also be useful for a future commit.
>
> Signed-off-by: Justin Pettit <jpettit@ovn.org>

Justin,

The problem I'm seeing arises when this patch is applied.  I'm puzzled 
though because if
I create a script to do the exact same thing as the test I see no problems.

Here is my script:

ovs-ofctl add-flow br0 "actions=normal"
ip netns add at_ns0
ip netns add at_ns1
ip link add p0 type veth peer name ovs-p0
ip link set p0 netns at_ns0
ip link set dev ovs-p0 up
ovs-vsctl add-port br0 ovs-p0 -- set interface ovs-p0 
external-ids:iface-id="p0"
ip netns exec at_ns0 ip addr add 10.1.1.1/24 dev p0
ip netns exec at_ns0 ip link set dev p0 up
ip link add p1 type veth peer name ovs-p1
ip link set p1 netns at_ns1
ip link set dev ovs-p1 up
ovs-vsctl add-port br0 ovs-p1 -- set interface ovs-p1 
external-ids:iface-id="p1"
ip netns exec at_ns1 ip addr add 10.1.1.2/24 dev p1
ip netns exec at_ns1 ip link set dev p1 up
ip netns exec at_ns0 ping -q -c 3 -i 0.3 -w 2 10.1.1.2
ip netns exec at_ns0 ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.2
ip netns exec at_ns0 ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.2
ovs-vsctl del-port br0 ovs-p1
ovs-vsctl del-port br0 ovs-p0
ip netns del at_ns0
ip netns del at_ns1
ovs-ofctl del-flows br0

SFAICT it emulates exactly what the system-traffic.at test 001 does.  
And it works fine... /shrug.

What distribution, kernel, etc are you using for your check-kmod 
testing?  I'll try to emulate that
exactly and then see if I can get similar results.

Thanks,

- Greg


> ---
>   ofproto/ofproto-dpif-upcall.c | 91 +++++++++++++++++++++----------------------
>   1 file changed, 45 insertions(+), 46 deletions(-)
>
> diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
> index 02cf5415bc3d..46b75fe35a2b 100644
> --- a/ofproto/ofproto-dpif-upcall.c
> +++ b/ofproto/ofproto-dpif-upcall.c
> @@ -183,6 +183,7 @@ struct udpif {
>   enum upcall_type {
>       BAD_UPCALL,                 /* Some kind of bug somewhere. */
>       MISS_UPCALL,                /* A flow miss.  */
> +    SLOW_PATH_UPCALL,           /* Slow path upcall.  */
>       SFLOW_UPCALL,               /* sFlow sample. */
>       FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
>       IPFIX_UPCALL                /* Per-bridge sampling. */
> @@ -210,8 +211,7 @@ struct upcall {
>       uint16_t mru;                  /* If !0, Maximum receive unit of
>                                         fragmented IP packet */
>   
> -    enum dpif_upcall_type type;    /* Datapath type of the upcall. */
> -    const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION Upcalls. */
> +    enum upcall_type type;         /* Type of the upcall. */
>       const struct nlattr *actions;  /* Flow actions in DPIF_UC_ACTION Upcalls. */
>   
>       bool xout_initialized;         /* True if 'xout' must be uninitialized. */
> @@ -235,6 +235,8 @@ struct upcall {
>       size_t key_len;                /* Datapath flow key length. */
>       const struct nlattr *out_tun_key;  /* Datapath output tunnel key. */
>   
> +    union user_action_cookie cookie;
> +
>       uint64_t odp_actions_stub[1024 / 8]; /* Stub for odp_actions. */
>   };
>   
> @@ -367,7 +369,8 @@ static int ukey_acquire(struct udpif *, const struct dpif_flow *,
>   static void ukey_delete__(struct udpif_key *);
>   static void ukey_delete(struct umap *, struct udpif_key *);
>   static enum upcall_type classify_upcall(enum dpif_upcall_type type,
> -                                        const struct nlattr *userdata);
> +                                        const struct nlattr *userdata,
> +                                        union user_action_cookie *cookie);
>   
>   static void put_op_init(struct ukey_op *op, struct udpif_key *ukey,
>                           enum dpif_flow_put_flags flags);
> @@ -813,7 +816,6 @@ recv_upcalls(struct handler *handler)
>   
>           upcall->key = dupcall->key;
>           upcall->key_len = dupcall->key_len;
> -        upcall->ufid = &dupcall->ufid;
>   
>           upcall->out_tun_key = dupcall->out_tun_key;
>           upcall->actions = dupcall->actions;
> @@ -969,11 +971,13 @@ udpif_revalidator(void *arg)
>   }
>   
>   static enum upcall_type
> -classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata)
> +classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata,
> +                union user_action_cookie *cookie)
>   {
> -    union user_action_cookie cookie;
>       size_t userdata_len;
>   
> +    memset(cookie, 0, sizeof *cookie);
> +
>       /* First look at the upcall type. */
>       switch (type) {
>       case DPIF_UC_ACTION:
> @@ -994,29 +998,30 @@ classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata)
>           return BAD_UPCALL;
>       }
>       userdata_len = nl_attr_get_size(userdata);
> -    if (userdata_len < sizeof cookie.type
> -        || userdata_len > sizeof cookie) {
> +    if (userdata_len < sizeof cookie->type
> +        || userdata_len > sizeof *cookie) {
>           VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE,
>                        userdata_len);
>           return BAD_UPCALL;
>       }
> -    memset(&cookie, 0, sizeof cookie);
> -    memcpy(&cookie, nl_attr_get(userdata), userdata_len);
> -    if (userdata_len == MAX(8, sizeof cookie.sflow)
> -        && cookie.type == USER_ACTION_COOKIE_SFLOW) {
> +
> +    memcpy(cookie, nl_attr_get(userdata), userdata_len);
> +
> +    if (userdata_len == MAX(8, sizeof cookie->sflow)
> +        && cookie->type == USER_ACTION_COOKIE_SFLOW) {
>           return SFLOW_UPCALL;
> -    } else if (userdata_len == MAX(8, sizeof cookie.slow_path)
> -               && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) {
> -        return MISS_UPCALL;
> -    } else if (userdata_len == MAX(8, sizeof cookie.flow_sample)
> -               && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
> +    } else if (userdata_len == MAX(8, sizeof cookie->slow_path)
> +               && cookie->type == USER_ACTION_COOKIE_SLOW_PATH) {
> +        return SLOW_PATH_UPCALL;
> +    } else if (userdata_len == MAX(8, sizeof cookie->flow_sample)
> +               && cookie->type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
>           return FLOW_SAMPLE_UPCALL;
> -    } else if (userdata_len == MAX(8, sizeof cookie.ipfix)
> -               && cookie.type == USER_ACTION_COOKIE_IPFIX) {
> +    } else if (userdata_len == MAX(8, sizeof cookie->ipfix)
> +               && cookie->type == USER_ACTION_COOKIE_IPFIX) {
>           return IPFIX_UPCALL;
>       } else {
>           VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
> -                     " and size %"PRIuSIZE, cookie.type, userdata_len);
> +                     " and size %"PRIuSIZE, cookie->type, userdata_len);
>           return BAD_UPCALL;
>       }
>   }
> @@ -1078,6 +1083,11 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
>   {
>       int error;
>   
> +    upcall->type = classify_upcall(type, userdata, &upcall->cookie);
> +    if (upcall->type == BAD_UPCALL) {
> +        return EAGAIN;
> +    }
> +
>       error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix,
>                            &upcall->sflow, NULL, &upcall->in_port);
>       if (error) {
> @@ -1090,8 +1100,6 @@ upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
>       upcall->packet = packet;
>       upcall->ufid = ufid;
>       upcall->pmd_id = pmd_id;
> -    upcall->type = type;
> -    upcall->userdata = userdata;
>       ofpbuf_use_stub(&upcall->odp_actions, upcall->odp_actions_stub,
>                       sizeof upcall->odp_actions_stub);
>       ofpbuf_init(&upcall->put_actions, 0);
> @@ -1127,7 +1135,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall,
>                     upcall->flow, upcall->in_port, NULL,
>                     stats.tcp_flags, upcall->packet, wc, odp_actions);
>   
> -    if (upcall->type == DPIF_UC_MISS) {
> +    if (upcall->type == MISS_UPCALL) {
>           xin.resubmit_stats = &stats;
>   
>           if (xin.frozen_state) {
> @@ -1176,7 +1184,7 @@ upcall_xlate(struct udpif *udpif, struct upcall *upcall,
>       /* This function is also called for slow-pathed flows.  As we are only
>        * going to create new datapath flows for actual datapath misses, there is
>        * no point in creating a ukey otherwise. */
> -    if (upcall->type == DPIF_UC_MISS) {
> +    if (upcall->type == MISS_UPCALL) {
>           upcall->ukey = ukey_create_from_upcall(upcall, wc);
>       }
>   }
> @@ -1212,7 +1220,7 @@ should_install_flow(struct udpif *udpif, struct upcall *upcall)
>   {
>       unsigned int flow_limit;
>   
> -    if (upcall->type != DPIF_UC_MISS) {
> +    if (upcall->type != MISS_UPCALL) {
>           return false;
>       } else if (upcall->recirc && !upcall->have_recirc_ref) {
>           VLOG_DBG_RL(&rl, "upcall: no reference for recirc flow");
> @@ -1325,6 +1333,7 @@ dpif_read_actions(struct udpif *udpif, struct upcall *upcall,
>           break;
>       case BAD_UPCALL:
>       case MISS_UPCALL:
> +    case SLOW_PATH_UPCALL:
>       default:
>           break;
>       }
> @@ -1336,53 +1345,46 @@ static int
>   process_upcall(struct udpif *udpif, struct upcall *upcall,
>                  struct ofpbuf *odp_actions, struct flow_wildcards *wc)
>   {
> -    const struct nlattr *userdata = upcall->userdata;
>       const struct dp_packet *packet = upcall->packet;
>       const struct flow *flow = upcall->flow;
>       size_t actions_len = 0;
> -    enum upcall_type upcall_type = classify_upcall(upcall->type, userdata);
>   
> -    switch (upcall_type) {
> +    switch (upcall->type) {
>       case MISS_UPCALL:
> +    case SLOW_PATH_UPCALL:
>           upcall_xlate(udpif, upcall, odp_actions, wc);
>           return 0;
>   
>       case SFLOW_UPCALL:
>           if (upcall->sflow) {
> -            union user_action_cookie cookie;
>               struct dpif_sflow_actions sflow_actions;
>   
>               memset(&sflow_actions, 0, sizeof sflow_actions);
> -            memset(&cookie, 0, sizeof cookie);
> -            memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.sflow);
>   
> -            actions_len = dpif_read_actions(udpif, upcall, flow, upcall_type,
> -                                            &sflow_actions);
> +            actions_len = dpif_read_actions(udpif, upcall, flow,
> +                                            upcall->type, &sflow_actions);
>               dpif_sflow_received(upcall->sflow, packet, flow,
> -                                flow->in_port.odp_port, &cookie,
> +                                flow->in_port.odp_port, &upcall->cookie,
>                                   actions_len > 0 ? &sflow_actions : NULL);
>           }
>           break;
>   
>       case IPFIX_UPCALL:
>           if (upcall->ipfix) {
> -            union user_action_cookie cookie;
>               struct flow_tnl output_tunnel_key;
>               struct dpif_ipfix_actions ipfix_actions;
>   
> -            memset(&cookie, 0, sizeof cookie);
> -            memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix);
>               memset(&ipfix_actions, 0, sizeof ipfix_actions);
>   
>               if (upcall->out_tun_key) {
>                   odp_tun_key_from_attr(upcall->out_tun_key, &output_tunnel_key);
>               }
>   
> -            actions_len = dpif_read_actions(udpif, upcall, flow, upcall_type,
> -                                            &ipfix_actions);
> +            actions_len = dpif_read_actions(udpif, upcall, flow,
> +                                            upcall->type, &ipfix_actions);
>               dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
>                                        flow->in_port.odp_port,
> -                                     cookie.ipfix.output_odp_port,
> +                                     upcall->cookie.ipfix.output_odp_port,
>                                        upcall->out_tun_key ?
>                                            &output_tunnel_key : NULL,
>                                        actions_len > 0 ? &ipfix_actions: NULL);
> @@ -1391,24 +1393,21 @@ process_upcall(struct udpif *udpif, struct upcall *upcall,
>   
>       case FLOW_SAMPLE_UPCALL:
>           if (upcall->ipfix) {
> -            union user_action_cookie cookie;
>               struct flow_tnl output_tunnel_key;
>               struct dpif_ipfix_actions ipfix_actions;
>   
> -            memset(&cookie, 0, sizeof cookie);
> -            memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.flow_sample);
>               memset(&ipfix_actions, 0, sizeof ipfix_actions);
>   
>               if (upcall->out_tun_key) {
>                   odp_tun_key_from_attr(upcall->out_tun_key, &output_tunnel_key);
>               }
>   
> -            actions_len = dpif_read_actions(udpif, upcall, flow, upcall_type,
> -                                            &ipfix_actions);
> +            actions_len = dpif_read_actions(udpif, upcall, flow,
> +                                            upcall->type, &ipfix_actions);
>               /* The flow reflects exactly the contents of the packet.
>                * Sample the packet using it. */
>               dpif_ipfix_flow_sample(upcall->ipfix, packet, flow,
> -                                   &cookie, flow->in_port.odp_port,
> +                                   &upcall->cookie, flow->in_port.odp_port,
>                                      upcall->out_tun_key ?
>                                          &output_tunnel_key : NULL,
>                                      actions_len > 0 ? &ipfix_actions: NULL);
Ben Pfaff Jan. 2, 2018, 4:46 p.m. UTC | #2
On Thu, Dec 21, 2017 at 02:25:11PM -0800, Justin Pettit wrote:
>     - This reduces the number of times upcall cookies are processed.
>     - It separate true miss calls from slow-path actions.
> 
> The reorganization will also be useful for a future commit.
> 
> Signed-off-by: Justin Pettit <jpettit@ovn.org>

Assuming you can figure out what Greg is seeing:

Acked-by: Ben Pfaff <blp@ovn.org>
Gregory Rose Jan. 2, 2018, 7:14 p.m. UTC | #3
On 12/28/2017 3:22 PM, Gregory Rose wrote:
> On 12/21/2017 2:25 PM, Justin Pettit wrote:
>>      - This reduces the number of times upcall cookies are processed.
>>      - It separate true miss calls from slow-path actions.
>>
>> The reorganization will also be useful for a future commit.
>>
>> Signed-off-by: Justin Pettit <jpettit@ovn.org>
>

Another clue - after applying this patch I begin seeing this error 
message in classify_upcall()
in ofproto_dpif_upcall.c.

2018-01-02T18:56:58.442Z|00001|ofproto_dpif_upcall(handler18)|WARN|invalid 
user cookie of type 0 and size 4

This occurs even in the manual test I outlined below.

After this occurs in the 'make check-kmod TESTSUITEFLAGS="1"' test I'm 
running I also then see these errors:

2018-01-02T18:40:02.717Z|00039|netdev_linux|WARN|ovs-p1: removing 
policing failed: No such device
2018-01-02T18:40:02.718Z|00040|ofproto|WARN|br0: cannot get STP status 
on nonexistent port 2
2018-01-02T18:40:02.718Z|00041|ofproto|WARN|br0: cannot get RSTP status 
on nonexistent port 2
2018-01-02T18:40:02.719Z|00042|bridge|INFO|bridge br0: deleted interface 
ovs-p1 on port 2
2018-01-02T18:40:02.723Z|00043|bridge|WARN|could not open network device 
ovs-p1 (No such device)
2018-01-02T18:40:02.742Z|00044|bridge|INFO|bridge br0: deleted interface 
ovs-p0 on port 1
2018-01-02T18:40:02.746Z|00045|bridge|WARN|could not open network device 
ovs-p1 (No such device)
2018-01-02T18:40:02.750Z|00046|bridge|WARN|could not open network device 
ovs-p0

Still digging...

- Greg

> Justin,
>
> The problem I'm seeing arises when this patch is applied.  I'm puzzled 
> though because if
> I create a script to do the exact same thing as the test I see no 
> problems.
>
> Here is my script:
>
> ovs-ofctl add-flow br0 "actions=normal"
> ip netns add at_ns0
> ip netns add at_ns1
> ip link add p0 type veth peer name ovs-p0
> ip link set p0 netns at_ns0
> ip link set dev ovs-p0 up
> ovs-vsctl add-port br0 ovs-p0 -- set interface ovs-p0 
> external-ids:iface-id="p0"
> ip netns exec at_ns0 ip addr add 10.1.1.1/24 dev p0
> ip netns exec at_ns0 ip link set dev p0 up
> ip link add p1 type veth peer name ovs-p1
> ip link set p1 netns at_ns1
> ip link set dev ovs-p1 up
> ovs-vsctl add-port br0 ovs-p1 -- set interface ovs-p1 
> external-ids:iface-id="p1"
> ip netns exec at_ns1 ip addr add 10.1.1.2/24 dev p1
> ip netns exec at_ns1 ip link set dev p1 up
> ip netns exec at_ns0 ping -q -c 3 -i 0.3 -w 2 10.1.1.2
> ip netns exec at_ns0 ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.2
> ip netns exec at_ns0 ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.2
> ovs-vsctl del-port br0 ovs-p1
> ovs-vsctl del-port br0 ovs-p0
> ip netns del at_ns0
> ip netns del at_ns1
> ovs-ofctl del-flows br0
>
> SFAICT it emulates exactly what the system-traffic.at test 001 does.  
> And it works fine... /shrug.
>
> What distribution, kernel, etc are you using for your check-kmod 
> testing?  I'll try to emulate that
> exactly and then see if I can get similar results.
>
> Thanks,
>
> - Greg
>
>
>> ---
>>   ofproto/ofproto-dpif-upcall.c | 91 
>> +++++++++++++++++++++----------------------
>>   1 file changed, 45 insertions(+), 46 deletions(-)
>>
>> diff --git a/ofproto/ofproto-dpif-upcall.c 
>> b/ofproto/ofproto-dpif-upcall.c
>> index 02cf5415bc3d..46b75fe35a2b 100644
>> --- a/ofproto/ofproto-dpif-upcall.c
>> +++ b/ofproto/ofproto-dpif-upcall.c
>> @@ -183,6 +183,7 @@ struct udpif {
>>   enum upcall_type {
>>       BAD_UPCALL,                 /* Some kind of bug somewhere. */
>>       MISS_UPCALL,                /* A flow miss.  */
>> +    SLOW_PATH_UPCALL,           /* Slow path upcall.  */
>>       SFLOW_UPCALL,               /* sFlow sample. */
>>       FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
>>       IPFIX_UPCALL                /* Per-bridge sampling. */
>> @@ -210,8 +211,7 @@ struct upcall {
>>       uint16_t mru;                  /* If !0, Maximum receive unit of
>>                                         fragmented IP packet */
>>   -    enum dpif_upcall_type type;    /* Datapath type of the upcall. */
>> -    const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION 
>> Upcalls. */
>> +    enum upcall_type type;         /* Type of the upcall. */
>>       const struct nlattr *actions;  /* Flow actions in 
>> DPIF_UC_ACTION Upcalls. */
>>         bool xout_initialized;         /* True if 'xout' must be 
>> uninitialized. */
>> @@ -235,6 +235,8 @@ struct upcall {
>>       size_t key_len;                /* Datapath flow key length. */
>>       const struct nlattr *out_tun_key;  /* Datapath output tunnel 
>> key. */
>>   +    union user_action_cookie cookie;
>> +
>>       uint64_t odp_actions_stub[1024 / 8]; /* Stub for odp_actions. */
>>   };
>>   @@ -367,7 +369,8 @@ static int ukey_acquire(struct udpif *, const 
>> struct dpif_flow *,
>>   static void ukey_delete__(struct udpif_key *);
>>   static void ukey_delete(struct umap *, struct udpif_key *);
>>   static enum upcall_type classify_upcall(enum dpif_upcall_type type,
>> -                                        const struct nlattr *userdata);
>> +                                        const struct nlattr *userdata,
>> +                                        union user_action_cookie 
>> *cookie);
>>     static void put_op_init(struct ukey_op *op, struct udpif_key *ukey,
>>                           enum dpif_flow_put_flags flags);
>> @@ -813,7 +816,6 @@ recv_upcalls(struct handler *handler)
>>             upcall->key = dupcall->key;
>>           upcall->key_len = dupcall->key_len;
>> -        upcall->ufid = &dupcall->ufid;
>>             upcall->out_tun_key = dupcall->out_tun_key;
>>           upcall->actions = dupcall->actions;
>> @@ -969,11 +971,13 @@ udpif_revalidator(void *arg)
>>   }
>>   
>>   static enum upcall_type
>> -classify_upcall(enum dpif_upcall_type type, const struct nlattr 
>> *userdata)
>> +classify_upcall(enum dpif_upcall_type type, const struct nlattr 
>> *userdata,
>> +                union user_action_cookie *cookie)
>>   {
>> -    union user_action_cookie cookie;
>>       size_t userdata_len;
>>   +    memset(cookie, 0, sizeof *cookie);
>> +
>>       /* First look at the upcall type. */
>>       switch (type) {
>>       case DPIF_UC_ACTION:
>> @@ -994,29 +998,30 @@ classify_upcall(enum dpif_upcall_type type, 
>> const struct nlattr *userdata)
>>           return BAD_UPCALL;
>>       }
>>       userdata_len = nl_attr_get_size(userdata);
>> -    if (userdata_len < sizeof cookie.type
>> -        || userdata_len > sizeof cookie) {
>> +    if (userdata_len < sizeof cookie->type
>> +        || userdata_len > sizeof *cookie) {
>>           VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size 
>> %"PRIuSIZE,
>>                        userdata_len);
>>           return BAD_UPCALL;
>>       }
>> -    memset(&cookie, 0, sizeof cookie);
>> -    memcpy(&cookie, nl_attr_get(userdata), userdata_len);
>> -    if (userdata_len == MAX(8, sizeof cookie.sflow)
>> -        && cookie.type == USER_ACTION_COOKIE_SFLOW) {
>> +
>> +    memcpy(cookie, nl_attr_get(userdata), userdata_len);
>> +
>> +    if (userdata_len == MAX(8, sizeof cookie->sflow)
>> +        && cookie->type == USER_ACTION_COOKIE_SFLOW) {
>>           return SFLOW_UPCALL;
>> -    } else if (userdata_len == MAX(8, sizeof cookie.slow_path)
>> -               && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) {
>> -        return MISS_UPCALL;
>> -    } else if (userdata_len == MAX(8, sizeof cookie.flow_sample)
>> -               && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
>> +    } else if (userdata_len == MAX(8, sizeof cookie->slow_path)
>> +               && cookie->type == USER_ACTION_COOKIE_SLOW_PATH) {
>> +        return SLOW_PATH_UPCALL;
>> +    } else if (userdata_len == MAX(8, sizeof cookie->flow_sample)
>> +               && cookie->type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
>>           return FLOW_SAMPLE_UPCALL;
>> -    } else if (userdata_len == MAX(8, sizeof cookie.ipfix)
>> -               && cookie.type == USER_ACTION_COOKIE_IPFIX) {
>> +    } else if (userdata_len == MAX(8, sizeof cookie->ipfix)
>> +               && cookie->type == USER_ACTION_COOKIE_IPFIX) {
>>           return IPFIX_UPCALL;
>>       } else {
>>           VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
>> -                     " and size %"PRIuSIZE, cookie.type, userdata_len);
>> +                     " and size %"PRIuSIZE, cookie->type, 
>> userdata_len);
>>           return BAD_UPCALL;
>>       }
>>   }
>> @@ -1078,6 +1083,11 @@ upcall_receive(struct upcall *upcall, const 
>> struct dpif_backer *backer,
>>   {
>>       int error;
>>   +    upcall->type = classify_upcall(type, userdata, &upcall->cookie);
>> +    if (upcall->type == BAD_UPCALL) {
>> +        return EAGAIN;
>> +    }
>> +
>>       error = xlate_lookup(backer, flow, &upcall->ofproto, 
>> &upcall->ipfix,
>>                            &upcall->sflow, NULL, &upcall->in_port);
>>       if (error) {
>> @@ -1090,8 +1100,6 @@ upcall_receive(struct upcall *upcall, const 
>> struct dpif_backer *backer,
>>       upcall->packet = packet;
>>       upcall->ufid = ufid;
>>       upcall->pmd_id = pmd_id;
>> -    upcall->type = type;
>> -    upcall->userdata = userdata;
>>       ofpbuf_use_stub(&upcall->odp_actions, upcall->odp_actions_stub,
>>                       sizeof upcall->odp_actions_stub);
>>       ofpbuf_init(&upcall->put_actions, 0);
>> @@ -1127,7 +1135,7 @@ upcall_xlate(struct udpif *udpif, struct upcall 
>> *upcall,
>>                     upcall->flow, upcall->in_port, NULL,
>>                     stats.tcp_flags, upcall->packet, wc, odp_actions);
>>   -    if (upcall->type == DPIF_UC_MISS) {
>> +    if (upcall->type == MISS_UPCALL) {
>>           xin.resubmit_stats = &stats;
>>             if (xin.frozen_state) {
>> @@ -1176,7 +1184,7 @@ upcall_xlate(struct udpif *udpif, struct upcall 
>> *upcall,
>>       /* This function is also called for slow-pathed flows.  As we 
>> are only
>>        * going to create new datapath flows for actual datapath 
>> misses, there is
>>        * no point in creating a ukey otherwise. */
>> -    if (upcall->type == DPIF_UC_MISS) {
>> +    if (upcall->type == MISS_UPCALL) {
>>           upcall->ukey = ukey_create_from_upcall(upcall, wc);
>>       }
>>   }
>> @@ -1212,7 +1220,7 @@ should_install_flow(struct udpif *udpif, struct 
>> upcall *upcall)
>>   {
>>       unsigned int flow_limit;
>>   -    if (upcall->type != DPIF_UC_MISS) {
>> +    if (upcall->type != MISS_UPCALL) {
>>           return false;
>>       } else if (upcall->recirc && !upcall->have_recirc_ref) {
>>           VLOG_DBG_RL(&rl, "upcall: no reference for recirc flow");
>> @@ -1325,6 +1333,7 @@ dpif_read_actions(struct udpif *udpif, struct 
>> upcall *upcall,
>>           break;
>>       case BAD_UPCALL:
>>       case MISS_UPCALL:
>> +    case SLOW_PATH_UPCALL:
>>       default:
>>           break;
>>       }
>> @@ -1336,53 +1345,46 @@ static int
>>   process_upcall(struct udpif *udpif, struct upcall *upcall,
>>                  struct ofpbuf *odp_actions, struct flow_wildcards *wc)
>>   {
>> -    const struct nlattr *userdata = upcall->userdata;
>>       const struct dp_packet *packet = upcall->packet;
>>       const struct flow *flow = upcall->flow;
>>       size_t actions_len = 0;
>> -    enum upcall_type upcall_type = classify_upcall(upcall->type, 
>> userdata);
>>   -    switch (upcall_type) {
>> +    switch (upcall->type) {
>>       case MISS_UPCALL:
>> +    case SLOW_PATH_UPCALL:
>>           upcall_xlate(udpif, upcall, odp_actions, wc);
>>           return 0;
>>         case SFLOW_UPCALL:
>>           if (upcall->sflow) {
>> -            union user_action_cookie cookie;
>>               struct dpif_sflow_actions sflow_actions;
>>                 memset(&sflow_actions, 0, sizeof sflow_actions);
>> -            memset(&cookie, 0, sizeof cookie);
>> -            memcpy(&cookie, nl_attr_get(userdata), sizeof 
>> cookie.sflow);
>>   -            actions_len = dpif_read_actions(udpif, upcall, flow, 
>> upcall_type,
>> - &sflow_actions);
>> +            actions_len = dpif_read_actions(udpif, upcall, flow,
>> +                                            upcall->type, 
>> &sflow_actions);
>>               dpif_sflow_received(upcall->sflow, packet, flow,
>> -                                flow->in_port.odp_port, &cookie,
>> +                                flow->in_port.odp_port, 
>> &upcall->cookie,
>>                                   actions_len > 0 ? &sflow_actions : 
>> NULL);
>>           }
>>           break;
>>         case IPFIX_UPCALL:
>>           if (upcall->ipfix) {
>> -            union user_action_cookie cookie;
>>               struct flow_tnl output_tunnel_key;
>>               struct dpif_ipfix_actions ipfix_actions;
>>   -            memset(&cookie, 0, sizeof cookie);
>> -            memcpy(&cookie, nl_attr_get(userdata), sizeof 
>> cookie.ipfix);
>>               memset(&ipfix_actions, 0, sizeof ipfix_actions);
>>                 if (upcall->out_tun_key) {
>>                   odp_tun_key_from_attr(upcall->out_tun_key, 
>> &output_tunnel_key);
>>               }
>>   -            actions_len = dpif_read_actions(udpif, upcall, flow, 
>> upcall_type,
>> - &ipfix_actions);
>> +            actions_len = dpif_read_actions(udpif, upcall, flow,
>> +                                            upcall->type, 
>> &ipfix_actions);
>>               dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
>> flow->in_port.odp_port,
>> - cookie.ipfix.output_odp_port,
>> + upcall->cookie.ipfix.output_odp_port,
>>                                        upcall->out_tun_key ?
>> &output_tunnel_key : NULL,
>>                                        actions_len > 0 ? 
>> &ipfix_actions: NULL);
>> @@ -1391,24 +1393,21 @@ process_upcall(struct udpif *udpif, struct 
>> upcall *upcall,
>>         case FLOW_SAMPLE_UPCALL:
>>           if (upcall->ipfix) {
>> -            union user_action_cookie cookie;
>>               struct flow_tnl output_tunnel_key;
>>               struct dpif_ipfix_actions ipfix_actions;
>>   -            memset(&cookie, 0, sizeof cookie);
>> -            memcpy(&cookie, nl_attr_get(userdata), sizeof 
>> cookie.flow_sample);
>>               memset(&ipfix_actions, 0, sizeof ipfix_actions);
>>                 if (upcall->out_tun_key) {
>>                   odp_tun_key_from_attr(upcall->out_tun_key, 
>> &output_tunnel_key);
>>               }
>>   -            actions_len = dpif_read_actions(udpif, upcall, flow, 
>> upcall_type,
>> - &ipfix_actions);
>> +            actions_len = dpif_read_actions(udpif, upcall, flow,
>> +                                            upcall->type, 
>> &ipfix_actions);
>>               /* The flow reflects exactly the contents of the packet.
>>                * Sample the packet using it. */
>>               dpif_ipfix_flow_sample(upcall->ipfix, packet, flow,
>> -                                   &cookie, flow->in_port.odp_port,
>> +                                   &upcall->cookie, 
>> flow->in_port.odp_port,
>>                                      upcall->out_tun_key ?
>>                                          &output_tunnel_key : NULL,
>>                                      actions_len > 0 ? 
>> &ipfix_actions: NULL);
>
Justin Pettit Jan. 2, 2018, 7:42 p.m. UTC | #4
> On Dec 28, 2017, at 3:22 PM, Gregory Rose <gvrose8192@gmail.com> wrote:
> 
> SFAICT it emulates exactly what the system-traffic.at test 001 does.  And it works fine... /shrug.
> 
> What distribution, kernel, etc are you using for your check-kmod testing?  I'll try to emulate that
> exactly and then see if I can get similar results.

I'm using Ubuntu 16.04 with kernel 4.4.0-104-generic.  I sent you a link on our Slack channel to the internal tester that runs different OSs.  It fails a few of tests, but they're the same ones that fail on master.  (We need to address those, but they shouldn't be related to my patches.)

--Justin
Gregory Rose Jan. 3, 2018, 5:37 p.m. UTC | #5
On 1/2/2018 11:42 AM, Justin Pettit wrote:
>> On Dec 28, 2017, at 3:22 PM, Gregory Rose <gvrose8192@gmail.com> wrote:
>>
>> SFAICT it emulates exactly what the system-traffic.at test 001 does.  And it works fine... /shrug.
>>
>> What distribution, kernel, etc are you using for your check-kmod testing?  I'll try to emulate that
>> exactly and then see if I can get similar results.
> I'm using Ubuntu 16.04 with kernel 4.4.0-104-generic.  I sent you a link on our Slack channel to the internal tester that runs different OSs.  It fails a few of tests, but they're the same ones that fail on master.  (We need to address those, but they shouldn't be related to my patches.)
>
> --Justin
>
>

Justin,

I have done more testing last night and this morning and have a couple 
of findings.

First, the tests themselves *all* succeed.  However, they are marked as 
failed because of warnings that
occur during OVS_TRAFFIC_VSWITCHD_STOP in system-traffic.at.  If I 
comment out
OVS_TRAFFIC_VSWITCHD_STOP then the test runs successfully.

AT_SETUP([datapath - ping between two ports])
OVS_TRAFFIC_VSWITCHD_START()

AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])

ADD_NAMESPACES(at_ns0, at_ns1)

ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")

NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | 
FORMAT_PING], [0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.2 | 
FORMAT_PING], [0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])
NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.2 | 
FORMAT_PING], [0], [dnl
3 packets transmitted, 3 received, 0% packet loss, time 0ms
])

dnl OVS_TRAFFIC_VSWITCHD_STOP
AT_CLEANUP

## ------------------------------ ##
## openvswitch 2.8.90 test suite. ##
## ------------------------------ ##
   1: datapath - ping between two ports               ok

## ------------- ##
## Test results. ##
## ------------- ##

1 test was successful.

I'm now debugging the OVS_TRAFFIC_VSWITCHD_STOP macro and trying to 
determine what
is causing the problem.  Here are the log messages:

2018-01-03T17:30:52.340Z|00039|netdev_linux|WARN|ovs-p1: removing 
policing failed: No such device
2018-01-03T17:30:52.341Z|00040|ofproto|WARN|br0: cannot get STP status 
on nonexistent port 2
2018-01-03T17:30:52.341Z|00041|ofproto|WARN|br0: cannot get RSTP status 
on nonexistent port 2
2018-01-03T17:30:52.343Z|00042|bridge|INFO|bridge br0: deleted interface 
ovs-p1 on port 2
2018-01-03T17:30:52.346Z|00043|bridge|WARN|could not open network device 
ovs-p1 (No such device)
2018-01-03T17:30:52.360Z|00044|bridge|INFO|bridge br0: deleted interface 
ovs-p0 on port 1
2018-01-03T17:30:52.364Z|00045|bridge|WARN|could not open network device 
ovs-p0 (No such device)
2018-01-03T17:30:52.367Z|00046|bridge|WARN|could not open network device 
ovs-p1 (No such device)

It is the WARNS from the OVS_TRAFFIC_VSWITCHD_STOP part of the test that 
are causing all tests to fail.

Again, I see this on multiple systems.  They are all VMs though so I'm 
wondering if the internal test that
you are referring to was run on bare metal?

Thanks,

- Greg
Ben Pfaff Jan. 3, 2018, 11:57 p.m. UTC | #6
On Thu, Dec 21, 2017 at 02:25:11PM -0800, Justin Pettit wrote:
>     - This reduces the number of times upcall cookies are processed.
>     - It separate true miss calls from slow-path actions.
> 
> The reorganization will also be useful for a future commit.
> 
> Signed-off-by: Justin Pettit <jpettit@ovn.org>

I took a second look at this patch, giving it more scrutiny.  I can't
yet explain why this line is deleted.  It seems like ->ufid is still
used in the same way as it was before:

> @@ -813,7 +816,6 @@ recv_upcalls(struct handler *handler)
>  
>          upcall->key = dupcall->key;
>          upcall->key_len = dupcall->key_len;
> -        upcall->ufid = &dupcall->ufid;
>  
>          upcall->out_tun_key = dupcall->out_tun_key;
>          upcall->actions = dupcall->actions;
Gregory Rose Jan. 5, 2018, 4:05 p.m. UTC | #7
On 1/3/2018 9:37 AM, Gregory Rose wrote:
> On 1/2/2018 11:42 AM, Justin Pettit wrote:
>>> On Dec 28, 2017, at 3:22 PM, Gregory Rose <gvrose8192@gmail.com> wrote:
>>>
>>> SFAICT it emulates exactly what the system-traffic.at test 001 
>>> does.  And it works fine... /shrug.
>>>
>>> What distribution, kernel, etc are you using for your check-kmod 
>>> testing?  I'll try to emulate that
>>> exactly and then see if I can get similar results.
>> I'm using Ubuntu 16.04 with kernel 4.4.0-104-generic.  I sent you a 
>> link on our Slack channel to the internal tester that runs different 
>> OSs.  It fails a few of tests, but they're the same ones that fail on 
>> master.  (We need to address those, but they shouldn't be related to 
>> my patches.)
>>
>> --Justin
>>
>>
>

I've created a script that runs a test outside of the m4sh autotest 
framework that exactly emulates
what test 001 of the 'make check-kmod' test performs.

When running the test I've created with this patch applied I 
consistently see the following error
message:

2018-01-05T15:53:14.440Z|00001|ofproto_dpif_upcall(handler1)|WARN|invalid 
user cookie of type 0 and size 4

But the ping test does succeed.  Without this patch applied the error 
messsage does not occur.

I've attached the script.  I'm not sure why the test on the internal 
server you pointed me to does not have
this error in it but as mentioned before I can reproduce it reliably on 
several different VMs with both Ubuntu
and RHEL based distributions and various kernels.

For now that's about as far as I can take my investigation since I have 
a few other things I need to work
on.  If you can think of another test I should run or something for me 
to check into let me know.

Thanks,

- Greg

> Justin,
>
> I have done more testing last night and this morning and have a couple 
> of findings.
>
> First, the tests themselves *all* succeed.  However, they are marked 
> as failed because of warnings that
> occur during OVS_TRAFFIC_VSWITCHD_STOP in system-traffic.at.  If I 
> comment out
> OVS_TRAFFIC_VSWITCHD_STOP then the test runs successfully.
>
> AT_SETUP([datapath - ping between two ports])
> OVS_TRAFFIC_VSWITCHD_START()
>
> AT_CHECK([ovs-ofctl add-flow br0 "actions=normal"])
>
> ADD_NAMESPACES(at_ns0, at_ns1)
>
> ADD_VETH(p0, at_ns0, br0, "10.1.1.1/24")
> ADD_VETH(p1, at_ns1, br0, "10.1.1.2/24")
>
> NS_CHECK_EXEC([at_ns0], [ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | 
> FORMAT_PING], [0], [dnl
> 3 packets transmitted, 3 received, 0% packet loss, time 0ms
> ])
> NS_CHECK_EXEC([at_ns0], [ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.2 | 
> FORMAT_PING], [0], [dnl
> 3 packets transmitted, 3 received, 0% packet loss, time 0ms
> ])
> NS_CHECK_EXEC([at_ns0], [ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.2 | 
> FORMAT_PING], [0], [dnl
> 3 packets transmitted, 3 received, 0% packet loss, time 0ms
> ])
>
> dnl OVS_TRAFFIC_VSWITCHD_STOP
> AT_CLEANUP
>
> ## ------------------------------ ##
> ## openvswitch 2.8.90 test suite. ##
> ## ------------------------------ ##
>   1: datapath - ping between two ports               ok
>
> ## ------------- ##
> ## Test results. ##
> ## ------------- ##
>
> 1 test was successful.
>
> I'm now debugging the OVS_TRAFFIC_VSWITCHD_STOP macro and trying to 
> determine what
> is causing the problem.  Here are the log messages:
>
> 2018-01-03T17:30:52.340Z|00039|netdev_linux|WARN|ovs-p1: removing 
> policing failed: No such device
> 2018-01-03T17:30:52.341Z|00040|ofproto|WARN|br0: cannot get STP status 
> on nonexistent port 2
> 2018-01-03T17:30:52.341Z|00041|ofproto|WARN|br0: cannot get RSTP 
> status on nonexistent port 2
> 2018-01-03T17:30:52.343Z|00042|bridge|INFO|bridge br0: deleted 
> interface ovs-p1 on port 2
> 2018-01-03T17:30:52.346Z|00043|bridge|WARN|could not open network 
> device ovs-p1 (No such device)
> 2018-01-03T17:30:52.360Z|00044|bridge|INFO|bridge br0: deleted 
> interface ovs-p0 on port 1
> 2018-01-03T17:30:52.364Z|00045|bridge|WARN|could not open network 
> device ovs-p0 (No such device)
> 2018-01-03T17:30:52.367Z|00046|bridge|WARN|could not open network 
> device ovs-p1 (No such device)
>
> It is the WARNS from the OVS_TRAFFIC_VSWITCHD_STOP part of the test 
> that are causing all tests to fail.
>
> Again, I see this on multiple systems.  They are all VMs though so I'm 
> wondering if the internal test that
> you are referring to was run on bare metal?
>
> Thanks,
>
> - Greg
>
#!/bin/bash
if [ ! -f vswitch.ovsschema ]; then
    echo "No schema file found - please copy vswitch.ovsschema to this directory"
    exit 1
fi
rm -f logfile
modprobe openvswitch
touch .conf.db.~lock~
ovsdb-tool create conf.db vswitch.ovsschema
ovsdb-server conf.db --detach --no-chdir --pidfile --log-file --remote=punix:/usr/local/var/run/openvswitch/db.sock
ovs-vsctl --no-wait init
ovs-vswitchd --detach --no-chdir --pidfile --log-file=logfile -vvconn -vofproto_dpif -vunixctl
ovs-vsctl -- add-br br0 -- set Bridge br0 protocols=OpenFlow10,OpenFlow11,OpenFlow12,OpenFlow13,OpenFlow14,OpenFlow15 fail-mode=secure
ovs-ofctl add-flow br0 "actions=normal"
ip netns add at_ns0
ip netns exec at_ns0 sysctl -w net.netfilter.nf_conntrack_helper=0
ip netns add at_ns1
ip netns exec at_ns1 sysctl -w net.netfilter.nf_conntrack_helper=0
ip link add p0 type veth peer name ovs-p0
ip link set p0 netns at_ns0
ip link set dev ovs-p0 up
ovs-vsctl add-port br0 ovs-p0 -- set interface ovs-p0 external-ids:iface-id="p0"
ip netns exec at_ns0 ip addr add 10.1.1.1/24 dev p0
ip netns exec at_ns0 ip link set dev p0 up
ip link add p1 type veth peer name ovs-p1
ip link set p1 netns at_ns1
ip link set dev ovs-p1 up
ovs-vsctl add-port br0 ovs-p1 -- set interface ovs-p1 external-ids:iface-id="p1"
ip netns exec at_ns1 ip addr add 10.1.1.2/24 dev p1
ip netns exec at_ns1 ip link set dev p1 up
ip netns exec at_ns0 ping -q -c 3 -i 0.3 -w 2 10.1.1.2 | grep "transmitted" | sed 's/time.*ms$/time 0ms/'
ip netns exec at_ns0 ping -s 1600 -q -c 3 -i 0.3 -w 2 10.1.1.2 | grep "transmitted" | sed 's/time.*ms$/time 0ms/'
ip netns exec at_ns0 ping -s 3200 -q -c 3 -i 0.3 -w 2 10.1.1.2 | grep "transmitted" | sed 's/time.*ms$/time 0ms/'
ovs-appctl --timeout=10 -t ovs-vswitchd exit --cleanup
ovs-appctl --timeout=10 -t ovsdb-server exit
ip link del ovs-p0
ip link del ovs-p1
ip netns del at_ns0
ip netns del at_ns1
ovs-dpctl del-dp ovs-system
rm -f conf.db
modprobe -r openvswitch
grep WARN logfile
Justin Pettit Jan. 5, 2018, 6:46 p.m. UTC | #8
> On Jan 5, 2018, at 8:05 AM, Gregory Rose <gvrose8192@gmail.com> wrote:
> 
> For now that's about as far as I can take my investigation since I have a few other things I need to work
> on.  If you can think of another test I should run or something for me to check into let me know.

Ben noticed that this seemed to have been removed in error in my patch:

> @@ -813,7 +816,6 @@ recv_upcalls(struct handler *handler)
> 
>         upcall->key = dupcall->key;
>         upcall->key_len = dupcall->key_len;
> -        upcall->ufid = &dupcall->ufid;
> 
>         upcall->out_tun_key = dupcall->out_tun_key;
>         upcall->actions = dupcall->actions;


Can you try adding that line back and see if the problem goes away on your end?

Thanks,

--Justin
Gregory Rose Jan. 5, 2018, 7:20 p.m. UTC | #9
On 1/5/2018 10:46 AM, Justin Pettit wrote:
>> On Jan 5, 2018, at 8:05 AM, Gregory Rose <gvrose8192@gmail.com> wrote:
>>
>> For now that's about as far as I can take my investigation since I have a few other things I need to work
>> on.  If you can think of another test I should run or something for me to check into let me know.
> Ben noticed that this seemed to have been removed in error in my patch:
>
>> @@ -813,7 +816,6 @@ recv_upcalls(struct handler *handler)
>>
>>          upcall->key = dupcall->key;
>>          upcall->key_len = dupcall->key_len;
>> -        upcall->ufid = &dupcall->ufid;
>>
>>          upcall->out_tun_key = dupcall->out_tun_key;
>>          upcall->actions = dupcall->actions;
>
> Can you try adding that line back and see if the problem goes away on your end?
>
> Thanks,
>
> --Justin
>
Justin,

That didn't seem to help.  The cookie->header.type is still equal to 
type USER_ACTION_COOKIE_UNSPEC
in the classify_upcall() function and that causes this message:

2018-01-05T19:12:09.957Z|00001|ofproto_dpif_upcall(handler1)|WARN|invalid 
user cookie of type 0 and size 4

- Greg
Justin Pettit Jan. 9, 2018, 9:33 p.m. UTC | #10
> On Jan 5, 2018, at 11:20 AM, Gregory Rose <gvrose8192@gmail.com> wrote:
> 
> That didn't seem to help.  The cookie->header.type is still equal to type USER_ACTION_COOKIE_UNSPEC
> in the classify_upcall() function and that causes this message:
> 
> 2018-01-05T19:12:09.957Z|00001|ofproto_dpif_upcall(handler1)|WARN|invalid user cookie of type 0 and size 4

I think I found the source of the problem.  This patch was originally part of the series, but then I broke it out as a separate patch:

	https://patchwork.ozlabs.org/patch/851768/

The patch was reviewed this morning and pushed to master.  I plan to send out a v2 of this series today.

Would you mind pulling from master and retrying either the v1 or (forthcoming) v2 of this series?

Thanks!

--Justin
Gregory Rose Jan. 9, 2018, 9:43 p.m. UTC | #11
On 1/9/2018 1:33 PM, Justin Pettit wrote:
>> On Jan 5, 2018, at 11:20 AM, Gregory Rose <gvrose8192@gmail.com> wrote:
>>
>> That didn't seem to help.  The cookie->header.type is still equal to type USER_ACTION_COOKIE_UNSPEC
>> in the classify_upcall() function and that causes this message:
>>
>> 2018-01-05T19:12:09.957Z|00001|ofproto_dpif_upcall(handler1)|WARN|invalid user cookie of type 0 and size 4
> I think I found the source of the problem.  This patch was originally part of the series, but then I broke it out as a separate patch:
>
> 	https://patchwork.ozlabs.org/patch/851768/
>
> The patch was reviewed this morning and pushed to master.  I plan to send out a v2 of this series today.
>
> Would you mind pulling from master and retrying either the v1 or (forthcoming) v2 of this series?
>
> Thanks!
>
> --Justin
>
>

Sure, would be glad to.  Moving a little slow today but I'll look for it.

Thanks,

- Greg
Justin Pettit Jan. 10, 2018, 6:36 a.m. UTC | #12
> On Jan 3, 2018, at 3:57 PM, Ben Pfaff <blp@ovn.org> wrote:
> 
> On Thu, Dec 21, 2017 at 02:25:11PM -0800, Justin Pettit wrote:
>>    - This reduces the number of times upcall cookies are processed.
>>    - It separate true miss calls from slow-path actions.
>> 
>> The reorganization will also be useful for a future commit.
>> 
>> Signed-off-by: Justin Pettit <jpettit@ovn.org>
> 
> I took a second look at this patch, giving it more scrutiny.  I can't
> yet explain why this line is deleted.  It seems like ->ufid is still
> used in the same way as it was before:

Thanks for catching that.  I added it back, and I'll send out a v2 shortly.

--Justin
diff mbox series

Patch

diff --git a/ofproto/ofproto-dpif-upcall.c b/ofproto/ofproto-dpif-upcall.c
index 02cf5415bc3d..46b75fe35a2b 100644
--- a/ofproto/ofproto-dpif-upcall.c
+++ b/ofproto/ofproto-dpif-upcall.c
@@ -183,6 +183,7 @@  struct udpif {
 enum upcall_type {
     BAD_UPCALL,                 /* Some kind of bug somewhere. */
     MISS_UPCALL,                /* A flow miss.  */
+    SLOW_PATH_UPCALL,           /* Slow path upcall.  */
     SFLOW_UPCALL,               /* sFlow sample. */
     FLOW_SAMPLE_UPCALL,         /* Per-flow sampling. */
     IPFIX_UPCALL                /* Per-bridge sampling. */
@@ -210,8 +211,7 @@  struct upcall {
     uint16_t mru;                  /* If !0, Maximum receive unit of
                                       fragmented IP packet */
 
-    enum dpif_upcall_type type;    /* Datapath type of the upcall. */
-    const struct nlattr *userdata; /* Userdata for DPIF_UC_ACTION Upcalls. */
+    enum upcall_type type;         /* Type of the upcall. */
     const struct nlattr *actions;  /* Flow actions in DPIF_UC_ACTION Upcalls. */
 
     bool xout_initialized;         /* True if 'xout' must be uninitialized. */
@@ -235,6 +235,8 @@  struct upcall {
     size_t key_len;                /* Datapath flow key length. */
     const struct nlattr *out_tun_key;  /* Datapath output tunnel key. */
 
+    union user_action_cookie cookie;
+
     uint64_t odp_actions_stub[1024 / 8]; /* Stub for odp_actions. */
 };
 
@@ -367,7 +369,8 @@  static int ukey_acquire(struct udpif *, const struct dpif_flow *,
 static void ukey_delete__(struct udpif_key *);
 static void ukey_delete(struct umap *, struct udpif_key *);
 static enum upcall_type classify_upcall(enum dpif_upcall_type type,
-                                        const struct nlattr *userdata);
+                                        const struct nlattr *userdata,
+                                        union user_action_cookie *cookie);
 
 static void put_op_init(struct ukey_op *op, struct udpif_key *ukey,
                         enum dpif_flow_put_flags flags);
@@ -813,7 +816,6 @@  recv_upcalls(struct handler *handler)
 
         upcall->key = dupcall->key;
         upcall->key_len = dupcall->key_len;
-        upcall->ufid = &dupcall->ufid;
 
         upcall->out_tun_key = dupcall->out_tun_key;
         upcall->actions = dupcall->actions;
@@ -969,11 +971,13 @@  udpif_revalidator(void *arg)
 }
 
 static enum upcall_type
-classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata)
+classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata,
+                union user_action_cookie *cookie)
 {
-    union user_action_cookie cookie;
     size_t userdata_len;
 
+    memset(cookie, 0, sizeof *cookie);
+
     /* First look at the upcall type. */
     switch (type) {
     case DPIF_UC_ACTION:
@@ -994,29 +998,30 @@  classify_upcall(enum dpif_upcall_type type, const struct nlattr *userdata)
         return BAD_UPCALL;
     }
     userdata_len = nl_attr_get_size(userdata);
-    if (userdata_len < sizeof cookie.type
-        || userdata_len > sizeof cookie) {
+    if (userdata_len < sizeof cookie->type
+        || userdata_len > sizeof *cookie) {
         VLOG_WARN_RL(&rl, "action upcall cookie has unexpected size %"PRIuSIZE,
                      userdata_len);
         return BAD_UPCALL;
     }
-    memset(&cookie, 0, sizeof cookie);
-    memcpy(&cookie, nl_attr_get(userdata), userdata_len);
-    if (userdata_len == MAX(8, sizeof cookie.sflow)
-        && cookie.type == USER_ACTION_COOKIE_SFLOW) {
+
+    memcpy(cookie, nl_attr_get(userdata), userdata_len);
+
+    if (userdata_len == MAX(8, sizeof cookie->sflow)
+        && cookie->type == USER_ACTION_COOKIE_SFLOW) {
         return SFLOW_UPCALL;
-    } else if (userdata_len == MAX(8, sizeof cookie.slow_path)
-               && cookie.type == USER_ACTION_COOKIE_SLOW_PATH) {
-        return MISS_UPCALL;
-    } else if (userdata_len == MAX(8, sizeof cookie.flow_sample)
-               && cookie.type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
+    } else if (userdata_len == MAX(8, sizeof cookie->slow_path)
+               && cookie->type == USER_ACTION_COOKIE_SLOW_PATH) {
+        return SLOW_PATH_UPCALL;
+    } else if (userdata_len == MAX(8, sizeof cookie->flow_sample)
+               && cookie->type == USER_ACTION_COOKIE_FLOW_SAMPLE) {
         return FLOW_SAMPLE_UPCALL;
-    } else if (userdata_len == MAX(8, sizeof cookie.ipfix)
-               && cookie.type == USER_ACTION_COOKIE_IPFIX) {
+    } else if (userdata_len == MAX(8, sizeof cookie->ipfix)
+               && cookie->type == USER_ACTION_COOKIE_IPFIX) {
         return IPFIX_UPCALL;
     } else {
         VLOG_WARN_RL(&rl, "invalid user cookie of type %"PRIu16
-                     " and size %"PRIuSIZE, cookie.type, userdata_len);
+                     " and size %"PRIuSIZE, cookie->type, userdata_len);
         return BAD_UPCALL;
     }
 }
@@ -1078,6 +1083,11 @@  upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
 {
     int error;
 
+    upcall->type = classify_upcall(type, userdata, &upcall->cookie);
+    if (upcall->type == BAD_UPCALL) {
+        return EAGAIN;
+    }
+
     error = xlate_lookup(backer, flow, &upcall->ofproto, &upcall->ipfix,
                          &upcall->sflow, NULL, &upcall->in_port);
     if (error) {
@@ -1090,8 +1100,6 @@  upcall_receive(struct upcall *upcall, const struct dpif_backer *backer,
     upcall->packet = packet;
     upcall->ufid = ufid;
     upcall->pmd_id = pmd_id;
-    upcall->type = type;
-    upcall->userdata = userdata;
     ofpbuf_use_stub(&upcall->odp_actions, upcall->odp_actions_stub,
                     sizeof upcall->odp_actions_stub);
     ofpbuf_init(&upcall->put_actions, 0);
@@ -1127,7 +1135,7 @@  upcall_xlate(struct udpif *udpif, struct upcall *upcall,
                   upcall->flow, upcall->in_port, NULL,
                   stats.tcp_flags, upcall->packet, wc, odp_actions);
 
-    if (upcall->type == DPIF_UC_MISS) {
+    if (upcall->type == MISS_UPCALL) {
         xin.resubmit_stats = &stats;
 
         if (xin.frozen_state) {
@@ -1176,7 +1184,7 @@  upcall_xlate(struct udpif *udpif, struct upcall *upcall,
     /* This function is also called for slow-pathed flows.  As we are only
      * going to create new datapath flows for actual datapath misses, there is
      * no point in creating a ukey otherwise. */
-    if (upcall->type == DPIF_UC_MISS) {
+    if (upcall->type == MISS_UPCALL) {
         upcall->ukey = ukey_create_from_upcall(upcall, wc);
     }
 }
@@ -1212,7 +1220,7 @@  should_install_flow(struct udpif *udpif, struct upcall *upcall)
 {
     unsigned int flow_limit;
 
-    if (upcall->type != DPIF_UC_MISS) {
+    if (upcall->type != MISS_UPCALL) {
         return false;
     } else if (upcall->recirc && !upcall->have_recirc_ref) {
         VLOG_DBG_RL(&rl, "upcall: no reference for recirc flow");
@@ -1325,6 +1333,7 @@  dpif_read_actions(struct udpif *udpif, struct upcall *upcall,
         break;
     case BAD_UPCALL:
     case MISS_UPCALL:
+    case SLOW_PATH_UPCALL:
     default:
         break;
     }
@@ -1336,53 +1345,46 @@  static int
 process_upcall(struct udpif *udpif, struct upcall *upcall,
                struct ofpbuf *odp_actions, struct flow_wildcards *wc)
 {
-    const struct nlattr *userdata = upcall->userdata;
     const struct dp_packet *packet = upcall->packet;
     const struct flow *flow = upcall->flow;
     size_t actions_len = 0;
-    enum upcall_type upcall_type = classify_upcall(upcall->type, userdata);
 
-    switch (upcall_type) {
+    switch (upcall->type) {
     case MISS_UPCALL:
+    case SLOW_PATH_UPCALL:
         upcall_xlate(udpif, upcall, odp_actions, wc);
         return 0;
 
     case SFLOW_UPCALL:
         if (upcall->sflow) {
-            union user_action_cookie cookie;
             struct dpif_sflow_actions sflow_actions;
 
             memset(&sflow_actions, 0, sizeof sflow_actions);
-            memset(&cookie, 0, sizeof cookie);
-            memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.sflow);
 
-            actions_len = dpif_read_actions(udpif, upcall, flow, upcall_type,
-                                            &sflow_actions);
+            actions_len = dpif_read_actions(udpif, upcall, flow,
+                                            upcall->type, &sflow_actions);
             dpif_sflow_received(upcall->sflow, packet, flow,
-                                flow->in_port.odp_port, &cookie,
+                                flow->in_port.odp_port, &upcall->cookie,
                                 actions_len > 0 ? &sflow_actions : NULL);
         }
         break;
 
     case IPFIX_UPCALL:
         if (upcall->ipfix) {
-            union user_action_cookie cookie;
             struct flow_tnl output_tunnel_key;
             struct dpif_ipfix_actions ipfix_actions;
 
-            memset(&cookie, 0, sizeof cookie);
-            memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.ipfix);
             memset(&ipfix_actions, 0, sizeof ipfix_actions);
 
             if (upcall->out_tun_key) {
                 odp_tun_key_from_attr(upcall->out_tun_key, &output_tunnel_key);
             }
 
-            actions_len = dpif_read_actions(udpif, upcall, flow, upcall_type,
-                                            &ipfix_actions);
+            actions_len = dpif_read_actions(udpif, upcall, flow,
+                                            upcall->type, &ipfix_actions);
             dpif_ipfix_bridge_sample(upcall->ipfix, packet, flow,
                                      flow->in_port.odp_port,
-                                     cookie.ipfix.output_odp_port,
+                                     upcall->cookie.ipfix.output_odp_port,
                                      upcall->out_tun_key ?
                                          &output_tunnel_key : NULL,
                                      actions_len > 0 ? &ipfix_actions: NULL);
@@ -1391,24 +1393,21 @@  process_upcall(struct udpif *udpif, struct upcall *upcall,
 
     case FLOW_SAMPLE_UPCALL:
         if (upcall->ipfix) {
-            union user_action_cookie cookie;
             struct flow_tnl output_tunnel_key;
             struct dpif_ipfix_actions ipfix_actions;
 
-            memset(&cookie, 0, sizeof cookie);
-            memcpy(&cookie, nl_attr_get(userdata), sizeof cookie.flow_sample);
             memset(&ipfix_actions, 0, sizeof ipfix_actions);
 
             if (upcall->out_tun_key) {
                 odp_tun_key_from_attr(upcall->out_tun_key, &output_tunnel_key);
             }
 
-            actions_len = dpif_read_actions(udpif, upcall, flow, upcall_type,
-                                            &ipfix_actions);
+            actions_len = dpif_read_actions(udpif, upcall, flow,
+                                            upcall->type, &ipfix_actions);
             /* The flow reflects exactly the contents of the packet.
              * Sample the packet using it. */
             dpif_ipfix_flow_sample(upcall->ipfix, packet, flow,
-                                   &cookie, flow->in_port.odp_port,
+                                   &upcall->cookie, flow->in_port.odp_port,
                                    upcall->out_tun_key ?
                                        &output_tunnel_key : NULL,
                                    actions_len > 0 ? &ipfix_actions: NULL);