Message ID | 20240516153832.153496-2-mkp@redhat.com |
---|---|
State | Accepted |
Commit | 16f6885353c2820adc3c217bd2588d35748f0849 |
Delegated to: | Simon Horman |
Headers | show |
Series | [ovs-dev,v2,1/2] ipf: Only add fragments to batch of same dl_type. | expand |
Context | Check | Description |
---|---|---|
ovsrobot/apply-robot | success | apply and check: success |
ovsrobot/github-robot-_Build_and_Test | success | github build: passed |
ovsrobot/intel-ovs-compilation | success | test: success |
Mike Pattrick <mkp@redhat.com> writes: > When conntrack is reassembling packet fragments, the same reassembly > context can be shared across multiple threads handling different packets > simultaneously. Once a full packet is assembled, it is added to a packet > batch for processing, in the case where there are multiple different pmd > threads accessing conntrack simultaneously, there is a race condition > where the reassembled packet may be added to an arbitrary batch even if > the current batch is available. > > When this happens, the packet may be handled incorrectly as it is > inserted into a random openflow execution pipeline, instead of the > pipeline for that packets flow. > > This change makes a best effort attempt to try to add the defragmented > packet to the current batch. directly. This should succeed most of the > time. > > Fixes: 4ea96698f667 ("Userspace datapath: Add fragmentation handling.") > Reported-at: https://issues.redhat.com/browse/FDP-560 > Signed-off-by: Mike Pattrick <mkp@redhat.com> > --- Acked-by: Paolo Valerio <pvalerio@redhat.com> > lib/ipf.c | 27 ++++++++++++++++++++------- > 1 file changed, 20 insertions(+), 7 deletions(-) > > diff --git a/lib/ipf.c b/lib/ipf.c > index 3c8960be3..2d715f5e9 100644 > --- a/lib/ipf.c > +++ b/lib/ipf.c > @@ -506,13 +506,15 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list) > } > > /* Called when a frag list state transitions to another state. This is > - * triggered by new fragment for the list being received.*/ > -static void > +* triggered by new fragment for the list being received. Returns a reassembled > +* packet if this fragment has completed one. 
*/ > +static struct reassembled_pkt * > ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list, > bool ff, bool lf, bool v6) > OVS_REQUIRES(ipf->ipf_lock) > { > enum ipf_list_state curr_state = ipf_list->state; > + struct reassembled_pkt *ret = NULL; > enum ipf_list_state next_state; > switch (curr_state) { > case IPF_LIST_STATE_UNUSED: > @@ -562,12 +564,15 @@ ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list, > ipf_reassembled_list_add(&ipf->reassembled_pkt_list, rp); > ipf_expiry_list_remove(ipf_list); > next_state = IPF_LIST_STATE_COMPLETED; > + ret = rp; > } else { > next_state = IPF_LIST_STATE_REASS_FAIL; > } > } > } > ipf_list->state = next_state; > + > + return ret; > } > > /* Some sanity checks are redundant, but prudent, in case code paths for > @@ -799,7 +804,8 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx, > static bool > ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, > struct dp_packet *pkt, uint16_t start_data_byte, > - uint16_t end_data_byte, bool ff, bool lf, bool v6) > + uint16_t end_data_byte, bool ff, bool lf, bool v6, > + struct reassembled_pkt **rp) > OVS_REQUIRES(ipf->ipf_lock) > { > bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list, > @@ -820,7 +826,7 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, > ipf_list->last_inuse_idx++; > atomic_count_inc(&ipf->nfrag); > ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED); > - ipf_list_state_transition(ipf, ipf_list, ff, lf, v6); > + *rp = ipf_list_state_transition(ipf, ipf_list, ff, lf, v6); > } else { > OVS_NOT_REACHED(); > } > @@ -853,7 +859,8 @@ ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key, > * to a list of fragemnts. 
*/ > static bool > ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, > - uint16_t zone, long long now, uint32_t hash_basis) > + uint16_t zone, long long now, uint32_t hash_basis, > + struct reassembled_pkt **rp) > OVS_REQUIRES(ipf->ipf_lock) > { > struct ipf_list_key key; > @@ -922,7 +929,7 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, > } > > return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte, > - end_data_byte, ff, lf, v6); > + end_data_byte, ff, lf, v6, rp); > } > > /* Filters out fragments from a batch of fragments and adjust the batch. */ > @@ -941,11 +948,17 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, > || > (dl_type == htons(ETH_TYPE_IPV6) && > ipf_is_valid_v6_frag(ipf, pkt)))) { > + struct reassembled_pkt *rp = NULL; > > ovs_mutex_lock(&ipf->ipf_lock); > - if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) { > + if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis, > + &rp)) { > dp_packet_batch_refill(pb, pkt, pb_idx); > } else { > + if (rp && !dp_packet_batch_is_full(pb)) { > + dp_packet_batch_refill(pb, rp->pkt, pb_idx); > + rp->list->reass_execute_ctx = rp->pkt; > + } > dp_packet_delete(pkt); > } > ovs_mutex_unlock(&ipf->ipf_lock); > -- > 2.39.3 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
On Thu, May 16, 2024 at 11:38:32AM -0400, Mike Pattrick wrote: > When conntrack is reassembling packet fragments, the same reassembly > context can be shared across multiple threads handling different packets > simultaneously. Once a full packet is assembled, it is added to a packet > batch for processing, in the case where there are multiple different pmd > threads accessing conntrack simultaneously, there is a race condition > where the reassembled packet may be added to an arbitrary batch even if > the current batch is available. > > When this happens, the packet may be handled incorrectly as it is > inserted into a random openflow execution pipeline, instead of the > pipeline for that packets flow. > > This change makes a best effort attempt to try to add the defragmented > packet to the current batch. directly. This should succeed most of the > time. > > Fixes: 4ea96698f667 ("Userspace datapath: Add fragmentation handling.") > Reported-at: https://issues.redhat.com/browse/FDP-560 > Signed-off-by: Mike Pattrick <mkp@redhat.com> Acked-by: Simon Horman <horms@ovn.org>
Mike Pattrick <mkp@redhat.com> writes: > When conntrack is reassembling packet fragments, the same reassembly > context can be shared across multiple threads handling different packets > simultaneously. Once a full packet is assembled, it is added to a packet > batch for processing, in the case where there are multiple different pmd > threads accessing conntrack simultaneously, there is a race condition > where the reassembled packet may be added to an arbitrary batch even if > the current batch is available. > > When this happens, the packet may be handled incorrectly as it is > inserted into a random openflow execution pipeline, instead of the > pipeline for that packets flow. > > This change makes a best effort attempt to try to add the defragmented > packet to the current batch. directly. This should succeed most of the > time. > > Fixes: 4ea96698f667 ("Userspace datapath: Add fragmentation handling.") > Reported-at: https://issues.redhat.com/browse/FDP-560 > Signed-off-by: Mike Pattrick <mkp@redhat.com> > --- The patch overall looks good to me. I'm considering applying with the following addition: diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py index 6b293770dd..d9b9e0c23f 100755 --- a/utilities/checkpatch.py +++ b/utilities/checkpatch.py @@ -63,7 +63,8 @@ def open_spell_check_dict(): 'dhcpv6', 'opts', 'metadata', 'geneve', 'mutex', 'netdev', 'netdevs', 'subtable', 'virtio', 'qos', 'policer', 'datapath', 'tunctl', 'attr', 'ethernet', - 'ether', 'defrag', 'defragment', 'loopback', 'sflow', + 'ether', 'defrag', 'defragment', 'defragmented', + 'loopback', 'sflow', 'acl', 'initializer', 'recirc', 'xlated', 'unclosed', 'netlink', 'msec', 'usec', 'nsec', 'ms', 'us', 'ns', 'kilobits', 'kbps', 'kilobytes', 'megabytes', 'mbps', unless anyone objects. 
This is to squelch: == Checking 16f6885353c2 ("ipf: Handle common case of ipf defragmentation.") == WARNING: Possible misspelled word: "defragmented" Did you mean: ['defragment ed', 'defragment-ed', 'defragment'] Lines checked: 129, Warnings: 1, Errors: 0 > lib/ipf.c | 27 ++++++++++++++++++++------- > 1 file changed, 20 insertions(+), 7 deletions(-) > > diff --git a/lib/ipf.c b/lib/ipf.c > index 3c8960be3..2d715f5e9 100644 > --- a/lib/ipf.c > +++ b/lib/ipf.c > @@ -506,13 +506,15 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list) > } > > /* Called when a frag list state transitions to another state. This is > - * triggered by new fragment for the list being received.*/ > -static void > +* triggered by new fragment for the list being received. Returns a reassembled > +* packet if this fragment has completed one. */ > +static struct reassembled_pkt * > ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list, > bool ff, bool lf, bool v6) > OVS_REQUIRES(ipf->ipf_lock) > { > enum ipf_list_state curr_state = ipf_list->state; > + struct reassembled_pkt *ret = NULL; > enum ipf_list_state next_state; > switch (curr_state) { > case IPF_LIST_STATE_UNUSED: > @@ -562,12 +564,15 @@ ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list, > ipf_reassembled_list_add(&ipf->reassembled_pkt_list, rp); > ipf_expiry_list_remove(ipf_list); > next_state = IPF_LIST_STATE_COMPLETED; > + ret = rp; > } else { > next_state = IPF_LIST_STATE_REASS_FAIL; > } > } > } > ipf_list->state = next_state; > + > + return ret; > } > > /* Some sanity checks are redundant, but prudent, in case code paths for > @@ -799,7 +804,8 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx, > static bool > ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, > struct dp_packet *pkt, uint16_t start_data_byte, > - uint16_t end_data_byte, bool ff, bool lf, bool v6) > + uint16_t end_data_byte, bool ff, bool lf, bool v6, > + struct reassembled_pkt **rp) > 
OVS_REQUIRES(ipf->ipf_lock) > { > bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list, > @@ -820,7 +826,7 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, > ipf_list->last_inuse_idx++; > atomic_count_inc(&ipf->nfrag); > ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED); > - ipf_list_state_transition(ipf, ipf_list, ff, lf, v6); > + *rp = ipf_list_state_transition(ipf, ipf_list, ff, lf, v6); > } else { > OVS_NOT_REACHED(); > } > @@ -853,7 +859,8 @@ ipf_list_init(struct ipf_list *ipf_list, struct ipf_list_key *key, > * to a list of fragemnts. */ > static bool > ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, > - uint16_t zone, long long now, uint32_t hash_basis) > + uint16_t zone, long long now, uint32_t hash_basis, > + struct reassembled_pkt **rp) > OVS_REQUIRES(ipf->ipf_lock) > { > struct ipf_list_key key; > @@ -922,7 +929,7 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, > } > > return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte, > - end_data_byte, ff, lf, v6); > + end_data_byte, ff, lf, v6, rp); > } > > /* Filters out fragments from a batch of fragments and adjust the batch. */ > @@ -941,11 +948,17 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, > || > (dl_type == htons(ETH_TYPE_IPV6) && > ipf_is_valid_v6_frag(ipf, pkt)))) { > + struct reassembled_pkt *rp = NULL; > > ovs_mutex_lock(&ipf->ipf_lock); > - if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) { > + if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis, > + &rp)) { > dp_packet_batch_refill(pb, pkt, pb_idx); > } else { > + if (rp && !dp_packet_batch_is_full(pb)) { > + dp_packet_batch_refill(pb, rp->pkt, pb_idx); > + rp->list->reass_execute_ctx = rp->pkt; > + } > dp_packet_delete(pkt); > } > ovs_mutex_unlock(&ipf->ipf_lock);
On 6/5/24 16:54, Aaron Conole wrote: > Mike Pattrick <mkp@redhat.com> writes: > >> When conntrack is reassembling packet fragments, the same reassembly >> context can be shared across multiple threads handling different packets >> simultaneously. Once a full packet is assembled, it is added to a packet >> batch for processing, in the case where there are multiple different pmd >> threads accessing conntrack simultaneously, there is a race condition >> where the reassembled packet may be added to an arbitrary batch even if >> the current batch is available. >> >> When this happens, the packet may be handled incorrectly as it is >> inserted into a random openflow execution pipeline, instead of the >> pipeline for that packets flow. >> >> This change makes a best effort attempt to try to add the defragmented >> packet to the current batch. directly. This should succeed most of the >> time. >> >> Fixes: 4ea96698f667 ("Userspace datapath: Add fragmentation handling.") >> Reported-at: https://issues.redhat.com/browse/FDP-560 >> Signed-off-by: Mike Pattrick <mkp@redhat.com> >> --- > > The patch overall looks good to me. I'm considering applying with the > following addition: > > diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py > index 6b293770dd..d9b9e0c23f 100755 > --- a/utilities/checkpatch.py > +++ b/utilities/checkpatch.py > @@ -63,7 +63,8 @@ def open_spell_check_dict(): > 'dhcpv6', 'opts', 'metadata', 'geneve', 'mutex', > 'netdev', 'netdevs', 'subtable', 'virtio', 'qos', > 'policer', 'datapath', 'tunctl', 'attr', 'ethernet', > - 'ether', 'defrag', 'defragment', 'loopback', 'sflow', > + 'ether', 'defrag', 'defragment', 'defragmented', > + 'loopback', 'sflow', > 'acl', 'initializer', 'recirc', 'xlated', 'unclosed', > 'netlink', 'msec', 'usec', 'nsec', 'ms', 'us', 'ns', > 'kilobits', 'kbps', 'kilobytes', 'megabytes', 'mbps', > > > unless anyone objects. 
This is to squelch: > > == Checking 16f6885353c2 ("ipf: Handle common case of ipf defragmentation.") == > WARNING: Possible misspelled word: "defragmented" > Did you mean: ['defragment ed', 'defragment-ed', 'defragment'] > Lines checked: 129, Warnings: 1, Errors: 0 It doesn't affect CI today, so can be a separate patch, I think. We have a few more words like this in relatively recent commits, like 'poller' or 'autovalidator', these can be bundled in that separate commit as well. Though updating the dictionary along with the patch that is using the word sounds OK to me as well. Best regards, Ilya Maximets.
Ilya Maximets <i.maximets@ovn.org> writes: > On 6/5/24 16:54, Aaron Conole wrote: >> Mike Pattrick <mkp@redhat.com> writes: >> >>> When conntrack is reassembling packet fragments, the same reassembly >>> context can be shared across multiple threads handling different packets >>> simultaneously. Once a full packet is assembled, it is added to a packet >>> batch for processing, in the case where there are multiple different pmd >>> threads accessing conntrack simultaneously, there is a race condition >>> where the reassembled packet may be added to an arbitrary batch even if >>> the current batch is available. >>> >>> When this happens, the packet may be handled incorrectly as it is >>> inserted into a random openflow execution pipeline, instead of the >>> pipeline for that packets flow. >>> >>> This change makes a best effort attempt to try to add the defragmented >>> packet to the current batch. directly. This should succeed most of the >>> time. >>> >>> Fixes: 4ea96698f667 ("Userspace datapath: Add fragmentation handling.") >>> Reported-at: https://issues.redhat.com/browse/FDP-560 >>> Signed-off-by: Mike Pattrick <mkp@redhat.com> >>> --- >> >> The patch overall looks good to me. I'm considering applying with the >> following addition: >> >> diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py >> index 6b293770dd..d9b9e0c23f 100755 >> --- a/utilities/checkpatch.py >> +++ b/utilities/checkpatch.py >> @@ -63,7 +63,8 @@ def open_spell_check_dict(): >> 'dhcpv6', 'opts', 'metadata', 'geneve', 'mutex', >> 'netdev', 'netdevs', 'subtable', 'virtio', 'qos', >> 'policer', 'datapath', 'tunctl', 'attr', 'ethernet', >> - 'ether', 'defrag', 'defragment', 'loopback', 'sflow', >> + 'ether', 'defrag', 'defragment', 'defragmented', >> + 'loopback', 'sflow', >> 'acl', 'initializer', 'recirc', 'xlated', 'unclosed', >> 'netlink', 'msec', 'usec', 'nsec', 'ms', 'us', 'ns', >> 'kilobits', 'kbps', 'kilobytes', 'megabytes', 'mbps', >> >> >> unless anyone objects. 
This is to squelch: >> >> == Checking 16f6885353c2 ("ipf: Handle common case of ipf defragmentation.") == >> WARNING: Possible misspelled word: "defragmented" >> Did you mean: ['defragment ed', 'defragment-ed', 'defragment'] >> Lines checked: 129, Warnings: 1, Errors: 0 > > It doesn't affect CI today, so can be a separate patch, I think. We > have a few more > words like this in relatively recent commits, like 'poller' or > 'autovalidator', these > can be bundled in that separate commit as well. Good point - okay, I'll make a separate commit for it. > Though updating the dictionary along with the patch that is using the > word sounds OK > to me as well. Actually, I was thinking that it has the downside of sometimes making backports more tricky as well, so I ended up not including this change. > Best regards, Ilya Maximets. > _______________________________________________ > dev mailing list > dev@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Mike Pattrick <mkp@redhat.com> writes: > When conntrack is reassembling packet fragments, the same reassembly > context can be shared across multiple threads handling different packets > simultaneously. Once a full packet is assembled, it is added to a packet > batch for processing, in the case where there are multiple different pmd > threads accessing conntrack simultaneously, there is a race condition > where the reassembled packet may be added to an arbitrary batch even if > the current batch is available. > > When this happens, the packet may be handled incorrectly as it is > inserted into a random openflow execution pipeline, instead of the > pipeline for that packets flow. > > This change makes a best effort attempt to try to add the defragmented > packet to the current batch. directly. This should succeed most of the > time. > > Fixes: 4ea96698f667 ("Userspace datapath: Add fragmentation handling.") > Reported-at: https://issues.redhat.com/browse/FDP-560 > Signed-off-by: Mike Pattrick <mkp@redhat.com> > --- Applied and backported to branches down to 2.17 Thanks, Mike, Paolo, and Simon.
Ilya Maximets <i.maximets@ovn.org> writes: > On 6/5/24 16:54, Aaron Conole wrote: >> Mike Pattrick <mkp@redhat.com> writes: >> >>> When conntrack is reassembling packet fragments, the same reassembly >>> context can be shared across multiple threads handling different packets >>> simultaneously. Once a full packet is assembled, it is added to a packet >>> batch for processing, in the case where there are multiple different pmd >>> threads accessing conntrack simultaneously, there is a race condition >>> where the reassembled packet may be added to an arbitrary batch even if >>> the current batch is available. >>> >>> When this happens, the packet may be handled incorrectly as it is >>> inserted into a random openflow execution pipeline, instead of the >>> pipeline for that packets flow. >>> >>> This change makes a best effort attempt to try to add the defragmented >>> packet to the current batch. directly. This should succeed most of the >>> time. >>> >>> Fixes: 4ea96698f667 ("Userspace datapath: Add fragmentation handling.") >>> Reported-at: https://issues.redhat.com/browse/FDP-560 >>> Signed-off-by: Mike Pattrick <mkp@redhat.com> >>> --- >> >> The patch overall looks good to me. I'm considering applying with the >> following addition: >> >> diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py >> index 6b293770dd..d9b9e0c23f 100755 >> --- a/utilities/checkpatch.py >> +++ b/utilities/checkpatch.py >> @@ -63,7 +63,8 @@ def open_spell_check_dict(): >> 'dhcpv6', 'opts', 'metadata', 'geneve', 'mutex', >> 'netdev', 'netdevs', 'subtable', 'virtio', 'qos', >> 'policer', 'datapath', 'tunctl', 'attr', 'ethernet', >> - 'ether', 'defrag', 'defragment', 'loopback', 'sflow', >> + 'ether', 'defrag', 'defragment', 'defragmented', >> + 'loopback', 'sflow', >> 'acl', 'initializer', 'recirc', 'xlated', 'unclosed', >> 'netlink', 'msec', 'usec', 'nsec', 'ms', 'us', 'ns', >> 'kilobits', 'kbps', 'kilobytes', 'megabytes', 'mbps', >> >> >> unless anyone objects. 
This is to squelch: >> >> == Checking 16f6885353c2 ("ipf: Handle common case of ipf defragmentation.") == >> WARNING: Possible misspelled word: "defragmented" >> Did you mean: ['defragment ed', 'defragment-ed', 'defragment'] >> Lines checked: 129, Warnings: 1, Errors: 0 > > It doesn't affect CI today, so can be a separate patch, I think. We > have a few more > words like this in relatively recent commits, like 'poller' or > 'autovalidator', these > can be bundled in that separate commit as well. > > Though updating the dictionary along with the patch that is using the > word sounds OK > to me as well. That makes sense to me. I've been thinking of adding a spell-check test to the robot. Rather than the existing apply check doing the spell checking. The spell checker would only ever generate a warning. WDYT? > Best regards, Ilya Maximets.
diff --git a/lib/ipf.c b/lib/ipf.c index 3c8960be3..2d715f5e9 100644 --- a/lib/ipf.c +++ b/lib/ipf.c @@ -506,13 +506,15 @@ ipf_reassemble_v6_frags(struct ipf_list *ipf_list) } /* Called when a frag list state transitions to another state. This is - * triggered by new fragment for the list being received.*/ -static void +* triggered by new fragment for the list being received. Returns a reassembled +* packet if this fragment has completed one. */ +static struct reassembled_pkt * ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list, bool ff, bool lf, bool v6) OVS_REQUIRES(ipf->ipf_lock) { enum ipf_list_state curr_state = ipf_list->state; + struct reassembled_pkt *ret = NULL; enum ipf_list_state next_state; switch (curr_state) { case IPF_LIST_STATE_UNUSED: @@ -562,12 +564,15 @@ ipf_list_state_transition(struct ipf *ipf, struct ipf_list *ipf_list, ipf_reassembled_list_add(&ipf->reassembled_pkt_list, rp); ipf_expiry_list_remove(ipf_list); next_state = IPF_LIST_STATE_COMPLETED; + ret = rp; } else { next_state = IPF_LIST_STATE_REASS_FAIL; } } } ipf_list->state = next_state; + + return ret; } /* Some sanity checks are redundant, but prudent, in case code paths for @@ -799,7 +804,8 @@ ipf_is_frag_duped(const struct ipf_frag *frag_list, int last_inuse_idx, static bool ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, struct dp_packet *pkt, uint16_t start_data_byte, - uint16_t end_data_byte, bool ff, bool lf, bool v6) + uint16_t end_data_byte, bool ff, bool lf, bool v6, + struct reassembled_pkt **rp) OVS_REQUIRES(ipf->ipf_lock) { bool duped_frag = ipf_is_frag_duped(ipf_list->frag_list, @@ -820,7 +826,7 @@ ipf_process_frag(struct ipf *ipf, struct ipf_list *ipf_list, ipf_list->last_inuse_idx++; atomic_count_inc(&ipf->nfrag); ipf_count(ipf, v6, IPF_NFRAGS_ACCEPTED); - ipf_list_state_transition(ipf, ipf_list, ff, lf, v6); + *rp = ipf_list_state_transition(ipf, ipf_list, ff, lf, v6); } else { OVS_NOT_REACHED(); } @@ -853,7 +859,8 @@ ipf_list_init(struct 
ipf_list *ipf_list, struct ipf_list_key *key, * to a list of fragemnts. */ static bool ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, - uint16_t zone, long long now, uint32_t hash_basis) + uint16_t zone, long long now, uint32_t hash_basis, + struct reassembled_pkt **rp) OVS_REQUIRES(ipf->ipf_lock) { struct ipf_list_key key; @@ -922,7 +929,7 @@ ipf_handle_frag(struct ipf *ipf, struct dp_packet *pkt, ovs_be16 dl_type, } return ipf_process_frag(ipf, ipf_list, pkt, start_data_byte, - end_data_byte, ff, lf, v6); + end_data_byte, ff, lf, v6, rp); } /* Filters out fragments from a batch of fragments and adjust the batch. */ @@ -941,11 +948,17 @@ ipf_extract_frags_from_batch(struct ipf *ipf, struct dp_packet_batch *pb, || (dl_type == htons(ETH_TYPE_IPV6) && ipf_is_valid_v6_frag(ipf, pkt)))) { + struct reassembled_pkt *rp = NULL; ovs_mutex_lock(&ipf->ipf_lock); - if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis)) { + if (!ipf_handle_frag(ipf, pkt, dl_type, zone, now, hash_basis, + &rp)) { dp_packet_batch_refill(pb, pkt, pb_idx); } else { + if (rp && !dp_packet_batch_is_full(pb)) { + dp_packet_batch_refill(pb, rp->pkt, pb_idx); + rp->list->reass_execute_ctx = rp->pkt; + } dp_packet_delete(pkt); } ovs_mutex_unlock(&ipf->ipf_lock);
When conntrack is reassembling packet fragments, the same reassembly context can be shared across multiple threads handling different packets simultaneously. Once a full packet is assembled, it is added to a packet batch for processing. In the case where there are multiple different pmd threads accessing conntrack simultaneously, there is a race condition where the reassembled packet may be added to an arbitrary batch even if the current batch is available. When this happens, the packet may be handled incorrectly as it is inserted into a random openflow execution pipeline, instead of the pipeline for that packet's flow. This change makes a best effort attempt to try to add the defragmented packet to the current batch directly. This should succeed most of the time. Fixes: 4ea96698f667 ("Userspace datapath: Add fragmentation handling.") Reported-at: https://issues.redhat.com/browse/FDP-560 Signed-off-by: Mike Pattrick <mkp@redhat.com> --- lib/ipf.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-)