@@ -180,6 +180,9 @@ ofctrl_init(struct ovn_extend_table *group_table,
static void
run_S_NEW(void)
{
+ if (rconn_get_version(swconn) < 0) {
+ return;
+ }
struct ofpbuf *buf = ofpraw_alloc(OFPRAW_NXT_TLV_TABLE_REQUEST,
rconn_get_version(swconn), 0);
xid = queue_msg(buf);
@@ -804,6 +807,9 @@ add_meter_mod(const struct ofputil_meter_mod *mm, struct ovs_list *msgs)
static void
add_ct_flush_zone(uint16_t zone_id, struct ovs_list *msgs)
{
+ if (rconn_get_version(swconn) < 0) {
+ return;
+ }
struct ofpbuf *msg = ofpraw_alloc(OFPRAW_NXT_CT_FLUSH_ZONE,
rconn_get_version(swconn), 0);
struct nx_zone_id *nzi = ofpbuf_put_zeros(msg, sizeof *nzi);
@@ -127,11 +127,15 @@ pinctrl_setup(void)
/* Fetch the switch configuration. The response later will allow us to
* change the miss_send_len to UINT16_MAX, so that we can enable
* asynchronous messages. */
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
queue_msg(ofpraw_alloc(OFPRAW_OFPT_GET_CONFIG_REQUEST,
- rconn_get_version(swconn), 0));
+ version, 0));
/* Set a packet-in format that supports userdata. */
- queue_msg(ofputil_encode_set_packet_in_format(rconn_get_version(swconn),
+ queue_msg(ofputil_encode_set_packet_in_format(version,
OFPUTIL_PACKET_IN_NXT2));
}
@@ -139,7 +143,10 @@ static void
set_switch_config(struct rconn *swconn_,
const struct ofputil_switch_config *config)
{
- enum ofp_version version = rconn_get_version(swconn_);
+ int version = rconn_get_version(swconn_);
+ if (version < 0) {
+ return;
+ }
struct ofpbuf *request = ofputil_encode_set_config(config, version);
queue_msg(request);
}
@@ -152,9 +159,12 @@ set_actions_and_enqueue_msg(const struct dp_packet *packet,
/* Copy metadata from 'md' into the packet-out via "set_field"
* actions, then add actions from 'userdata'.
*/
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
uint64_t ofpacts_stub[4096 / 8];
struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
- enum ofp_version version = rconn_get_version(swconn);
reload_metadata(&ofpacts, md);
enum ofperr error = ofpacts_pull_openflow_actions(userdata, userdata->size,
@@ -376,7 +386,10 @@ pinctrl_handle_put_dhcp_opts(
struct dp_packet *pkt_in, struct ofputil_packet_in *pin,
struct ofpbuf *userdata, struct ofpbuf *continuation)
{
- enum ofp_version version = rconn_get_version(swconn);
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
struct dp_packet *pkt_out_ptr = NULL;
uint32_t success = 0;
@@ -667,7 +680,10 @@ pinctrl_handle_put_dhcpv6_opts(
struct ofpbuf *userdata, struct ofpbuf *continuation OVS_UNUSED)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- enum ofp_version version = rconn_get_version(swconn);
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
struct dp_packet *pkt_out_ptr = NULL;
uint32_t success = 0;
@@ -861,7 +877,10 @@ pinctrl_handle_dns_lookup(
struct controller_ctx *ctx)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- enum ofp_version version = rconn_get_version(swconn);
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
struct dp_packet *pkt_out_ptr = NULL;
uint32_t success = 0;
@@ -1431,6 +1450,10 @@ ipv6_ra_send(struct ipv6_ra_state *ra)
if (time_msec() < ra->next_announce) {
return ra->next_announce;
}
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return ra->next_announce;
+ }
uint64_t packet_stub[128 / 8];
struct dp_packet packet;
@@ -1471,7 +1494,6 @@ ipv6_ra_send(struct ipv6_ra_state *ra)
};
match_set_in_port(&po.flow_metadata, OFPP_CONTROLLER);
- enum ofp_version version = rconn_get_version(swconn);
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
queue_msg(ofputil_encode_packet_out(&po, proto));
dp_packet_uninit(&packet);
@@ -1897,6 +1919,10 @@ send_garp(struct garp_data *garp, long long int current_time)
return garp->announce_time;
}
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return garp->announce_time;
+ }
/* Compose a GARP request packet. */
uint64_t packet_stub[128 / 8];
struct dp_packet packet;
@@ -1912,7 +1938,6 @@ send_garp(struct garp_data *garp, long long int current_time)
/* Compose actions. The garp request is output on localnet ofport. */
uint64_t ofpacts_stub[4096 / 8];
struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
- enum ofp_version version = rconn_get_version(swconn);
ofpact_put_OUTPUT(&ofpacts)->port = garp->ofport;
struct ofputil_packet_out po = {
@@ -2372,7 +2397,10 @@ pinctrl_handle_put_nd_ra_opts(
struct ofpbuf *continuation)
{
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
- enum ofp_version version = rconn_get_version(swconn);
+ int version = rconn_get_version(swconn);
+ if (version < 0) {
+ return;
+ }
enum ofputil_protocol proto = ofputil_protocol_from_ofp_version(version);
struct dp_packet *pkt_out_ptr = NULL;
uint32_t success = 0;
ovn-controller abort was found in pinctrl_run() when debugging an occasional test case failure of: ovn-controller.at: Chassis external-id Backtrace: (gdb) bt 0 0x00007fd0f84878d7 in raise () from /lib64/libc.so.6 1 0x00007fd0f848953a in abort () from /lib64/libc.so.6 2 0x00000000004a6c9e in ovs_abort_valist (err_no=err_no@entry=0, format=format@entry=0x55d050 "%s: assertion %s failed in %s()", args=args@entry=0x7fff24390158) at lib/util.c:360 3 0x00000000004ad8b0 in vlog_abort_valist (module_=<optimized out>, message=0x55d050 "%s: assertion %s failed in %s()", args=args@entry=0x7fff24390158) at lib/vlog.c:1219 4 0x00000000004ad937 in vlog_abort (module=module@entry=0x7f6b20 <this_module>, message=message@entry=0x55d050 "%s: assertion %s failed in %s()") at lib/vlog.c:1233 5 0x00000000004a6a4e in ovs_assert_failure (where=where@entry=0x552ec3 "lib/ofp-msgs.c:1062", function=function@entry=0x553a20 <__func__.9268> "raw_instance_get", condition=condition@entry=0x552c00 "version >= info->min_version && version <= info->max_version") at lib/util.c:80 6 0x0000000000471fb4 in raw_instance_get (info=<optimized out>, version=<optimized out>) at lib/ofp-msgs.c:1062 7 0x0000000000472533 in ofpraw_put__ (raw=raw@entry=OFPRAW_NXT_SET_PACKET_IN_FORMAT, version=version@entry=255 '\377', xid=xid@entry=83886080, extra_tailroom=extra_tailroom@entry=0, buf=buf@entry=0x123b340) at lib/ofp-msgs.c:712 8 0x000000000047299c in ofpraw_alloc_xid (extra_tailroom=0, xid=83886080, version=255 '\377', raw=OFPRAW_NXT_SET_PACKET_IN_FORMAT) at lib/ofp-msgs.c:588 9 ofpraw_alloc (raw=raw@entry=OFPRAW_NXT_SET_PACKET_IN_FORMAT, version=<optimized out>, extra_tailroom=extra_tailroom@entry=0) at lib/ofp-msgs.c:579 10 0x000000000047383a in ofputil_encode_set_packet_in_format (ofp_version=<optimized out>, format=format@entry=OFPUTIL_PACKET_IN_NXT2) at lib/ofp-packet.c:70 11 0x00000000004145d3 in pinctrl_setup () at ovn/controller/pinctrl.c:134 12 pinctrl_run (ctx=ctx@entry=0x7fff243904f0, br_int=br_int@entry=0x1216a50, chassis=chassis@entry=0x1239f90, chassis_index=chassis_index@entry=0x7fff243a1c00, local_datapaths=local_datapaths@entry=0x7fff243a1c20, active_tunnels=active_tunnels@entry=0x7fff243a1c80) at ovn/controller/pinctrl.c:1258 13 0x00000000004076ce in main (argc=6, argv=0x7fff243a3e38) at ovn/controller/ovn-controller.c:1177 Root cause: When entering pinctrl_setup() the rconn got Broken pipe error: 2018-05-19T06:10:06.697Z|00105|stream_fd|DBG|send: Broken pipe 2018-05-19T06:10:06.697Z|00106|vconn|DBG|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: sent (Broken pipe): OFPT_GET_CONFIG_REQUEST (OF1.3) (xid=0x4): 2018-05-19T06:10:06.697Z|00107|rconn|WARN|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: connection dropped (Broken pipe) 2018-05-19T06:10:06.697Z|00109|rconn|DBG|unix:/home/hzhou/src/ovs/tests/testsuite.dir/2571/hv/br-int.mgmt: entering BACKOFF 2018-05-19T06:10:06.698Z|00110|util|EMER|lib/ofp-msgs.c:1062: assertion version >= info->min_version && version <= info->max_version failed in raw_instance_get() (The connection is closed because the test case set datapath_type to foo, which is invalid for OVS but the test case doesn't care.) There are two message sendings in pinctrl_setup(). The first message sending detected the connection lost and triggered disconnect() which set rconn status to BACKOFF. However, before sending the second message, it calls rconn_get_version() again. When rconn is not connected, the rconn_get_version() returns -1, which is used when calling ofputil_encode_set_packet_in_format(), which finally triggered the abort(). This problem exists not only in pinctrl_setup(), but many other places in pinctrl and ofctrl modules. In those places when calling rconn_get_version(), the connection status may not be connected, and rconn_get_version() may return -1, but the return value is not checked and directly used for following function calls, so potentially ovn-controller can crash in these places. This patch fixes these problems. Signed-off-by: Han Zhou <hzhou8@ebay.com> --- Notes: v1->v2: correct the assignment from int to enum ovn/controller/ofctrl.c | 6 ++++++ ovn/controller/pinctrl.c | 48 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 44 insertions(+), 10 deletions(-)