Message ID | 1229175685-18716-3-git-send-email-gerrit@erg.abdn.ac.uk |
---|---|
State | Changes Requested, archived |
Delegated to: | David Miller |
Headers | show |
2008/12/13 Gerrit Renker <gerrit@erg.abdn.ac.uk>: > +/** > + * ccid_request_module - Pre-load CCID module for later use > + * This should be called only from process context (e.g. during connection > + * setup) and is necessary for later calls to ccid_new (typically in software > + * interrupt), so that it has the modules available when they are needed. > + */ > +static int ccid_request_module(u8 id) > +{ > + if (!in_atomic()) { > + ccids_read_lock(); > + if (ccids[id] == NULL) { > + ccids_read_unlock(); > + return request_module("net-dccp-ccid-%d", id); > + } > + ccids_read_unlock(); > + } > + return 0; > +} Just a random thought: does this lock really do anything useful here? Best Regards, Michał Mirosław
| > +static int ccid_request_module(u8 id) | > +{ | > + if (!in_atomic()) { | > + ccids_read_lock(); | > + if (ccids[id] == NULL) { | > + ccids_read_unlock(); | > + return request_module("net-dccp-ccid-%d", id); | > + } | > + ccids_read_unlock(); | > + } | > + return 0; | > +} | | Just a random thought: does this lock really do anything useful here? | Reading the (shared) 'ccids' array is the solution chosen to check whether the module for CCID with number 'id' is already loaded. It would be bad to call request_module() each time a new DCCP socket is opened. Using the 'ccids' array may not be the only way to check whether a given module (whose name depends on the value of 'id') is loaded. But if this solution is chosen, then it requires to protect the read access to 'ccids', which is shared among all DCCP sockets. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2008/12/13 Gerrit Renker <gerrit@erg.abdn.ac.uk>: > | > +static int ccid_request_module(u8 id) > | > +{ > | > + if (!in_atomic()) { > | > + ccids_read_lock(); > | > + if (ccids[id] == NULL) { > | > + ccids_read_unlock(); > | > + return request_module("net-dccp-ccid-%d", id); > | > + } > | > + ccids_read_unlock(); > | > + } > | > + return 0; > | > +} > | > | Just a random thought: does this lock really do anything useful here? > | > Reading the (shared) 'ccids' array is the solution chosen to check whether > the module for CCID with number 'id' is already loaded. > > It would be bad to call request_module() each time a new DCCP socket is > opened. Using the 'ccids' array may not be the only way to check whether > a given module (whose name depends on the value of 'id') is loaded. > > But if this solution is chosen, then it requires to protect the read > access to 'ccids', which is shared among all DCCP sockets. Since the lock is dropped after checking ccids[id] then there's a window where multiple request_module()s can be called if multiple applications create a DCCP socket at a same time. The code below should do the same without a lock (ccids is a static array, so ccids[N] is always at the same place). static int ccid_request_module(u8 id) { if (!in_atomic()) { rmb(); if (ccids[id] == NULL) return request_module("net-dccp-ccid-%d", id); } return 0; } Best Regards, Michał Mirosław N§˛ćěr¸yúčŘb˛XŹśÇ§vŘ^)Ţş{.nÇ+ˇ§z×^ž)í ćčw*jgŹą¨śÝ˘j/ęäzšŢŕ2ިčÚ&˘)ߥŤaśÚţřŽGŤéhŽćj:+v¨wčŮĽ
Em Sat, Dec 13, 2008 at 02:41:22PM +0100, Gerrit Renker escreveu: > This adds auto-loading of CCIDs (when module loading is enabled) > for the purpose of feature negotiation. > > The problem with loading the CCIDs at the end of feature negotiation is > that this would happen in software interrupt context. Besides, if the host > advertises CCIDs during negotiation, it should have them ready to use, in > case an agreeing peer wants to use it for the connection. > > Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk> > Signed-off-by: Ian McDonald <ian.mcdonald@jandi.co.nz> > --- > net/dccp/ccid.h | 1 + > net/dccp/ccid.c | 39 +++++++++++++++++++++++++++++---------- > net/dccp/feat.c | 5 +++++ > 3 files changed, 35 insertions(+), 10 deletions(-) > > --- a/net/dccp/ccid.h > +++ b/net/dccp/ccid.h > @@ -108,6 +108,7 @@ extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); > extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, > char __user *, int __user *); > > +extern int ccid_request_modules(u8 const *ccid_array, u8 array_len); > extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, > gfp_t gfp); > > --- a/net/dccp/ccid.c > +++ b/net/dccp/ccid.c > @@ -196,22 +196,41 @@ int ccid_unregister(struct ccid_operations *ccid_ops) > > EXPORT_SYMBOL_GPL(ccid_unregister); > > +/** > + * ccid_request_module - Pre-load CCID module for later use > + * This should be called only from process context (e.g. during connection > + * setup) and is necessary for later calls to ccid_new (typically in software > + * interrupt), so that it has the modules available when they are needed. > + */ > +static int ccid_request_module(u8 id) > +{ > + if (!in_atomic()) { Shouldn't the above be in a BUG_ON? It looks strange to simply refuse to do that and return OK. > + ccids_read_lock(); > + if (ccids[id] == NULL) { > + ccids_read_unlock(); > + return request_module("net-dccp-ccid-%d", id); > + } > + ccids_read_unlock(); > + } > + return 0; > +} > + > +int ccid_request_modules(u8 const *ccid_array, u8 array_len) > +{ > +#ifdef CONFIG_MODULES > + while (array_len--) > + if (ccid_request_module(ccid_array[array_len])) > + return -1; > +#endif > + return 0; > +} > + > struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) > { > struct ccid_operations *ccid_ops; > struct ccid *ccid = NULL; > > ccids_read_lock(); > -#ifdef CONFIG_MODULES > - if (ccids[id] == NULL) { > - /* We only try to load if in process context */ > - ccids_read_unlock(); > - if (gfp & GFP_ATOMIC) > - goto out; > - request_module("net-dccp-ccid-%d", id); > - ccids_read_lock(); > - } > -#endif > ccid_ops = ccids[id]; > if (ccid_ops == NULL) > goto out_unlock; > --- a/net/dccp/feat.c > +++ b/net/dccp/feat.c > @@ -1163,6 +1163,11 @@ int dccp_feat_init(struct sock *sk) > ccid_get_builtin_ccids(&rx.val, &rx.len)) > return -ENOBUFS; > > + /* Pre-load all CCID modules that are going to be advertised */ > + rc = -EUNATCH; > + if (ccid_request_modules(tx.val, tx.len)) > + goto free_ccid_lists; > + > if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) || > !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len)) > goto free_ccid_lists; > -- > To unsubscribe from this list: send the line "unsubscribe dccp" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
> Since the lock is dropped after checking ccids[id] then there's > a window where multiple request_module()s can be called if multiple > applications create a DCCP socket at a same time. The code below > should do the same without a lock (ccids is a static array, > so ccids[N] is always at the same place). > > static int ccid_request_module(u8 id) > { > if (!in_atomic()) { > rmb(); > if (ccids[id] == NULL) > return request_module("net-dccp-ccid-%d", id); > } > return 0; > } > I think that the code (not yours) is in general misleading. It stems from an earlier phase of the DCCP development. Now, with the present patch set, the rationale is * all CCIDs that are advertised must be loaded * this is a subset of the configured CCIDs and contains at least one CCID * the request_module is only ever executed once, when the first DCCP application tries to pre-load the CCIDs it wants to advertise Hence I think we have a chance by going completely lockless here, by loading all configured CCIDs at runtime. In this manner the per-connection check "are all advertised CCIDs are loaded?" falls under the table, we do not need to worry about concurrent access, and loading DCCP implies that all needed CCIDs are there. Arnaldo would you be okay with such an approach ? I would be willing to revise the patch set accordingly. Gerrit -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
| Hence I think we have a chance by going completely lockless here, by | loading all configured CCIDs at runtime. In this manner the per-connection | check "are all advertised CCIDs are loaded?" falls under the table, we | do not need to worry about concurrent access, and loading DCCP implies that | all needed CCIDs are there. Unfortunately this won't work since the CCIDs depend on dccp.ko being fully loaded, so requiring that CCID module are loaded during the loading process of dccp.ko creates a cyclic dependency. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
| > +/** | > + * ccid_request_module - Pre-load CCID module for later use | > + * This should be called only from process context (e.g. during connection | > + * setup) and is necessary for later calls to ccid_new (typically in software | > + * interrupt), so that it has the modules available when they are needed. | > + */ | > +static int ccid_request_module(u8 id) | > +{ | > + if (!in_atomic()) { | | Shouldn't the above be in a BUG_ON? It looks strange to simply refuse to | do that and return OK. | | > + ccids_read_lock(); | > + if (ccids[id] == NULL) { | > + ccids_read_unlock(); | > + return request_module("net-dccp-ccid-%d", id); | > + } | > + ccids_read_unlock(); | > + } | > + return 0; | > +} We can do this if you want, but in a way this is a suggestion for the existing code, compare with the original from ccid_new() below ccids_read_lock(); #ifdef CONFIG_MODULES if (ccids[id] == NULL) { /* We only try to load if in process context */ ccids_read_unlock(); if (gfp & GFP_ATOMIC) goto out; request_module("net-dccp-ccid-%d", id); ccids_read_lock(); } #endif ==> gfp is atomic when called from dccp_feat_update_ccid(), which is why in the previous feature-negotiation code DCCP crashed when trying to negotiate any CCID module that had not been loaded: new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC); meant that the module would not be reloaded. ==> On the other hand, when called via ccid_hc_{rx,tx}_new(), gfp=GFP_KERNEL and so an attempted module load would succeed. This explains why loading worked when starting with the default (non-negotiated) CCIDs. ==> So how do we remain? I am ok turning the above into a BUG_ON. As an alternative, since the function is only called in a single place (the initialisation of feat.c), we could consider removing the function entirely and merge its contents into dccp_feat_init(). Would that be better? -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
| > | > +static int ccid_request_module(u8 id) | > | > +{ | > | > + if (!in_atomic()) { | > | > + ccids_read_lock(); | > | > + if (ccids[id] == NULL) { | > | > + ccids_read_unlock(); | > | > + return request_module("net-dccp-ccid-%d", id); | > | > + } | > | > + ccids_read_unlock(); | > | > + } | > | > + return 0; | > | > +} | > | | > | Just a random thought: does this lock really do anything useful here? | > | | > Reading the (shared) 'ccids' array is the solution chosen to check whether | > the module for CCID with number 'id' is already loaded. | > | > It would be bad to call request_module() each time a new DCCP socket is | > opened. Using the 'ccids' array may not be the only way to check whether | > a given module (whose name depends on the value of 'id') is loaded. | > | > But if this solution is chosen, then it requires to protect the read | > access to 'ccids', which is shared among all DCCP sockets. | | Since the lock is dropped after checking ccids[id] then there's | a window where multiple request_module()s can be called if multiple | applications create a DCCP socket at a same time. The code below | should do the same without a lock (ccids is a static array, | so ccids[N] is always at the same place). | | static int ccid_request_module(u8 id) | { | if (!in_atomic()) { | rmb(); | if (ccids[id] == NULL) | return request_module("net-dccp-ccid-%d", id); | } | return 0; | } | Sorry Michael, but this is really just a "random thought". What you are in effect saying is that reader/writer locks can be replaced with just a read memory barrier. Please have a more detailed look at net/dccp/ccid.c. I also checked how other subsystems handle comparable situations of module loading: the implementation details differ, but the principle is the same: there are mutexes, semaphores, and spinlocks in use to protect those shared structures that are related to the loaded module. Hence your suggestion does not improve the code. I maintain that it is correct. And it has proven to work in the test tree for more than one year, including tests with up to 100 parallel (iperf) connections. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Gerrit Renker <gerrit@erg.abdn.ac.uk> Date: Tue, 16 Dec 2008 06:29:54 +0100 > | Hence I think we have a chance by going completely lockless here, by > | loading all configured CCIDs at runtime. In this manner the per-connection > | check "are all advertised CCIDs are loaded?" falls under the table, we > | do not need to worry about concurrent access, and loading DCCP implies that > | all needed CCIDs are there. > Unfortunately this won't work since the CCIDs depend on dccp.ko being > fully loaded, so requiring that CCID module are loaded during the > loading process of dccp.ko creates a cyclic dependency. I don't like this stuff at all. Every new connection you're going to loop over the CCID table and grab that CCID read lock N times. The first time it will do something meaningful, and then %99.999999999999 of subsequent calls will do nothing. What kind of overhead is deserved by that access pattern? And, if the first thing the first connection is going to do is load all the modules, there is ZERO reason to make them modular. It's just useless seperation and it adds all of this rediculious synchronization. If it's modular "for the sake of development" I'm sure you can simply reload the dccp.ko module when you make some CCID algorithm change. I'm tossing this patch set until we get something better in this area. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Em Tue, Dec 16, 2008 at 01:40:45AM -0800, David Miller escreveu: > From: Gerrit Renker <gerrit@erg.abdn.ac.uk> > Date: Tue, 16 Dec 2008 06:29:54 +0100 > > > | Hence I think we have a chance by going completely lockless here, by > > | loading all configured CCIDs at runtime. In this manner the per-connection > > | check "are all advertised CCIDs are loaded?" falls under the table, we > > | do not need to worry about concurrent access, and loading DCCP implies that > > | all needed CCIDs are there. > > Unfortunately this won't work since the CCIDs depend on dccp.ko being > > fully loaded, so requiring that CCID module are loaded during the > > loading process of dccp.ko creates a cyclic dependency. > > I don't like this stuff at all. > > Every new connection you're going to loop over the CCID > table and grab that CCID read lock N times. > > The first time it will do something meaningful, and then > %99.999999999999 of subsequent calls will do nothing. > > What kind of overhead is deserved by that access pattern? > > And, if the first thing the first connection is going to do > is load all the modules, there is ZERO reason to make them > modular. It's just useless seperation and it adds all of > this rediculious synchronization. > > If it's modular "for the sake of development" I'm sure you > can simply reload the dccp.ko module when you make some > CCID algorithm change. > > I'm tossing this patch set until we get something better > in this area. I guess that looking at tcp_set_congestion_control() could be a good start. 8-) - Arnaldo -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2008/12/16 Gerrit Renker <gerrit@erg.abdn.ac.uk>: > | Since the lock is dropped after checking ccids[id] then there's > | a window where multiple request_module()s can be called if multiple > | applications create a DCCP socket at a same time. The code below > | should do the same without a lock (ccids is a static array, > | so ccids[N] is always at the same place). > | > | static int ccid_request_module(u8 id) > | { > | if (!in_atomic()) { > | rmb(); > | if (ccids[id] == NULL) > | return request_module("net-dccp-ccid-%d", id); > | } > | return 0; > | } > | > Sorry Michael, but this is really just a "random thought". What you are > in effect saying is that reader/writer locks can be replaced with just a > read memory barrier. > Please have a more detailed look at net/dccp/ccid.c. I also checked how > other subsystems handle comparable situations of module loading: the > implementation details differ, but the principle is the same: there are > mutexes, semaphores, and spinlocks in use to protect those shared > structures that are related to the loaded module. > > Hence your suggestion does not improve the code. I maintain that it is > correct. And it has proven to work in the test tree for more than one > year, including tests with up to 100 parallel (iperf) connections. The read-lock is just a memory barrier here (not a read memory barrier as I wrote before) and it's not needed here. If I read the code correctly you are testing a single pointer to be NULL and don't really care about ordering wrt module initialization. You can actually annotate ccids[] as read_mostly (it's changed only on module load/unload) and protect it with RCU instead of the home-grown rwlock you are using. Best Regards, Michał Mirosław N§˛ćěr¸yúčŘb˛XŹśÇ§vŘ^)Ţş{.nÇ+ˇ§z×^ž)í ćčw*jgŹą¨śÝ˘j/ęäzšŢŕ2ިčÚ&˘)ߥŤaśÚţřŽGŤéhŽćj:+v¨wčŮĽ
From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Tue, 16 Dec 2008 09:19:08 -0200 > I guess that looking at tcp_set_congestion_control() could be a good > start. 8-) Certainly. But it seems to me we are dealing with a different situation here in DCCP. On the TCP side we have: 1) A default congestion control module, already loaded and which has been selected by the admin via a sysctl setting. 2) A socket option facility to select a non-default congestion control algorithm to use. These are both outside of the fast path. Whereas the DCCP case is right in the connection creation fast path and unconditionally executes, because it is trying to figure out what CCID algorithms it can advertise. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Em Tue, Dec 16, 2008 at 01:32:00PM -0800, David Miller escreveu: > From: Arnaldo Carvalho de Melo <acme@redhat.com> > Date: Tue, 16 Dec 2008 09:19:08 -0200 > > > I guess that looking at tcp_set_congestion_control() could be a good > > start. 8-) > > Certainly. > > But it seems to me we are dealing with a different situation here > in DCCP. > > On the TCP side we have: > > 1) A default congestion control module, already loaded and > which has been selected by the admin via a sysctl setting. Which we should have in DCCP too, its even in the RFC that CCID2 should be always available, making a good choice for a default one. > 2) A socket option facility to select a non-default congestion > control algorithm to use. This is the same for DCCP, i.e. the app can tell the ones it wants via setsockopt, and if the default is all they want, no need to ask for something to be loaded, i.e. ccid2 (or the one that we think should be the default, like reno -> BIC -> CUBIC) should be always available. > These are both outside of the fast path. > > Whereas the DCCP case is right in the connection creation fast path > and unconditionally executes, because it is trying to figure out what > CCID algorithms it can advertise. Well, there must be some way to locklessly advertise what was previously loaded (and thus can't ever be unloaded). - Arnaldo -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Tue, 16 Dec 2008 20:25:59 -0200 > Em Tue, Dec 16, 2008 at 01:32:00PM -0800, David Miller escreveu: > > From: Arnaldo Carvalho de Melo <acme@redhat.com> > > Date: Tue, 16 Dec 2008 09:19:08 -0200 > > > > Whereas the DCCP case is right in the connection creation fast path > > and unconditionally executes, because it is trying to figure out what > > CCID algorithms it can advertise. > > Well, there must be some way to locklessly advertise what was previously > loaded (and thus can't ever be unloaded). This gets us back to my original objection. Can these things be unloaded? If not, and they get unconditionally all loaded up on the first DCCP connection, why make them seperate modules at all? If they can get unloaded, then you need synchronization. I would recommend that everything gets built into dccp.ko and thus the table is fixed and never changes and thus no locking nor any of this funny mod loading is needed at all. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Em Tue, Dec 16, 2008 at 03:11:51PM -0800, David Miller escreveu: > From: Arnaldo Carvalho de Melo <acme@redhat.com> > Date: Tue, 16 Dec 2008 20:25:59 -0200 > > > Em Tue, Dec 16, 2008 at 01:32:00PM -0800, David Miller escreveu: > > > From: Arnaldo Carvalho de Melo <acme@redhat.com> > > > Date: Tue, 16 Dec 2008 09:19:08 -0200 > > > > > > Whereas the DCCP case is right in the connection creation fast path > > > and unconditionally executes, because it is trying to figure out what > > > CCID algorithms it can advertise. > > > > Well, there must be some way to locklessly advertise what was previously > > loaded (and thus can't ever be unloaded). > > This gets us back to my original objection. > > Can these things be unloaded? If not, and they get unconditionally > all loaded up on the first DCCP connection, why make them seperate > modules at all? > > If they can get unloaded, then you need synchronization. > > I would recommend that everything gets built into dccp.ko > and thus the table is fixed and never changes and thus no > locking nor any of this funny mod loading is needed at all. Perhaps we can have something like we have with the tcp congestion modules: the modules that are non experimental, because they are already in an RFC and/or the implementation was deemed stable, should be linked with dccp.ko and would by default be advertised, i.e. they are the system wide available CCIDs as configured at kernel build time. But then, if the user, after he creates the socket, on the slow path, does a setsockopt asking for a newer CCID (CCID4 is in the works, for instance) to be advertised for this specific connection or if it asks for some in the static set of CCIDs _not_ to be advertised, then the feature negotiation code will advertise the selected set. Only applications wanting newer stuff will incur the cost of the synch at connection time while the majority will not incur such costs by using the penguin peed static CCIDs. I.e. a VOIP app would say that it is not interested in CCID2, wanting only CCID3 or CCID4. IOW we're back to my suggestion on looking at tcp_set_congestion_control(). :-) - Arnaldo -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Em Wed, Dec 17, 2008 at 04:20:38PM -0200, Arnaldo Carvalho de Melo escreveu: > > IOW we're back to my suggestion on looking at > > tcp_set_congestion_control(). :-) > > I tried to test this using ttcp over loopback but the tree seems broken > somehow, with or without this patch I'm getting: > > Could not activate 0 at /home/acme/git/net-next-2.6/net/dccp/feat.c:1176 > > I tried doing a quick chase on this one but failed miserably, Gerrit, > any ideas? Well, without the patch the problem was that dccp_ccid2 was not being autoloaded, as soon as I manually loaded it, ttcp worked. Now to see why... - Arnaldo -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Quoting Arnaldo: | Em Wed, Dec 17, 2008 at 04:20:38PM -0200, Arnaldo Carvalho de Melo escreveu: | > > IOW we're back to my suggestion on looking at | > > tcp_set_congestion_control(). :-) | > | > I tried to test this using ttcp over loopback but the tree seems broken | > somehow, with or without this patch I'm getting: | > | > Could not activate 0 at /home/acme/git/net-next-2.6/net/dccp/feat.c:1176 | > | > I tried doing a quick chase on this one but failed miserably, Gerrit, | > any ideas? | | Well, without the patch the problem was that dccp_ccid2 was not being | autoloaded, as soon as I manually loaded it, ttcp worked. Now to see | why... I have acked your patch but haven't had time to compile and test it. Will do this in due course and integrate it into the test tree. With regard to the error message, this says that the feature with index 0 could not be activated. According to table 6.4 in RFC 4340 this is a bug because it tries to activate a reserved feature. Will do in-depth testing later on today. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Sorry for being late in replying, this is the answer from yesterday] I think that if we look too much into the direction of TCP we might miss some of the notable differences. In TCP the congestion module stays the same for the lifetime of a connection and there is no real negotiation. If the TCP congestion control module is a sender-side only modification then that even works transparently for any unmodified standard TCP client. In DCCP the negotiation is more like SIP where both endpoints compare their capabilities and make a hanshake to vote for a common value. (Although it is not supported by the current implementation, it is in theory possible to re-negotiate a CCID in the middle of a connection. This could for example make sense when * a satellite connection suddenly changes, e.g. reduced bandwidth due to rain fading; * a mobile IP device makes a handoff from Ethernet to WiFi.) Before a connection, a user can specify/select candidates: * he can advertise simply just "everything that is there"; * the other extreme is to use just a single CCID (where connection might fail due to lack of alternatives). Only the CCIDs requested currently by the users on a system need to be loaded, but it would make life much simpler if the configured modules were already loaded. With regard to "experimental versus standardised" CCIDs, it takes actually quite long before a RFC becomes sufficiently standardised. For the experimental CCIDs I think that David's suggestion is preferable: * for the sake of development we can create a special way of integrating whatever kind of new CCID we are testing; * instead of complicating the mainline mechanism by implementing answers for all (including pathological) possible cases. Thinking ahead, if in a few years there is suddenly an abundant choice of practically useful/useable CCIDs then it may make sense to divide CCIDs into separate modules again, after conquering their implementation. From both of your answers I see consensus that at least the standardised CCIDs should be integrated with dccp.ko. And there is already Arnaldo's patch to realise this, which is great. Will integrate Arnaldo's patch with the test tree and the current patch set. Will be back once this is accomplished. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
| > Hence your suggestion does not improve the code. I maintain that it is | > correct. And it has proven to work in the test tree for more than one | > year, including tests with up to 100 parallel (iperf) connections. | | The read-lock is just a memory barrier here (not a read memory barrier | as I wrote | before) and it's not needed here. If I read the code correctly you are | testing a single | pointer to be NULL and don't really care about ordering wrt module | initialization. | | You can actually annotate ccids[] as read_mostly (it's changed only on module | load/unload) and protect it with RCU instead of the home-grown rwlock | you are using. | Hm the details are not so important here. What is important is that your observation and questioning whether the code makes sense has lead to finding a deeper problem. Without your posting that may not have happened. So many thanks indeed, if you have any more observations, please keep them coming. -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Em Thu, Dec 18, 2008 at 06:41:10AM +0100, Gerrit Renker escreveu: > Quoting Arnaldo: > | Em Wed, Dec 17, 2008 at 04:20:38PM -0200, Arnaldo Carvalho de Melo escreveu: > | > > IOW we're back to my suggestion on looking at > | > > tcp_set_congestion_control(). :-) > | > > | > I tried to test this using ttcp over loopback but the tree seems broken > | > somehow, with or without this patch I'm getting: > | > > | > Could not activate 0 at /home/acme/git/net-next-2.6/net/dccp/feat.c:1176 > | > > | > I tried doing a quick chase on this one but failed miserably, Gerrit, > | > any ideas? > | > | Well, without the patch the problem was that dccp_ccid2 was not being > | autoloaded, as soon as I manually loaded it, ttcp worked. Now to see > | why... > I have acked your patch but haven't had time to compile and test it. > Will do this in due course and integrate it into the test tree. > > With regard to the error message, this says that the feature with index 0 > could not be activated. According to table 6.4 in RFC 4340 this is a bug I figured that out later, after some more tweaking and systemtapping it ended up with index 5 failing and that was the ackvec code that was not being included, just the stubs that always return NULL on the allocation routine, after that was fixed by removing CONFIG_IP_DCCP_ACKVEC, i.e. always including that code since CCID2 now is always included, all worked with a simple, over loopback, ttcp test. > because it tries to activate a reserved feature. > Will do in-depth testing later on today. Thanks! - Arnaldo -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Em Thu, Dec 18, 2008 at 06:46:31AM +0100, Gerrit Renker escreveu: > [Sorry for being late in replying, this is the answer from yesterday] > > I think that if we look too much into the direction of TCP we > might miss some of the notable differences. The look at TCP code was more in the sense that TCP doesn't takes the lock always, having the default congestion control module builtin and always "advertised"/used, but allows changing it in a slow path function. With the new patch this is what we have now in DCCP too. > In TCP the congestion module stays the same for the lifetime of a > connection and there is no real negotiation. If the TCP congestion > control module is a sender-side only modification then that even > works transparently for any unmodified standard TCP client. > > In DCCP the negotiation is more like SIP where both endpoints compare > their capabilities and make a hanshake to vote for a common value. > > (Although it is not supported by the current implementation, it > is in theory possible to re-negotiate a CCID in the middle of > a connection. This could for example make sense when > * a satellite connection suddenly changes, e.g. reduced > bandwidth due to rain fading; > * a mobile IP device makes a handoff from Ethernet to WiFi.) And that, at least for CCID2 and CCID3 now can be done wihtout having to first check if they are loaded, taking locks, etc, that was the crux of this discussion. > Before a connection, a user can specify/select candidates: > > * he can advertise simply just "everything that is there"; > * the other extreme is to use just a single CCID (where > connection might fail due to lack of alternatives). > > Only the CCIDs requested currently by the users on a system need > to be loaded, but it would make life much simpler if the configured > modules were already loaded. > > With regard to "experimental versus standardised" CCIDs, it takes > actually quite long before a RFC becomes sufficiently standardised. > > For the experimental CCIDs I think that David's suggestion is preferable: > > * for the sake of development we can create a special way of integrating > whatever kind of new CCID we are testing; > * instead of complicating the mainline mechanism by implementing answers > for all (including pathological) possible cases. I believe that the RFCv2 patch I submitted accomplishes these two goals with a minimal patch to the current code. And if we look higher up in the stack, say at sock_create we can see that we have this module loading verification all over again done at socket creation time for all families, but there it uses RCU, etc, but it used to be something familiar replaced by Stephen Hemminger in this cset: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=55737fda0bc73cb20f702301d8b52938a5a43630 Looks familiar? Yes, thats is where the ccid locking scheme came from. > Thinking ahead, if in a few years there is suddenly an abundant choice of > practically useful/useable CCIDs then it may make sense to divide CCIDs > into separate modules again, after conquering their implementation. > > > >From both of your answers I see consensus that at least the standardised > CCIDs should be integrated with dccp.ko. And there is already Arnaldo's > patch to realise this, which is great. > > Will integrate Arnaldo's patch with the test tree and the current patch > set. Will be back once this is accomplished. Thanks a lot, - Arnaldo -- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
--- a/net/dccp/ccid.h +++ b/net/dccp/ccid.h @@ -108,6 +108,7 @@ extern int ccid_get_builtin_ccids(u8 **ccid_array, u8 *array_len); extern int ccid_getsockopt_builtin_ccids(struct sock *sk, int len, char __user *, int __user *); +extern int ccid_request_modules(u8 const *ccid_array, u8 array_len); extern struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp); --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -196,22 +196,41 @@ int ccid_unregister(struct ccid_operations *ccid_ops) EXPORT_SYMBOL_GPL(ccid_unregister); +/** + * ccid_request_module - Pre-load CCID module for later use + * This should be called only from process context (e.g. during connection + * setup) and is necessary for later calls to ccid_new (typically in software + * interrupt), so that it has the modules available when they are needed. + */ +static int ccid_request_module(u8 id) +{ + if (!in_atomic()) { + ccids_read_lock(); + if (ccids[id] == NULL) { + ccids_read_unlock(); + return request_module("net-dccp-ccid-%d", id); + } + ccids_read_unlock(); + } + return 0; +} + +int ccid_request_modules(u8 const *ccid_array, u8 array_len) +{ +#ifdef CONFIG_MODULES + while (array_len--) + if (ccid_request_module(ccid_array[array_len])) + return -1; +#endif + return 0; +} + struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) { struct ccid_operations *ccid_ops; struct ccid *ccid = NULL; ccids_read_lock(); -#ifdef CONFIG_MODULES - if (ccids[id] == NULL) { - /* We only try to load if in process context */ - ccids_read_unlock(); - if (gfp & GFP_ATOMIC) - goto out; - request_module("net-dccp-ccid-%d", id); - ccids_read_lock(); - } -#endif ccid_ops = ccids[id]; if (ccid_ops == NULL) goto out_unlock; --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1163,6 +1163,11 @@ int dccp_feat_init(struct sock *sk) ccid_get_builtin_ccids(&rx.val, &rx.len)) return -ENOBUFS; + /* Pre-load all CCID modules that are going to be advertised */ + rc = -EUNATCH; + if (ccid_request_modules(tx.val, tx.len)) + goto free_ccid_lists; + if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) || !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len)) goto free_ccid_lists;