Message ID | 20180712134107.24967-1-colin.king@canonical.com |
---|---|
State | New |
Headers | show |
Series | [SRU,BIONIC] UBUNTU: SAUCE: (noup) zfs to 0.7.5-1ubuntu16.3 | expand |
On 12.07.2018 15:41, Colin King wrote: > From: Colin Ian King <colin.king@canonical.com> > > BugLink: http://bugs.launchpad.net/bugs/1781364 > > This is a sync of zfs 0.7.5-1ubuntu16.3 that fixes a mount/umount deadlock > for upstream ZFS commit ac09630d8b0b ("Fix zpl_mount() deadlock") > > ZFS commit 93b43af inadvertently introduced the following scenario which > can result in a deadlock. This issue was most easily reproduced by > LXD containers using a ZFS storage backend but should be reproducible > under any workload which is frequently mounting and unmounting. > > -- THREAD A -- > spa_sync() > spa_sync_upgrades() > rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); <- Waiting on B > > -- THREAD B -- > mount_fs() > zpl_mount() > zpl_mount_impl() > dmu_objset_hold() > dmu_objset_hold_flags() > dsl_pool_hold() > dsl_pool_config_enter() > rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); > sget() > sget_userns() > grab_super() > down_write(&s->s_umount); <- Waiting on C > > -- THREAD C -- > cleanup_mnt() > deactivate_super() > down_write(&s->s_umount); > deactivate_locked_super() > zpl_kill_sb() > kill_anon_super() > generic_shutdown_super() > sync_filesystem() > zpl_sync_fs() > zfs_sync() > zil_commit() > txg_wait_synced() <- Waiting on A > > Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> > (backport from ZFS upstream commit ac09630d8b0bf6c92084a30fdaefd03fd0adbdc1) > Signed-off-by: Colin Ian King <colin.king@canonical.com> Acked-by: Stefan Bader <stefan.bader@canonical.com> > --- > zfs/META | 2 +- > zfs/include/sys/zfs_vfsops.h | 1 + > zfs/module/zfs/zpl_super.c | 11 ++++++++++- > 3 files changed, 12 insertions(+), 2 deletions(-) > > diff --git a/zfs/META b/zfs/META > index 2110eef1b19d..c0a1e936665d 100644 > --- a/zfs/META > +++ b/zfs/META > @@ -2,7 +2,7 @@ Meta: 1 > Name: zfs > Branch: 1.0 > Version: 0.7.5 > -Release: 1ubuntu15 > +Release: 1ubuntu16.3 > Release-Tags: relext > License: CDDL > Author: OpenZFS on Linux > diff --git a/zfs/include/sys/zfs_vfsops.h b/zfs/include/sys/zfs_vfsops.h > index 2326da422183..927153b25af9 100644 > --- a/zfs/include/sys/zfs_vfsops.h > +++ b/zfs/include/sys/zfs_vfsops.h > @@ -32,6 +32,7 @@ > #include <sys/zil.h> > #include <sys/sa.h> > #include <sys/rrwlock.h> > +#include <sys/dsl_dataset.h> > #include <sys/zfs_ioctl.h> > > #ifdef __cplusplus > diff --git a/zfs/module/zfs/zpl_super.c b/zfs/module/zfs/zpl_super.c > index 848bb3c10627..6292e2c2e2df 100644 > --- a/zfs/module/zfs/zpl_super.c > +++ b/zfs/module/zfs/zpl_super.c > @@ -271,8 +271,17 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) > if (err) > return (ERR_PTR(-err)); > > + /* > + * The dsl pool lock must be released prior to calling sget(). > + * It is possible sget() may block on the lock in grab_super() > + * while deactivate_super() holds that same lock and waits for > + * a txg sync. If the dsl_pool lock is held over over sget() > + * this can prevent the pool sync and cause a deadlock. > + */ > + dsl_pool_rele(dmu_objset_pool(os), FTAG); > s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os); > - dmu_objset_rele(os, FTAG); > + dsl_dataset_rele(dmu_objset_ds(os), FTAG); > + > if (IS_ERR(s)) > return (ERR_CAST(s)); > >
On 07/12/18 15:41, Colin King wrote: > From: Colin Ian King <colin.king@canonical.com> > > BugLink: http://bugs.launchpad.net/bugs/1781364 > > This is a sync of zfs 0.7.5-1ubuntu16.3 that fixes a mount/umount deadlock > for upstream ZFS commit ac09630d8b0b ("Fix zpl_mount() deadlock") > > ZFS commit 93b43af inadvertently introduced the following scenario which > can result in a deadlock. This issue was most easily reproduced by > LXD containers using a ZFS storage backend but should be reproducible > under any workload which is frequently mounting and unmounting. > > -- THREAD A -- > spa_sync() > spa_sync_upgrades() > rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); <- Waiting on B > > -- THREAD B -- > mount_fs() > zpl_mount() > zpl_mount_impl() > dmu_objset_hold() > dmu_objset_hold_flags() > dsl_pool_hold() > dsl_pool_config_enter() > rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); > sget() > sget_userns() > grab_super() > down_write(&s->s_umount); <- Waiting on C > > -- THREAD C -- > cleanup_mnt() > deactivate_super() > down_write(&s->s_umount); > deactivate_locked_super() > zpl_kill_sb() > kill_anon_super() > generic_shutdown_super() > sync_filesystem() > zpl_sync_fs() > zfs_sync() > zil_commit() > txg_wait_synced() <- Waiting on A > > Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> > (backport from ZFS upstream commit ac09630d8b0bf6c92084a30fdaefd03fd0adbdc1) > Signed-off-by: Colin Ian King <colin.king@canonical.com> Acked-by: Kleber Sacilotto de Souza <kleber.souza@canonical.com> > --- > zfs/META | 2 +- > zfs/include/sys/zfs_vfsops.h | 1 + > zfs/module/zfs/zpl_super.c | 11 ++++++++++- > 3 files changed, 12 insertions(+), 2 deletions(-) > > diff --git a/zfs/META b/zfs/META > index 2110eef1b19d..c0a1e936665d 100644 > --- a/zfs/META > +++ b/zfs/META > @@ -2,7 +2,7 @@ Meta: 1 > Name: zfs > Branch: 1.0 > Version: 0.7.5 > -Release: 1ubuntu15 > +Release: 1ubuntu16.3 > Release-Tags: relext > License: CDDL > Author: OpenZFS on Linux > diff --git a/zfs/include/sys/zfs_vfsops.h b/zfs/include/sys/zfs_vfsops.h > index 2326da422183..927153b25af9 100644 > --- a/zfs/include/sys/zfs_vfsops.h > +++ b/zfs/include/sys/zfs_vfsops.h > @@ -32,6 +32,7 @@ > #include <sys/zil.h> > #include <sys/sa.h> > #include <sys/rrwlock.h> > +#include <sys/dsl_dataset.h> > #include <sys/zfs_ioctl.h> > > #ifdef __cplusplus > diff --git a/zfs/module/zfs/zpl_super.c b/zfs/module/zfs/zpl_super.c > index 848bb3c10627..6292e2c2e2df 100644 > --- a/zfs/module/zfs/zpl_super.c > +++ b/zfs/module/zfs/zpl_super.c > @@ -271,8 +271,17 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) > if (err) > return (ERR_PTR(-err)); > > + /* > + * The dsl pool lock must be released prior to calling sget(). > + * It is possible sget() may block on the lock in grab_super() > + * while deactivate_super() holds that same lock and waits for > + * a txg sync. If the dsl_pool lock is held over over sget() > + * this can prevent the pool sync and cause a deadlock. > + */ > + dsl_pool_rele(dmu_objset_pool(os), FTAG); > s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os); > - dmu_objset_rele(os, FTAG); > + dsl_dataset_rele(dmu_objset_ds(os), FTAG); > + > if (IS_ERR(s)) > return (ERR_CAST(s)); > >
On 07/12/18 15:41, Colin King wrote: > From: Colin Ian King <colin.king@canonical.com> > > BugLink: http://bugs.launchpad.net/bugs/1781364 > > This is a sync of zfs 0.7.5-1ubuntu16.3 that fixes a mount/umount deadlock > for upstream ZFS commit ac09630d8b0b ("Fix zpl_mount() deadlock") > > ZFS commit 93b43af inadvertently introduced the following scenario which > can result in a deadlock. This issue was most easily reproduced by > LXD containers using a ZFS storage backend but should be reproducible > under any workload which is frequently mounting and unmounting. > > -- THREAD A -- > spa_sync() > spa_sync_upgrades() > rrw_enter(&dp->dp_config_rwlock, RW_WRITER, FTAG); <- Waiting on B > > -- THREAD B -- > mount_fs() > zpl_mount() > zpl_mount_impl() > dmu_objset_hold() > dmu_objset_hold_flags() > dsl_pool_hold() > dsl_pool_config_enter() > rrw_enter(&dp->dp_config_rwlock, RW_READER, tag); > sget() > sget_userns() > grab_super() > down_write(&s->s_umount); <- Waiting on C > > -- THREAD C -- > cleanup_mnt() > deactivate_super() > down_write(&s->s_umount); > deactivate_locked_super() > zpl_kill_sb() > kill_anon_super() > generic_shutdown_super() > sync_filesystem() > zpl_sync_fs() > zfs_sync() > zil_commit() > txg_wait_synced() <- Waiting on A > > Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> > (backport from ZFS upstream commit ac09630d8b0bf6c92084a30fdaefd03fd0adbdc1) > Signed-off-by: Colin Ian King <colin.king@canonical.com> > --- > zfs/META | 2 +- > zfs/include/sys/zfs_vfsops.h | 1 + > zfs/module/zfs/zpl_super.c | 11 ++++++++++- > 3 files changed, 12 insertions(+), 2 deletions(-) > > diff --git a/zfs/META b/zfs/META > index 2110eef1b19d..c0a1e936665d 100644 > --- a/zfs/META > +++ b/zfs/META > @@ -2,7 +2,7 @@ Meta: 1 > Name: zfs > Branch: 1.0 > Version: 0.7.5 > -Release: 1ubuntu15 > +Release: 1ubuntu16.3 > Release-Tags: relext > License: CDDL > Author: OpenZFS on Linux > diff --git a/zfs/include/sys/zfs_vfsops.h b/zfs/include/sys/zfs_vfsops.h > index 2326da422183..927153b25af9 100644 > --- a/zfs/include/sys/zfs_vfsops.h > +++ b/zfs/include/sys/zfs_vfsops.h > @@ -32,6 +32,7 @@ > #include <sys/zil.h> > #include <sys/sa.h> > #include <sys/rrwlock.h> > +#include <sys/dsl_dataset.h> > #include <sys/zfs_ioctl.h> > > #ifdef __cplusplus > diff --git a/zfs/module/zfs/zpl_super.c b/zfs/module/zfs/zpl_super.c > index 848bb3c10627..6292e2c2e2df 100644 > --- a/zfs/module/zfs/zpl_super.c > +++ b/zfs/module/zfs/zpl_super.c > @@ -271,8 +271,17 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) > if (err) > return (ERR_PTR(-err)); > > + /* > + * The dsl pool lock must be released prior to calling sget(). > + * It is possible sget() may block on the lock in grab_super() > + * while deactivate_super() holds that same lock and waits for > + * a txg sync. If the dsl_pool lock is held over over sget() > + * this can prevent the pool sync and cause a deadlock. > + */ > + dsl_pool_rele(dmu_objset_pool(os), FTAG); > s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os); > - dmu_objset_rele(os, FTAG); > + dsl_dataset_rele(dmu_objset_ds(os), FTAG); > + > if (IS_ERR(s)) > return (ERR_CAST(s)); > > Applied to bionic/master-next branch. Thanks, Kleber
diff --git a/zfs/META b/zfs/META index 2110eef1b19d..c0a1e936665d 100644 --- a/zfs/META +++ b/zfs/META @@ -2,7 +2,7 @@ Meta: 1 Name: zfs Branch: 1.0 Version: 0.7.5 -Release: 1ubuntu15 +Release: 1ubuntu16.3 Release-Tags: relext License: CDDL Author: OpenZFS on Linux diff --git a/zfs/include/sys/zfs_vfsops.h b/zfs/include/sys/zfs_vfsops.h index 2326da422183..927153b25af9 100644 --- a/zfs/include/sys/zfs_vfsops.h +++ b/zfs/include/sys/zfs_vfsops.h @@ -32,6 +32,7 @@ #include <sys/zil.h> #include <sys/sa.h> #include <sys/rrwlock.h> +#include <sys/dsl_dataset.h> #include <sys/zfs_ioctl.h> #ifdef __cplusplus diff --git a/zfs/module/zfs/zpl_super.c b/zfs/module/zfs/zpl_super.c index 848bb3c10627..6292e2c2e2df 100644 --- a/zfs/module/zfs/zpl_super.c +++ b/zfs/module/zfs/zpl_super.c @@ -271,8 +271,17 @@ zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) if (err) return (ERR_PTR(-err)); + /* + * The dsl pool lock must be released prior to calling sget(). + * It is possible sget() may block on the lock in grab_super() + * while deactivate_super() holds that same lock and waits for + * a txg sync. If the dsl_pool lock is held over over sget() + * this can prevent the pool sync and cause a deadlock. + */ + dsl_pool_rele(dmu_objset_pool(os), FTAG); s = zpl_sget(fs_type, zpl_test_super, set_anon_super, flags, os); - dmu_objset_rele(os, FTAG); + dsl_dataset_rele(dmu_objset_ds(os), FTAG); + if (IS_ERR(s)) return (ERR_CAST(s));