Message ID | 20190321020218.5154-8-tytso@mit.edu |
---|---|
State | Accepted, archived |
Headers | show |
Series | [1/9] e2scrub: check to make sure lvm2 is installed | expand |
On Wed, Mar 20, 2019 at 10:02:17PM -0400, Theodore Ts'o wrote: > From: "Darrick J. Wong" <darrick.wong@oracle.com> > > Paul Menzel reported that the e2scrub_all reaper service that runs at > startup takes a long time to run, and Ted Ts'o pointed out that we could > do a lot less work by using lvs as the outer loop in the ext4 filesystem > probe function so that we only have to lsblk the lvm devices containing > ext4 filesystems. > > Therefore, refactor the loops to put lvs first, which should boost speed > a bit. > > [ Made some of the further optimizations suggested by Lukas Czerner. -- TYT ] Those chnages LGTM, Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> --D > > Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > Signed-off-by: Theodore Ts'o <tytso@mit.edu> > --- > scrub/e2scrub_all.in | 34 +++++++++++++++++++++++----------- > 1 file changed, 23 insertions(+), 11 deletions(-) > > diff --git a/scrub/e2scrub_all.in b/scrub/e2scrub_all.in > index 4cb90a0de..cad232987 100644 > --- a/scrub/e2scrub_all.in > +++ b/scrub/e2scrub_all.in > @@ -22,6 +22,7 @@ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin > > scrub_all=0 > snap_size_mb=256 > +reap=0 > conffile="@root_sysconfdir@/e2scrub.conf" > > test -f "${conffile}" && . "${conffile}" > @@ -65,7 +66,7 @@ exitcode() { > while getopts "nrAV" opt; do > case "${opt}" in > "n") DBG="echo Would execute: " ;; > - "r") scrub_args="${scrub_args} -r";; > + "r") scrub_args="${scrub_args} -r"; reap=1;; > "A") scrub_all=1;; > "V") print_version; exitcode 0;; > *) print_help; exitcode 2;; > @@ -88,9 +89,12 @@ if ! type lvcreate >& /dev/null ; then > fi > > # Find scrub targets, make sure we only do this once. 
> -ls_scrub_targets() { > - lsblk -o NAME,FSTYPE,MOUNTPOINT -p -P -n | while read vars; do > +ls_scan_targets() { > + lvs --name-prefixes -o vg_name,lv_path \ > + -S lv_active=active,lv_role=public --noheadings | \ > + while read vars; do > eval "${vars}" > + eval "$(lsblk -o FSTYPE,MOUNTPOINT -p -P -n "${LVM2_LV_PATH}")" > > # Skip non-ext[234] > case "${FSTYPE}" in > @@ -103,12 +107,6 @@ ls_scrub_targets() { > continue; > fi > > - # Skip non-lvm devices and lvm snapshots > - lvm_vars="$(lvs --nameprefixes -o vg_name,lv_name,lv_role --noheadings "${NAME}" 2> /dev/null)" > - test $? -ne 0 && continue > - eval "${lvm_vars}" > - echo "${LVM2_LV_ROLE}" | grep -q "snapshot" && continue > - > free_space="$(vgs -o vg_free --units m --noheadings --no-suffix "${LVM2_VG_NAME}" 2> /dev/null | sed -e 's/\..*//')" > test "${snap_size_mb}" -gt "${free_space}" && continue > > @@ -120,6 +118,20 @@ ls_scrub_targets() { > done | sort | uniq > } > > +# Find leftover scrub snapshots > +ls_reap_targets() { > + lvs -o lv_path -S lv_role=snapshot -S lv_name=~\(e2scrub$\) --noheadings > +} > + > +# Figure out what we're targeting > +ls_targets() { > + if [ "${reap}" -eq 1 ]; then > + ls_reap_targets > + else > + ls_scan_targets > + fi > +} > + > # systemd doesn't know to do path escaping on the instance variable we pass > # to the e2scrub service, which breaks things if there is a dash in the path > # name. Therefore, do the path escaping ourselves if needed. > @@ -140,10 +152,10 @@ escape_path_for_systemd() { > > # Scrub any mounted fs on lvm by creating a snapshot and fscking that. > stdin="$(realpath /dev/stdin)" > -ls_scrub_targets | while read tgt; do > +ls_targets | while read tgt; do > # If we're not reaping and systemd is present, try invoking the > # systemd service. 
> - if [ -z "${scrub_args}" ] && type systemctl > /dev/null 2>&1; then > + if [ "${reap}" -ne 1 ] && type systemctl > /dev/null 2>&1; then > tgt_esc="$(escape_path_for_systemd "${tgt}")" > ${DBG} systemctl start "e2scrub@${tgt_esc}" 2> /dev/null < "${stdin}" > res=$? > -- > 2.19.1 >
On Wed, Mar 20, 2019 at 10:02:17PM -0400, Theodore Ts'o wrote: > From: "Darrick J. Wong" <darrick.wong@oracle.com> > > Paul Menzel reported that the e2scrub_all reaper service that runs at > startup takes a long time to run, and Ted Ts'o pointed out that we could > do a lot less work by using lvs as the outer loop in the ext4 filesystem > probe function so that we only have to lsblk the lvm devices containing > ext4 filesystems. > > Therefore, refactor the loops to put lvs first, which should boost speed > a bit. > > [ Made some of the further optimizations suggested by Lukas Czerner. -- TYT ] > > Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > Signed-off-by: Theodore Ts'o <tytso@mit.edu> > --- > scrub/e2scrub_all.in | 34 +++++++++++++++++++++++----------- > 1 file changed, 23 insertions(+), 11 deletions(-) > > diff --git a/scrub/e2scrub_all.in b/scrub/e2scrub_all.in > index 4cb90a0de..cad232987 100644 > --- a/scrub/e2scrub_all.in > +++ b/scrub/e2scrub_all.in > @@ -22,6 +22,7 @@ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin > > scrub_all=0 > snap_size_mb=256 > +reap=0 > conffile="@root_sysconfdir@/e2scrub.conf" > > test -f "${conffile}" && . "${conffile}" > @@ -65,7 +66,7 @@ exitcode() { > while getopts "nrAV" opt; do > case "${opt}" in > "n") DBG="echo Would execute: " ;; > - "r") scrub_args="${scrub_args} -r";; > + "r") scrub_args="${scrub_args} -r"; reap=1;; > "A") scrub_all=1;; > "V") print_version; exitcode 0;; > *) print_help; exitcode 2;; > @@ -88,9 +89,12 @@ if ! type lvcreate >& /dev/null ; then > fi > > # Find scrub targets, make sure we only do this once. 
> -ls_scrub_targets() { > - lsblk -o NAME,FSTYPE,MOUNTPOINT -p -P -n | while read vars; do > +ls_scan_targets() { > + lvs --name-prefixes -o vg_name,lv_path \ > + -S lv_active=active,lv_role=public --noheadings | \ > + while read vars; do > eval "${vars}" > + eval "$(lsblk -o FSTYPE,MOUNTPOINT -p -P -n "${LVM2_LV_PATH}")" > > # Skip non-ext[234] > case "${FSTYPE}" in > @@ -103,12 +107,6 @@ ls_scrub_targets() { > continue; > fi > > - # Skip non-lvm devices and lvm snapshots > - lvm_vars="$(lvs --nameprefixes -o vg_name,lv_name,lv_role --noheadings "${NAME}" 2> /dev/null)" > - test $? -ne 0 && continue > - eval "${lvm_vars}" > - echo "${LVM2_LV_ROLE}" | grep -q "snapshot" && continue This is wrong. lvs --name-prefixes -o vg_name,lv_path \ -S lv_active=active,lv_role=public --noheadings | \ lv_role=public does not exclude snapshots and so it will fail later in e2scrub when you try to create a snapshot of a thicksnapshot which is not supported. Snapshot of a thinspanshot is allowed though, so we might want to include those. Not sure if it's wise to do it by default, but regardless it's probably something for a separate change. Also I am not sure what's the rush, but it seems you've ignored my other suggestions. -Lukas > - > free_space="$(vgs -o vg_free --units m --noheadings --no-suffix "${LVM2_VG_NAME}" 2> /dev/null | sed -e 's/\..*//')" > test "${snap_size_mb}" -gt "${free_space}" && continue > > @@ -120,6 +118,20 @@ ls_scrub_targets() { > done | sort | uniq > } > > +# Find leftover scrub snapshots > +ls_reap_targets() { > + lvs -o lv_path -S lv_role=snapshot -S lv_name=~\(e2scrub$\) --noheadings > +} > + > +# Figure out what we're targeting > +ls_targets() { > + if [ "${reap}" -eq 1 ]; then > + ls_reap_targets > + else > + ls_scan_targets > + fi > +} > + > # systemd doesn't know to do path escaping on the instance variable we pass > # to the e2scrub service, which breaks things if there is a dash in the path > # name. 
Therefore, do the path escaping ourselves if needed. > @@ -140,10 +152,10 @@ escape_path_for_systemd() { > > # Scrub any mounted fs on lvm by creating a snapshot and fscking that. > stdin="$(realpath /dev/stdin)" > -ls_scrub_targets | while read tgt; do > +ls_targets | while read tgt; do > # If we're not reaping and systemd is present, try invoking the > # systemd service. > - if [ -z "${scrub_args}" ] && type systemctl > /dev/null 2>&1; then > + if [ "${reap}" -ne 1 ] && type systemctl > /dev/null 2>&1; then > tgt_esc="$(escape_path_for_systemd "${tgt}")" > ${DBG} systemctl start "e2scrub@${tgt_esc}" 2> /dev/null < "${stdin}" > res=$? > -- > 2.19.1 >
On Thu, Mar 21, 2019 at 11:27:42AM +0100, Lukas Czerner wrote: > > lv_role=public does not exclude snapshots and so it will fail later in > e2scrub when you try to create a snapshot of a thicksnapshot which is > not supported. Ah, good point. Thanks for pointing that out. > Also I am not sure what's the rush, but it seems you've ignored my other > suggestions. I did consider them all, but there were reasons why I didn't use them. One of them wasn't practical because I needed LVM2_VG_NAME; when you made that suggestion, I hadn't published the patch needed to fix Debian Bug #924301. Of course, if we use your suggestion of using "-S vg_free > ${snap_size}" it will obviate that need; so thanks for that suggestion. The reason why I didn't take one of your other suggestions is that we need to check whether or not the file system is mounted, and so we needed the mountpoint in lsblk, and once you ask for the mountpoint, we can no longer use awk to select a field, since an unmounted file system shows up as a an empty column. % sudo lsblk -o MOUNTPOINT,FSTYPE,NAME -l LVM2_member nvme0n1p3_crypt ext4 lambda-uroot [SWAP] swap lambda-swap_1 / ext4 lambda-root ext4 lambda-old--files ext4 lambda-library ext4 lambda-test--4k ext4 lambda-scratch ext4 lambda-test--1k lambda-scratch2 lambda-scratch3 ext4 lambda-results lambda-thinpool_tmeta lambda-thinpool_tdata I also really didn't like using grep to select the file system type ext[234], since if it would falsely select a LV name that contained "ext4", e.g.: /home/dave xfs rhel-ext4--sucks :-) We could have used awk to select the field, but that still doesn't deal with the mountpoint column being empty when it is unmounted. I did briefly consider using lsblk -J, but I didn't want to add a dependency on the jq[1] package (and I didn't even know if RHEL/Fedora packages jq). [1] https://packages.debian.org/stretch/jq Cheers, - Ted
On Thu, Mar 21, 2019 at 10:31:41AM -0400, Theodore Ts'o wrote: > On Thu, Mar 21, 2019 at 11:27:42AM +0100, Lukas Czerner wrote: > > > > lv_role=public does not exclude snapshots and so it will fail later in > > e2scrub when you try to create a snapshot of a thicksnapshot which is > > not supported. > > Ah, good point. Thanks for pointing that out. > > > Also I am not sure what's the rush, but it seems you've ignored my other > > suggestions. > > I did consider them all, but there were reasons why I didn't use them. > One of them wasn't practical because I needed LVM2_VG_NAME; when you > made that suggestion, I hadn't published the patch needed to fix > Debian Bug #924301. Of course, if we use your suggestion of using "-S > vg_free > ${snap_size}" it will obviate that need; so thanks for that > suggestion. > > The reason why I didn't take one of your other suggestions is that we > need to check whether or not the file system is mounted, and so we > needed the mountpoint in lsblk, and once you ask for the mountpoint, > we can no longer use awk to select a field, since an unmounted file > system shows up as a an empty column. > > % sudo lsblk -o MOUNTPOINT,FSTYPE,NAME -l > LVM2_member nvme0n1p3_crypt > ext4 lambda-uroot > [SWAP] swap lambda-swap_1 > / ext4 lambda-root > ext4 lambda-old--files > ext4 lambda-library > ext4 lambda-test--4k > ext4 lambda-scratch > ext4 lambda-test--1k > lambda-scratch2 > lambda-scratch3 > ext4 lambda-results > lambda-thinpool_tmeta > lambda-thinpool_tdata So the good news is that it's not really a problem. What you really want is the mountpoint if it exists and the device otherwise right ? Then my suggestion will do just that becuase awk will separate the fieds with FS (field separator) which by default is a single space. But space is special in awk becuase the fields will be separated by a runs of space, so if you pick the fileds just right you'll get what you want. 
So if I run this on my system # lsblk -o MOUNTPOINT,NAME,FSTYPE -p -n -l `lvs -o lv_path -S lv_active=active,lv_role=public,lv_role!=snapshot,vg_free\>256 --noheadings` I'll get /dev/mapper/vg_perpetuum-ext4--suck /dev/mapper/vg_perpetuum-fedora29 /dev/mapper/vg_perpetuum-fedora29--2 /dev/mapper/vg_perpetuum-freebsd /mnt/kernel /dev/mapper/vg_perpetuum-kernel ext4 /home /dev/mapper/vg_perpetuum-lv_home xfs / /dev/mapper/vg_perpetuum-lv_root xfs [SWAP] /dev/mapper/vg_perpetuum-lv_swap swap /var /dev/mapper/vg_perpetuum-lv_var xfs /dev/mapper/vg_perpetuum-lvol001 ext4 /dev/mapper/vg_perpetuum-overlay ext4 /dev/mapper/vg_perpetuum-rhel7 /dev/mapper/vg_perpetuum-rhel8 /dev/mapper/vg_perpetuum-test0 ext3 /dev/mapper/vg_perpetuum-test1 xfs /dev/mapper/vg_perpetuum-test10 ext4 /dev/mapper/vg_perpetuum-test11 ext4 /dev/mapper/vg_perpetuum-test12 /dev/mapper/vg_perpetuum-test2 /dev/mapper/vg_perpetuum-test3 ext4 /dev/mapper/vg_perpetuum-test4 ext4 /dev/mapper/vg_perpetuum-test5 /dev/mapper/vg_perpetuum-test6 LVM2_member /dev/mapper/vg_perpetuum-test7 /dev/mapper/vg_perpetuum-test8 /dev/mapper/vg_perpetuum-test9 ext4 /dev/mapper/vg_perpetuum-testing ext4 /dev/mapper/vg_perpetuum-testing1 xfs /dev/mapper/vg_perpetuum-thinvolume now if I add | awk '{print $1}' I'll get /dev/mapper/vg_perpetuum-ext4--suck /dev/mapper/vg_perpetuum-fedora29 /dev/mapper/vg_perpetuum-fedora29--2 /dev/mapper/vg_perpetuum-freebsd /mnt/kernel /home / [SWAP] /var /dev/mapper/vg_perpetuum-lvol001 /dev/mapper/vg_perpetuum-overlay /dev/mapper/vg_perpetuum-rhel7 /dev/mapper/vg_perpetuum-rhel8 /dev/mapper/vg_perpetuum-test0 /dev/mapper/vg_perpetuum-test1 /dev/mapper/vg_perpetuum-test10 /dev/mapper/vg_perpetuum-test11 /dev/mapper/vg_perpetuum-test12 /dev/mapper/vg_perpetuum-test2 /dev/mapper/vg_perpetuum-test3 /dev/mapper/vg_perpetuum-test4 /dev/mapper/vg_perpetuum-test5 /dev/mapper/vg_perpetuum-test6 /dev/mapper/vg_perpetuum-test7 /dev/mapper/vg_perpetuum-test8 /dev/mapper/vg_perpetuum-test9 
/dev/mapper/vg_perpetuum-testing /dev/mapper/vg_perpetuum-testing1 /dev/mapper/vg_perpetuum-thinvolume hence I get mountpoin where the volume is mounted and the device where it is not. That's what we need right ? What I did not consider was [SWAP] but we can get rid of that easily. grep -v '[SWAP]' > > I also really didn't like using grep to select the file system type > ext[234], since if it would falsely select a LV name that contained > "ext4", e.g.: > > /home/dave xfs rhel-ext4--sucks That can be dealt with as well as well just by grepping for ' ext[234]$' So in the end we'll have something like # lsblk -o MOUNTPOINT,NAME,FSTYPE -p -n -l `lvs -o lv_path -S lv_active=active,lv_role=public,lv_role!=snapshot,vg_free\>256 --noheadings` | grep -v '[SWAP]' | grep ' ext[234]$' | awk '{print $1}' and so on my system this gives me /mnt/kernel /dev/mapper/vg_perpetuum-lvol001 /dev/mapper/vg_perpetuum-overlay /dev/mapper/vg_perpetuum-test0 /dev/mapper/vg_perpetuum-test10 /dev/mapper/vg_perpetuum-test11 /dev/mapper/vg_perpetuum-test3 /dev/mapper/vg_perpetuum-test4 /dev/mapper/vg_perpetuum-test9 /dev/mapper/vg_perpetuum-testing And leaving out mounted file systems is again as simple as grep -v '^/dev/' Have I missed something ? Now for the performance. I drop caches, then run each twice the ls_scan_targets function runs real 0m0.575s user 0m0.086s sys 0m0.179s real 0m0.241s user 0m0.088s sys 0m0.169s My one-line suggestion runs real 0m0.275s user 0m0.027s sys 0m0.047s real 0m0.069s user 0m0.015s sys 0m0.052s So the difference is significant, although I would not consider it meaningful since it's really low anyway, but some people complained so I guess someone cares and people do have different systems so... > > :-) > > We could have used awk to select the field, but that still doesn't > deal with the mountpoint column being empty when it is unmounted. 
I > did briefly consider using lsblk -J, but I didn't want to add a > dependency on the jq[1] package (and I didn't even know if RHEL/Fedora > packages jq). > > [1] https://packages.debian.org/stretch/jq Yes Fedora does have it, but I agree that adding this dependency is not ideal. Thanks! -Lukas > > Cheers, > > - Ted
On Wed, Mar 20, 2019 at 10:02:17PM -0400, Theodore Ts'o wrote: > From: "Darrick J. Wong" <darrick.wong@oracle.com> > > Paul Menzel reported that the e2scrub_all reaper service that runs at > startup takes a long time to run, and Ted Ts'o pointed out that we could > do a lot less work by using lvs as the outer loop in the ext4 filesystem > probe function so that we only have to lsblk the lvm devices containing > ext4 filesystems. > > Therefore, refactor the loops to put lvs first, which should boost speed > a bit. > > [ Made some of the further optimizations suggested by Lukas Czerner. -- TYT ] > > Reported-by: Paul Menzel <pmenzel@molgen.mpg.de> > Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> > Signed-off-by: Theodore Ts'o <tytso@mit.edu> > --- > scrub/e2scrub_all.in | 34 +++++++++++++++++++++++----------- > 1 file changed, 23 insertions(+), 11 deletions(-) > > diff --git a/scrub/e2scrub_all.in b/scrub/e2scrub_all.in > index 4cb90a0de..cad232987 100644 > --- a/scrub/e2scrub_all.in > +++ b/scrub/e2scrub_all.in > @@ -22,6 +22,7 @@ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin > > scrub_all=0 > snap_size_mb=256 > +reap=0 > conffile="@root_sysconfdir@/e2scrub.conf" > > test -f "${conffile}" && . "${conffile}" > @@ -65,7 +66,7 @@ exitcode() { > while getopts "nrAV" opt; do > case "${opt}" in > "n") DBG="echo Would execute: " ;; > - "r") scrub_args="${scrub_args} -r";; > + "r") scrub_args="${scrub_args} -r"; reap=1;; > "A") scrub_all=1;; > "V") print_version; exitcode 0;; > *) print_help; exitcode 2;; > @@ -88,9 +89,12 @@ if ! type lvcreate >& /dev/null ; then > fi > > # Find scrub targets, make sure we only do this once. 
> -ls_scrub_targets() { > - lsblk -o NAME,FSTYPE,MOUNTPOINT -p -P -n | while read vars; do > +ls_scan_targets() { > + lvs --name-prefixes -o vg_name,lv_path \ > + -S lv_active=active,lv_role=public --noheadings | \ > + while read vars; do > eval "${vars}" > + eval "$(lsblk -o FSTYPE,MOUNTPOINT -p -P -n "${LVM2_LV_PATH}")" Btw, I know that you're going to change and resend this, but there is another problem. You're missing NAME field and so the check that we have later 93 → → if [ -n "${MOUNTPOINT}" ]; then 94 → → → echo "${MOUNTPOINT}" 95 → → else 96 → → → echo "${NAME}" 97 → → fi will never print the device name. It almost feels like we need some regression testing for this ;) However I agree that it's going to be PITA to create since historically our test suite did not require elevated privledges, nor did it change the system in any way. I could bring some knowledge over from ssm and try to avoid system changes by using loop devices and DM_DEV_DIR, but it will still require a root privledges and will be significantly different from what we do in tests/ today. Any ideas on how you want to do this ? -Lukas > > # Skip non-ext[234] > case "${FSTYPE}" in > @@ -103,12 +107,6 @@ ls_scrub_targets() { > continue; > fi > > - # Skip non-lvm devices and lvm snapshots > - lvm_vars="$(lvs --nameprefixes -o vg_name,lv_name,lv_role --noheadings "${NAME}" 2> /dev/null)" > - test $? 
-ne 0 && continue > - eval "${lvm_vars}" > - echo "${LVM2_LV_ROLE}" | grep -q "snapshot" && continue > - > free_space="$(vgs -o vg_free --units m --noheadings --no-suffix "${LVM2_VG_NAME}" 2> /dev/null | sed -e 's/\..*//')" > test "${snap_size_mb}" -gt "${free_space}" && continue > > @@ -120,6 +118,20 @@ ls_scrub_targets() { > done | sort | uniq > } > > +# Find leftover scrub snapshots > +ls_reap_targets() { > + lvs -o lv_path -S lv_role=snapshot -S lv_name=~\(e2scrub$\) --noheadings > +} > + > +# Figure out what we're targeting > +ls_targets() { > + if [ "${reap}" -eq 1 ]; then > + ls_reap_targets > + else > + ls_scan_targets > + fi > +} > + > # systemd doesn't know to do path escaping on the instance variable we pass > # to the e2scrub service, which breaks things if there is a dash in the path > # name. Therefore, do the path escaping ourselves if needed. > @@ -140,10 +152,10 @@ escape_path_for_systemd() { > > # Scrub any mounted fs on lvm by creating a snapshot and fscking that. > stdin="$(realpath /dev/stdin)" > -ls_scrub_targets | while read tgt; do > +ls_targets | while read tgt; do > # If we're not reaping and systemd is present, try invoking the > # systemd service. > - if [ -z "${scrub_args}" ] && type systemctl > /dev/null 2>&1; then > + if [ "${reap}" -ne 1 ] && type systemctl > /dev/null 2>&1; then > tgt_esc="$(escape_path_for_systemd "${tgt}")" > ${DBG} systemctl start "e2scrub@${tgt_esc}" 2> /dev/null < "${stdin}" > res=$? > -- > 2.19.1 >
On Thu, Mar 21, 2019 at 11:27:42AM +0100, Lukas Czerner wrote: > > Snapshot of a thinspanshot is allowed though, so we might want to > include those. Not sure if it's wise to do it by default, but regardless > it's probably something for a separate change. Yeah, it's definitely a separate change. One potential design question is that for a thin volume, you can do both a thin or a thick snapshot, and in some cases one might succeed while the other will fail. So do we make this choice be a parameter that we set in the config file, or do we try to see if there is sufficient spare freespace for a thick snapshot (and then do that), or a thin snapshot (and then do that) --- and which should we prefer? The other thing I'll note is that in order for us to tell whether something is a thin or thick LV, we're going to need to ask lvs to return multiple parameters, so the optimization of using: for NAME in $(lvs -o lv_path --noheadings -S...) ; do ... done will no longer work. (Or we end up calling lvs a second time, which is less efficient.) Just curious --- do we know how commonly thin LV's are being used by customers of various distros? I assume enterprise distro users will be the most conservative, but how common is the uptake of thin LV's by Fedora and OpenSuSE users? - Ted
On Thu, Mar 21, 2019 at 04:57:03PM +0100, Lukas Czerner wrote: > > hence I get mountpoin where the volume is mounted and the device where > it is not. That's what we need right ? Well, except by default we need to be able to determine whether or not the volume is mounted, since by default e2scrub_all only runs on mounted file systems (unless -A) is specified. What I'm now doing is this, which I think is the simplest way to do things: ls_scan_targets() { for NAME in $(lvs -o lv_path --noheadings \ -S "lv_active=active,lv_role=public,lv_role!=snapshot,vg_free>${snap_size_mb}") ; do # Skip non-ext[234] case "$(blkid -o value -s TYPE ${NAME})" in ext[234]) ;; *) continue;; esac if [ "${scrub_all}" -eq 1 ]; then echo ${NAME} else MOUNTPOINT="$(lsblk -o MOUNTPOINT --noheadings ${NAME})" if [ -n "${MOUNTPOINT}" ]; then echo "${MOUNTPOINT}" fi fi done | sort | uniq } This way we only bother to fetch the mountpoints for ext[234] file systems, and only when -A is _not_ specified. In fact, I'm actually thinking that we should just *always* just return the device pathname in which case we can make this even simpler: ls_scan_targets() { for NAME in $(lvs -o lv_path --noheadings \ -S "lv_active=active,lv_role=public,lv_role!=snapshot,vg_free>${snap_size_mb}") ; do # Skip non-ext[234] case "$(blkid -o value -s TYPE ${NAME})" in ext[234]) ;; *) continue;; esac if [ "${scrub_all}" -eq 1 ] || [ -n "$(lsblk -o MOUNTPOINT --noheadings ${NAME})" ]; then echo ${NAME} fi done | sort | uniq } This means that we always run e2scrub on the device name, which in some cases might result in some ugliness, e.g. systemctl start e2scrub@-dev-lambda-test\\x2d1k But I think I can live with that. (However, the fact that systemd-escape will create Unicode characters which themselves have to be escaped is, well, sad....) What do you see on your system when you benchmark the above? The fact that we only determine the mountpoints on ext[234] file systems should save some time. 
We are shelling out to blkid for each device but that's probably not a huge overhead. My before (v1.45.0 plus support for -n so we can have comparable times) and after times (with all of the changes): 0.16user 0.15system 0:00.83elapsed 38%CPU (0avgtext+0avgdata 13384maxresident)k 0.12user 0.11system 0:00.36elapsed 64%CPU (0avgtext+0avgdata 13420maxresident)k Your one-liner is a bit faster: 0.03user 0.04system 0:00.23elapsed 31%CPU (0avgtext+0avgdata 13316maxresident)k But if we need to determine thick versus thin LV's so we can potentially do thin snapshots, a bunch of these optimizations are going to go away anyway. And realistically, so long as we're fast in the "no LV's" and "LV's exist but there is no free space" cases, that should avoid most user complaints, since if we *do* trigger e2scrub, the cost of running ls_scan_targets will be in the noise. - Ted
On Thu, Mar 21, 2019 at 01:48:11PM -0400, Theodore Ts'o wrote: > On Thu, Mar 21, 2019 at 11:27:42AM +0100, Lukas Czerner wrote: > > > > Snapshot of a thinspanshot is allowed though, so we might want to > > include those. Not sure if it's wise to do it by default, but regardless > > it's probably something for a separate change. > > Yeah, it's definitely a separate change. One potential design > question is that for a thin volume, you can do both a thin or a think > snapshot, and in some cases one might succeed while the other will > fail. So do we make this choice be a parameter that we set in the > config file, or do we try to see if there is sufficient spare > freespace for a thick snapshot (and then do that), or a thin snapshot > (and then do that) --- and which should use prefer? That's a good question. I am not sure it makes sense to allow choice between thin/thick snapshots of thin volume. Just pick one, ideally it would be thin snapshot of thin volume. > > The other thing I'll note is that in order for us to tell whether > something is a thin or thick LV, we're going to to need to ask lvs to > return multiple parameters, so the optimization of using: > > for NAME in $(lvs -o lv_path --noheadings -S...) ; do > ... > done > > will no longer work. (Or we end up calling lvs a second time, which > is less efficient.) Yeah, however calling lvs twice might still be better than calling lsblk for every lv ? This can be easily benchmarked. first it would be for a non-thin volumes so -S lv_active=active,lv_role=public,lv_role!=snapshot,segtype!=thin and second for thin volumes and thin snapshots -S lv_active=active,lv_role=public,segtype=thin Note that by default thin snapshots are not active so they will not be checked unless user activated them. > > Just curious --- do we know how commonly thin LV's are being used by > customers of various distros? 
I assume enterprise distro users will > be the most conservative, but how common is the uptake of thin LV's by > Fedora and OpenSuSE users? I wish I knew. I think that for workstation users it's the defaults that matters the most so I would not expect thin LV's to be used much there. Qubes OS being most notable exception I think. I'd think that thin would be mostly used for virtualization and storage appliances. I'll ask around, Fedora should have some stats. -Lukas > > - Ted
On Mar 21, 2019, at 9:57 AM, Lukas Czerner <lczerner@redhat.com> wrote: > > So the good news is that it's not really a problem. What you really want > is the mountpoint if it exists and the device otherwise right ? > > Then my suggestion will do just that becuase awk will separate the fieds > with FS (field separator) which by default is a single space. But space > is special in awk becuase the fields will be separated by a runs of > space, so if you pick the fileds just right you'll get what you want. > > So if I run this on my system > > # lsblk -o MOUNTPOINT,NAME,FSTYPE -p -n -l `lvs -o lv_path -S lv_active=active,lv_role=public,lv_role!=snapshot,vg_free\>256 --noheadings` > > I'll get > > /dev/mapper/vg_perpetuum-ext4--suck > /dev/mapper/vg_perpetuum-fedora29 > /dev/mapper/vg_perpetuum-fedora29--2 > /dev/mapper/vg_perpetuum-freebsd > /mnt/kernel /dev/mapper/vg_perpetuum-kernel ext4 > /home /dev/mapper/vg_perpetuum-lv_home xfs > / /dev/mapper/vg_perpetuum-lv_root xfs > [SWAP] /dev/mapper/vg_perpetuum-lv_swap swap > /var /dev/mapper/vg_perpetuum-lv_var xfs > /dev/mapper/vg_perpetuum-lvol001 ext4 > /dev/mapper/vg_perpetuum-overlay ext4 > /dev/mapper/vg_perpetuum-rhel7 > /dev/mapper/vg_perpetuum-rhel8 > /dev/mapper/vg_perpetuum-test0 ext3 > /dev/mapper/vg_perpetuum-testing ext4 > /dev/mapper/vg_perpetuum-testing1 xfs > /dev/mapper/vg_perpetuum-thinvolume > > now if I add > > | awk '{print $1}' > > I'll get > > /dev/mapper/vg_perpetuum-ext4--suck > /dev/mapper/vg_perpetuum-fedora29 > /dev/mapper/vg_perpetuum-fedora29--2 > /dev/mapper/vg_perpetuum-freebsd > /mnt/kernel > /home > / > [SWAP] > /var > /dev/mapper/vg_perpetuum-lvol001 > /dev/mapper/vg_perpetuum-overlay > /dev/mapper/vg_perpetuum-rhel7 > /dev/mapper/vg_perpetuum-rhel8 > /dev/mapper/vg_perpetuum-test0 > /dev/mapper/vg_perpetuum-testing > /dev/mapper/vg_perpetuum-testing1 > /dev/mapper/vg_perpetuum-thinvolume > > > hence I get mountpoin where the volume is mounted and the device where > it is not. 
That's what we need right ? > > What I did not consider was [SWAP] but we can get rid of that easily. > > grep -v '[SWAP]' > > So in the end we'll have something like > > # lsblk -o MOUNTPOINT,NAME,FSTYPE -p -n -l `lvs -o lv_path -S lv_active=active,lv_role=public,lv_role!=snapshot,vg_free\>256 --noheadings` | grep -v '[SWAP]' | grep ' ext[234]$' | awk '{print $1}' This doesn't need to skip [SWAP] explicitly because it will not have an ext[234] filesystem mounted on it, so would be excluded by the next "grep" anyway. However, if you are using "awk" then there is no need for the extra grep(2) I think. Something like the following looks like it will work: lsblk -o MOUNTPOINT,NAME,FSTYPE -p -n -l $(lvs -o lv_path \ -S lv_active=active,lv_role=public,lv_role!=snapshot,vg_free\>256 --noheadings) | awk '($NF > 1 && $2 == ext[234]) || ($NF > 2 && $3 == ext[234]) { print $1 }' The "$NF" checks appear to be needed because "$3 = ext[234]" seems to match everything if "$3" is empty, though I can't understand why that would be desirable. > and so on my system this gives me > > /mnt/kernel > /dev/mapper/vg_perpetuum-lvol001 > /dev/mapper/vg_perpetuum-overlay > /dev/mapper/vg_perpetuum-test0 > /dev/mapper/vg_perpetuum-test10 > /dev/mapper/vg_perpetuum-test11 > /dev/mapper/vg_perpetuum-test3 > /dev/mapper/vg_perpetuum-test4 > /dev/mapper/vg_perpetuum-test9 > /dev/mapper/vg_perpetuum-testing > > And leaving out mounted file systems is again as simple as > > grep -v '^/dev/' > > Have I missed something ? > > > Now for the performance. 
I drop caches, then run each twice > > the ls_scan_targets function runs > > real 0m0.575s > user 0m0.086s > sys 0m0.179s > > real 0m0.241s > user 0m0.088s > sys 0m0.169s > > > My one-line suggestion runs > > real 0m0.275s > user 0m0.027s > sys 0m0.047s > > real 0m0.069s > user 0m0.015s > sys 0m0.052s > > > So the difference is significant, although I would not consider it > meaningful since it's really low anyway, but some people complained so I > guess someone cares and people do have different systems so... > >> >> :-) >> >> We could have used awk to select the field, but that still doesn't >> deal with the mountpoint column being empty when it is unmounted. I >> did briefly consider using lsblk -J, but I didn't want to add a >> dependency on the jq[1] package (and I didn't even know if RHEL/Fedora >> packages jq). >> >> [1] https://packages.debian.org/stretch/jq > > Yes Fedora does have it, but I agree that adding this dependency is not > ideal. > > Thanks! > -Lukas > >> >> Cheers, >> >> - Ted Cheers, Andreas
On Thu, Mar 21, 2019 at 02:24:56PM -0400, Theodore Ts'o wrote: > On Thu, Mar 21, 2019 at 04:57:03PM +0100, Lukas Czerner wrote: > > > > hence I get mountpoin where the volume is mounted and the device where > > it is not. That's what we need right ? > > Well, except by default we need to be able to determine whether or not > the volume is mounted, since by default e2scrub_all only runs on > mounted file systems (unless -A) is specified. Right, I did mention it later in the reply. It can be filtered grep -v '^/dev/' > > What I'm now doing is this, which I think is the simplest way to do things: > > ls_scan_targets() { > for NAME in $(lvs -o lv_path --noheadings \ > -S "lv_active=active,lv_role=public,lv_role!=snapshot,vg_free>${snap_size_mb}") ; do > # Skip non-ext[234] > case "$(blkid -o value -s TYPE ${NAME})" in > ext[234]) ;; > *) continue;; > esac > > if [ "${scrub_all}" -eq 1 ]; then > echo ${NAME} > else > MOUNTPOINT="$(lsblk -o MOUNTPOINT --noheadings ${NAME})" > > if [ -n "${MOUNTPOINT}" ]; then > echo "${MOUNTPOINT}" > fi > fi > done | sort | uniq > } > > This way we only bother to fetch the mountpoints for ext[234] file > systems, and only when -A is _not_ specified. > > In fact, I'm actually thinking that we should just *always* just > return the device pathname in which case we can make this even > simpler: > > ls_scan_targets() { > for NAME in $(lvs -o lv_path --noheadings \ > -S "lv_active=active,lv_role=public,lv_role!=snapshot,vg_free>${snap_size_mb}") ; do > # Skip non-ext[234] > case "$(blkid -o value -s TYPE ${NAME})" in > ext[234]) ;; > *) continue;; > esac > > if [ "${scrub_all}" -eq 1 ] || > [ -n "$(lsblk -o MOUNTPOINT --noheadings ${NAME})" ]; then > echo ${NAME} > fi > done | sort | uniq > } > > This means that we always run e2scrub on the device name, which in > some cases might result in some ugliness, e.g. > > systemctl start e2scrub@-dev-lambda-test\\x2d1k > > But I think I can live with that. 
(However, the fact that > systemd-escape will create Unicode characters which themselves have to > be escaped is, well, sad....) > > What do you see on your system when you benchmark the above? The fact > that we only determine the mountpoints on ext[234] file systems should > save some time. We are shelling out to blkid for each device but > that's probably not a huge overhead. > > My before (v1.45.0 plus support for -n so we can have comparable > times) and after times (with all of the changes): > > 0.16user 0.15system 0:00.83elapsed 38%CPU (0avgtext+0avgdata 13384maxresident)k > > 0.12user 0.11system 0:00.36elapsed 64%CPU (0avgtext+0avgdata 13420maxresident)k For me this new function is the worst of all. cold cache: real 0m2.115s user 0m0.040s sys 0m0.154s second time: real 0m1.100s user 0m0.037s sys 0m0.122s But that's because of blkid which is terribly slow for some reason. Replacing it with lsblk I get much better results cold cache: real 0m0.383s user 0m0.043s sys 0m0.112s second time: real 0m0.153s user 0m0.048s sys 0m0.102s -Lukas > > Your one-liner is a bit faster: > > 0.03user 0.04system 0:00.23elapsed 31%CPU (0avgtext+0avgdata 13316maxresident)k > > But if we need to determine thick versus thin LV's so we can > potentially do thin snapshots, a bunch of these optimizations are > going to go away anyway. And realistically, so long as we're fast in > the "no LV's" and "LV's exist but there is no free space" cases, that > should avoid most user complaints, since if we *do* trigger e2scrub, > the cost of running ls_scan_targets will be in the noise. > > - Ted
On Thu, Mar 21, 2019 at 08:49:01PM +0100, Lukas Czerner wrote: > I wish I knew. I think that for workstation users it's the defaults that > matters the most so I would not expect thin LV's to be used much there. > Qubes OS being most notable exception I think. Yeah, especially since the lvcreate command is so complicated. Even if there's only one thinpool in a VG, you have to specify it explicitly: lvcreate -V 5G --thinpool mythinpool -n thinlv lambda Even though mythinpool is the only possible thinpool to use in VG lambda, and even though it's clear that a thinLV is desired due to the -V option, you still have to pass in the --thinpool option, and there is no convenient short option character for it. :-( Given that the devicemapper people are saying that thick snapshots are legacy and deprecated, and everyone should be using the new coolness, I'm pretty sure most users haven't gotten the message.... - Ted
On Thu, Mar 21, 2019 at 09:17:10PM +0100, Lukas Czerner wrote: > > Right, I did mention it later in the reply. It can be filtered > > grep -v '^/dev/' Well, that assumes all device nodes are in /dev. Which is not necessarily always the case, especially in some of the more _whacky_ container setups which I've seen. (Hmm, is whacky redundant here?) I suppose I can test whether or not the path is a block device or a directory..... > > For me this new function is the wors of all. > > cold cache: > real 0m2.115s > user 0m0.040s > sys 0m0.154s > > second time: > real 0m1.100s > user 0m0.037s > sys 0m0.122s > > But that's because of blkid which is terribly slow for some reason. I ran my test on a system with an NVMe SSD, and no HDD's attached. I just did an strace, and I see the util-linux folks have really done a great job of pessimizing blkid. :-( My version used to just pull the information from the blkid cache file and then verified the results, but it looks like the new, improved util-linux version of blkid scans the /dev directory and opens and reads from each device node, even when we're querying a single block device. <groan> Even lsblk is *amazingly* inefficient in terms of the number of useless file opens which it performs, although at least they are all /sysfs files. I'm half tempted to create and ship a binary which just calls ext2fs_check_mount_point() and returns the value, since it's the most efficient. This command sudo strace -o /tmp/st /build/e2fsprogs-maint/lib/ext2fs/tst_ismounted /dev/lambda/tp Opens /proc/mounts and does a trial mount of /dev/lambda/tp to make sure it's actually busy, and that's it. Sigh... - Ted
On Thu, Mar 21, 2019 at 04:48:23PM -0400, Theodore Ts'o wrote: > On Thu, Mar 21, 2019 at 09:17:10PM +0100, Lukas Czerner wrote: > > > > Right, I did mention it later in the reply. It can be filtered > > > > grep -v '^/dev/' > > Well, that assumes all device nodes are in /dev. Which is not > necessarily always the case, especially in some of the more _whacky_ > container setups which I've seen. (Hmm, is whacky redundant here?) Fair enough, I've never seen this outside dm/lvm testing. > > I suppose can test whether or not the path is a block device or a > directory..... > > > > > For me this new function is the wors of all. > > > > cold cache: > > real 0m2.115s > > user 0m0.040s > > sys 0m0.154s > > > > second time: > > real 0m1.100s > > user 0m0.037s > > sys 0m0.122s > > > > But that's because of blkid which is terribly slow for some reason. > > I ran my test on a system with a NVMe SSD, and no HDD's attached. I > just did an strace, and I see the util-linux folks have really done a > great job of pessimizing blkid. :-( Yeah, all I have on that system is spinning rust :) lsblk works good enough for me so I am not sure how I feel about special binary to check the mount point :) -Lukas > > My version used to just pull the information from the blkid cache file > and then verified the results, but it looks like the new, improved > util-linux version of blkid scans the /dev directory and opens and > reads from each device node, even when we're querying a single block > device. <groan> > > Even lsblk is *amazingly* inefficient in terms of the number of > useless file opens which it performs, although at least they are all > /sysfs files. > > I'm half tempted to create and ship a binary which just calls > ext2fs_check_mount_point() and returns the value, since it's the most > efficient. 
This command > > sudo strace -o /tmp/st /build/e2fsprogs-maint/lib/ext2fs/tst_ismounted /dev/lambda/tp > > Opens /proc/mounts and does a trial mount of /dev/lambda/tp to make > sure it's actually busy, and that's it. > > Sigh... > > - Ted
OK, I've reworked the function to read: ls_scan_targets() { lsblk -o NAME,MOUNTPOINT,FSTYPE -P -n -p \ $(lvs -o lv_path --noheadings -S "lv_active=active,lv_role=public,lv_role!=snapshot,vg_free>${snap_size_mb}") | \ grep FSTYPE=\"ext\[234\]\" | while read vars ; do eval "${vars}" if [ "${scrub_all}" -eq 1 ] || [ -n "${MOUNTPOINT}" ]; then echo ${MOUNTPOINT:-${NAME}} fi done | sort | uniq } I think that's the final answer.... - Ted
On Thu, Mar 21, 2019 at 06:04:40PM -0400, Theodore Ts'o wrote: > OK, I've reworked the function to read: > > ls_scan_targets() { > lsblk -o NAME,MOUNTPOINT,FSTYPE -P -n -p \ > $(lvs -o lv_path --noheadings -S "lv_active=active,lv_role=public,lv_role!=snapshot,vg_free>${snap_size_mb}") | \ > grep FSTYPE=\"ext\[234\]\" | while read vars ; do > eval "${vars}" > > if [ "${scrub_all}" -eq 1 ] || [ -n "${MOUNTPOINT}" ]; then > echo ${MOUNTPOINT:-${NAME}} > fi > done | sort | uniq > } > > I think that's the final answer.... And I just saw your e-mail about dropping the sort and uniq calls. OK, I'll take care of that too. - Ted
On Thu, Mar 21, 2019 at 06:08:19PM -0400, Theodore Ts'o wrote: > On Thu, Mar 21, 2019 at 06:04:40PM -0400, Theodore Ts'o wrote: > > OK, I've reworked the function to read: > > > > ls_scan_targets() { > > lsblk -o NAME,MOUNTPOINT,FSTYPE -P -n -p \ > > $(lvs -o lv_path --noheadings -S "lv_active=active,lv_role=public,lv_role!=snapshot,vg_free>${snap_size_mb}") | \ > > grep FSTYPE=\"ext\[234\]\" | while read vars ; do > > eval "${vars}" > > > > if [ "${scrub_all}" -eq 1 ] || [ -n "${MOUNTPOINT}" ]; then > > echo ${MOUNTPOINT:-${NAME}} > > fi > > done | sort | uniq > > } > > > > I think that's the final answer.... > > And I just saw your e-mail about dropping the sort and uniq calls. > OK, I'll take care of that too. > > - Ted Great, I like it and it runs very fast on my system. cold cache real 0m0.268s user 0m0.011s sys 0m0.036s second run real 0m0.053s user 0m0.013s sys 0m0.031s Thanks for working on this. -Lukas
diff --git a/scrub/e2scrub_all.in b/scrub/e2scrub_all.in index 4cb90a0de..cad232987 100644 --- a/scrub/e2scrub_all.in +++ b/scrub/e2scrub_all.in @@ -22,6 +22,7 @@ PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin scrub_all=0 snap_size_mb=256 +reap=0 conffile="@root_sysconfdir@/e2scrub.conf" test -f "${conffile}" && . "${conffile}" @@ -65,7 +66,7 @@ exitcode() { while getopts "nrAV" opt; do case "${opt}" in "n") DBG="echo Would execute: " ;; - "r") scrub_args="${scrub_args} -r";; + "r") scrub_args="${scrub_args} -r"; reap=1;; "A") scrub_all=1;; "V") print_version; exitcode 0;; *) print_help; exitcode 2;; @@ -88,9 +89,12 @@ if ! type lvcreate >& /dev/null ; then fi # Find scrub targets, make sure we only do this once. -ls_scrub_targets() { - lsblk -o NAME,FSTYPE,MOUNTPOINT -p -P -n | while read vars; do +ls_scan_targets() { + lvs --name-prefixes -o vg_name,lv_path \ + -S lv_active=active,lv_role=public --noheadings | \ + while read vars; do eval "${vars}" + eval "$(lsblk -o FSTYPE,MOUNTPOINT -p -P -n "${LVM2_LV_PATH}")" # Skip non-ext[234] case "${FSTYPE}" in @@ -103,12 +107,6 @@ ls_scrub_targets() { continue; fi - # Skip non-lvm devices and lvm snapshots - lvm_vars="$(lvs --nameprefixes -o vg_name,lv_name,lv_role --noheadings "${NAME}" 2> /dev/null)" - test $? 
-ne 0 && continue - eval "${lvm_vars}" - echo "${LVM2_LV_ROLE}" | grep -q "snapshot" && continue - free_space="$(vgs -o vg_free --units m --noheadings --no-suffix "${LVM2_VG_NAME}" 2> /dev/null | sed -e 's/\..*//')" test "${snap_size_mb}" -gt "${free_space}" && continue @@ -120,6 +118,20 @@ ls_scrub_targets() { done | sort | uniq } +# Find leftover scrub snapshots +ls_reap_targets() { + lvs -o lv_path -S lv_role=snapshot -S lv_name=~\(e2scrub$\) --noheadings +} + +# Figure out what we're targeting +ls_targets() { + if [ "${reap}" -eq 1 ]; then + ls_reap_targets + else + ls_scan_targets + fi +} + # systemd doesn't know to do path escaping on the instance variable we pass # to the e2scrub service, which breaks things if there is a dash in the path # name. Therefore, do the path escaping ourselves if needed. @@ -140,10 +152,10 @@ escape_path_for_systemd() { # Scrub any mounted fs on lvm by creating a snapshot and fscking that. stdin="$(realpath /dev/stdin)" -ls_scrub_targets | while read tgt; do +ls_targets | while read tgt; do # If we're not reaping and systemd is present, try invoking the # systemd service. - if [ -z "${scrub_args}" ] && type systemctl > /dev/null 2>&1; then + if [ "${reap}" -ne 1 ] && type systemctl > /dev/null 2>&1; then tgt_esc="$(escape_path_for_systemd "${tgt}")" ${DBG} systemctl start "e2scrub@${tgt_esc}" 2> /dev/null < "${stdin}" res=$?