Message ID | 1448302147-19272-3-git-send-email-bigeasy@linutronix.de |
---|---|
State | Superseded |
Headers | show |
Sebastian, Am 23.11.2015 um 19:09 schrieb Sebastian Andrzej Siewior: > wear_leveling_worker() currently unconditionally puts a PEB on erase in > the error case even it just been taken from the free_list and never > used. > In case the PEB was never used it can be put back on the free list > saving an erase cycle. Makes sense, thanks for pointing out this optimization! > Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> > --- > drivers/mtd/ubi/wl.c | 23 ++++++++++++++++++++--- > 1 file changed, 20 insertions(+), 3 deletions(-) > > diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c > index eb4489f9082f..709ca27e103c 100644 > --- a/drivers/mtd/ubi/wl.c > +++ b/drivers/mtd/ubi/wl.c > @@ -631,6 +631,7 @@ static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, > return erase_worker(ubi, wl_wrk, 0); > } > > +static int ensure_wear_leveling(struct ubi_device *ubi, int nested); > /** > * wear_leveling_worker - wear-leveling worker function. > * @ubi: UBI device description object > @@ -652,6 +653,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, > #endif > struct ubi_wl_entry *e1, *e2; > struct ubi_vid_hdr *vid_hdr; > + int to_leb_clean = 0; Can we please rename this variable? I know it means "the `to` LEB is clean". But for everybody else this reads as "I'm going to clean a LEB". ;) > kfree(wrk); > if (shutdown) > @@ -756,6 +758,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, > > err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); > if (err && err != UBI_IO_BITFLIPS) { > + to_leb_clean = 1; > if (err == UBI_IO_FF) { > /* > * We are trying to move PEB without a VID header. UBI > @@ -801,10 +804,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, > * protection queue. > */ > protect = 1; > + to_leb_clean = 1; > goto out_not_moved; > } > if (err == MOVE_RETRY) { > scrubbing = 1; > + to_leb_clean = 1; > goto out_not_moved; > } > if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || > @@ -830,6 +835,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, > ubi->erroneous_peb_count); > goto out_error; > } > + to_leb_clean = 1; > erroneous = 1; > goto out_not_moved; > } > @@ -900,15 +906,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, > wl_tree_add(e1, &ubi->scrub); > else > wl_tree_add(e1, &ubi->used); > + if (to_leb_clean) { > + wl_tree_add(e2, &ubi->free); > + ubi->free_count++; > + } > + > ubi_assert(!ubi->move_to_put); > ubi->move_from = ubi->move_to = NULL; > ubi->wl_scheduled = 0; > spin_unlock(&ubi->wl_lock); > > ubi_free_vid_hdr(ubi, vid_hdr); > - err = do_sync_erase(ubi, e2, vol_id, lnum, torture); > - if (err) > - goto out_ro; > + if (to_leb_clean) { > + ensure_wear_leveling(ubi, 0); Why are you rescheduling wear leveling? And the nested variable has to be set to no-zero as you're calling it from a UBI worker. > + } else { > + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); > + if (err) { > + wl_entry_destroy(ubi, e2); Why that? The erase_worker will free e2 if it encounters a fatal error and gives up this PEB. You're introducing a double free. I think you have copy&pasted from: err = do_sync_erase(ubi, e1, vol_id, lnum, 0); if (err) { if (e2) wl_entry_destroy(ubi, e2); goto out_ro; } ...which looks wrong too. I'll double check tomorrow with a fresh brain. Thanks, //richard
Am 23.11.2015 um 22:30 schrieb Richard Weinberger: > Sebastian, > > Am 23.11.2015 um 19:09 schrieb Sebastian Andrzej Siewior: >> wear_leveling_worker() currently unconditionally puts a PEB on erase in >> the error case even it just been taken from the free_list and never >> used. >> In case the PEB was never used it can be put back on the free list >> saving an erase cycle. > > Makes sense, thanks for pointing out this optimization! > >> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> >> --- >> drivers/mtd/ubi/wl.c | 23 ++++++++++++++++++++--- >> 1 file changed, 20 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c >> index eb4489f9082f..709ca27e103c 100644 >> --- a/drivers/mtd/ubi/wl.c >> +++ b/drivers/mtd/ubi/wl.c >> @@ -631,6 +631,7 @@ static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, >> return erase_worker(ubi, wl_wrk, 0); >> } >> >> +static int ensure_wear_leveling(struct ubi_device *ubi, int nested); >> /** >> * wear_leveling_worker - wear-leveling worker function. >> * @ubi: UBI device description object >> @@ -652,6 +653,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >> #endif >> struct ubi_wl_entry *e1, *e2; >> struct ubi_vid_hdr *vid_hdr; >> + int to_leb_clean = 0; > > Can we please rename this variable? > I know it means "the `to` LEB is clean". But for everybody else this reads as "I'm going to clean a LEB". ;) > >> kfree(wrk); >> if (shutdown) >> @@ -756,6 +758,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >> >> err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); >> if (err && err != UBI_IO_BITFLIPS) { >> + to_leb_clean = 1; >> if (err == UBI_IO_FF) { >> /* >> * We are trying to move PEB without a VID header. UBI >> @@ -801,10 +804,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >> * protection queue. >> */ >> protect = 1; >> + to_leb_clean = 1; >> goto out_not_moved; >> } >> if (err == MOVE_RETRY) { >> scrubbing = 1; >> + to_leb_clean = 1; >> goto out_not_moved; >> } >> if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || >> @@ -830,6 +835,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >> ubi->erroneous_peb_count); >> goto out_error; >> } >> + to_leb_clean = 1; >> erroneous = 1; >> goto out_not_moved; >> } >> @@ -900,15 +906,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >> wl_tree_add(e1, &ubi->scrub); >> else >> wl_tree_add(e1, &ubi->used); >> + if (to_leb_clean) { >> + wl_tree_add(e2, &ubi->free); >> + ubi->free_count++; >> + } >> + >> ubi_assert(!ubi->move_to_put); >> ubi->move_from = ubi->move_to = NULL; >> ubi->wl_scheduled = 0; >> spin_unlock(&ubi->wl_lock); >> >> ubi_free_vid_hdr(ubi, vid_hdr); >> - err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >> - if (err) >> - goto out_ro; >> + if (to_leb_clean) { >> + ensure_wear_leveling(ubi, 0); > > Why are you rescheduling wear leveling? > And the nested variable has to be set to no-zero as you're calling it from a UBI worker. > >> + } else { >> + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >> + if (err) { >> + wl_entry_destroy(ubi, e2); > > Why that? The erase_worker will free e2 if it encounters > a fatal error and gives up this PEB. You're introducing a double free. > > I think you have copy&pasted from: > err = do_sync_erase(ubi, e1, vol_id, lnum, 0); > if (err) { > if (e2) > wl_entry_destroy(ubi, e2); > goto out_ro; > } > > ...which looks wrong too. I'll double check tomorrow with a fresh brain. Okay. That one is fine, as we switch to ro mode anyway... Thanks, //richard
On 11/23/2015 10:30 PM, Richard Weinberger wrote: >> diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c >> index eb4489f9082f..709ca27e103c 100644 >> --- a/drivers/mtd/ubi/wl.c >> +++ b/drivers/mtd/ubi/wl.c >> @@ -652,6 +653,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >> #endif >> struct ubi_wl_entry *e1, *e2; >> struct ubi_vid_hdr *vid_hdr; >> + int to_leb_clean = 0; > > Can we please rename this variable? > I know it means "the `to` LEB is clean". But for everybody else this reads as "I'm going to clean a LEB". ;) such a shame that you can't make files RO. Any suggestions? dst_clean ? >> kfree(wrk); >> if (shutdown) >> @@ -900,15 +906,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >> wl_tree_add(e1, &ubi->scrub); >> else >> wl_tree_add(e1, &ubi->used); >> + if (to_leb_clean) { >> + wl_tree_add(e2, &ubi->free); >> + ubi->free_count++; >> + } >> + >> ubi_assert(!ubi->move_to_put); >> ubi->move_from = ubi->move_to = NULL; >> ubi->wl_scheduled = 0; >> spin_unlock(&ubi->wl_lock); >> >> ubi_free_vid_hdr(ubi, vid_hdr); >> - err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >> - if (err) >> - goto out_ro; >> + if (to_leb_clean) { >> + ensure_wear_leveling(ubi, 0); > > Why are you rescheduling wear leveling? Because we had it pending and we didn't do anything. If I don't schedule it would be deferred until another LEB is going to be deleted. Before the change it happens after do_sync_erase() work job completes but now we add it back to the free list. > And the nested variable has to be set to no-zero as you're calling it from a UBI worker. Ah, okay. I've been looking at it and got wrong then. As I said in my cover mail, this was forwarded ported. > >> + } else { >> + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >> + if (err) { >> + wl_entry_destroy(ubi, e2); > > Why that? The erase_worker will free e2 if it encounters > a fatal error and gives up this PEB. You're introducing a double free. Hmmm. That is real bad error handling you have there. So you invoke do_sync_erase(), the kmalloc() fails and how exactly you free e2 here? You don't so I did what I did. But then if this succeeds but erase_worker() fails then we have a double free here. Indeed. > I think you have copy&pasted from: > err = do_sync_erase(ubi, e1, vol_id, lnum, 0); > if (err) { > if (e2) > wl_entry_destroy(ubi, e2); > goto out_ro; > } > > ...which looks wrong too. I'll double check tomorrow with a fresh brain. > > Thanks, > //richard Sebastian
Am 24.11.2015 um 09:26 schrieb Sebastian Andrzej Siewior: > On 11/23/2015 10:30 PM, Richard Weinberger wrote: >>> diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c >>> index eb4489f9082f..709ca27e103c 100644 >>> --- a/drivers/mtd/ubi/wl.c >>> +++ b/drivers/mtd/ubi/wl.c >>> @@ -652,6 +653,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >>> #endif >>> struct ubi_wl_entry *e1, *e2; >>> struct ubi_vid_hdr *vid_hdr; >>> + int to_leb_clean = 0; >> >> Can we please rename this variable? >> I know it means "the `to` LEB is clean". But for everybody else this reads as "I'm going to clean a LEB". ;) > > such a shame that you can't make files RO. Any suggestions? dst_clean ? Files RO? I don't get it. dst_clean is good. :-) > >>> kfree(wrk); >>> if (shutdown) >>> @@ -900,15 +906,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, >>> wl_tree_add(e1, &ubi->scrub); >>> else >>> wl_tree_add(e1, &ubi->used); >>> + if (to_leb_clean) { >>> + wl_tree_add(e2, &ubi->free); >>> + ubi->free_count++; >>> + } >>> + >>> ubi_assert(!ubi->move_to_put); >>> ubi->move_from = ubi->move_to = NULL; >>> ubi->wl_scheduled = 0; >>> spin_unlock(&ubi->wl_lock); >>> >>> ubi_free_vid_hdr(ubi, vid_hdr); >>> - err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >>> - if (err) >>> - goto out_ro; >>> + if (to_leb_clean) { >>> + ensure_wear_leveling(ubi, 0); >> >> Why are you rescheduling wear leveling? > > Because we had it pending and we didn't do anything. If I don't > schedule it would be deferred until another LEB is going to be deleted. > Before the change it happens after do_sync_erase() work job completes > but now we add it back to the free list. Makes sense. >> And the nested variable has to be set to no-zero as you're calling it from a UBI worker. > Ah, okay. I've been looking at it and got wrong then. As I said in my > cover mail, this was forwarded ported. > >> >>> + } else { >>> + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >>> + if (err) { >>> + wl_entry_destroy(ubi, e2); >> >> Why that? The erase_worker will free e2 if it encounters >> a fatal error and gives up this PEB. You're introducing a double free. > > Hmmm. That is real bad error handling you have there. So you invoke > do_sync_erase(), the kmalloc() fails and how exactly you free e2 here? Why do you want to free e2? We free an erase entry only if we give it up. wear leveling entries are allocated at init time and destroyed when you detach UBI. Thanks, //richard
On 11/24/2015 09:39 AM, Richard Weinberger wrote: >>>> + } else { >>>> + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >>>> + if (err) { >>>> + wl_entry_destroy(ubi, e2); >>> >>> Why that? The erase_worker will free e2 if it encounters >>> a fatal error and gives up this PEB. You're introducing a double free. >> >> Hmmm. That is real bad error handling you have there. So you invoke >> do_sync_erase(), the kmalloc() fails and how exactly you free e2 here? > > Why do you want to free e2? We free an erase entry only if we give > it up. wear leveling entries are allocated at init time and destroyed > when you detach UBI. The reference to it in the RB-tree (free) was removed. Is there another reference to it? > Thanks, > //richard > Sebastian
Am 24.11.2015 um 09:42 schrieb Sebastian Andrzej Siewior: > On 11/24/2015 09:39 AM, Richard Weinberger wrote: >>>>> + } else { >>>>> + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >>>>> + if (err) { >>>>> + wl_entry_destroy(ubi, e2); >>>> >>>> Why that? The erase_worker will free e2 if it encounters >>>> a fatal error and gives up this PEB. You're introducing a double free. >>> >>> Hmmm. That is real bad error handling you have there. So you invoke >>> do_sync_erase(), the kmalloc() fails and how exactly you free e2 here? >> >> Why do you want to free e2? We free an erase entry only if we give >> it up. wear leveling entries are allocated at init time and destroyed >> when you detach UBI. > > The reference to it in the RB-tree (free) was removed. Is there another > reference to it? UBI supports only single references. Everything else would be a bug. That said, I agree that the error handling in the wear_leveling_worker() can be improved. Especially as currently an error while do_sync_erase() puts UBI into read-only mode and cleanup is skipped as we're in read-only anyway then. Would you volunteer? :-) Thanks, //richard
On 11/24/2015 10:02 AM, Richard Weinberger wrote: > Am 24.11.2015 um 09:42 schrieb Sebastian Andrzej Siewior: >> On 11/24/2015 09:39 AM, Richard Weinberger wrote: >>>>>> + } else { >>>>>> + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >>>>>> + if (err) { >>>>>> + wl_entry_destroy(ubi, e2); >>>>> >>>>> Why that? The erase_worker will free e2 if it encounters >>>>> a fatal error and gives up this PEB. You're introducing a double free. >>>> >>>> Hmmm. That is real bad error handling you have there. So you invoke >>>> do_sync_erase(), the kmalloc() fails and how exactly you free e2 here? >>> >>> Why do you want to free e2? We free an erase entry only if we give >>> it up. wear leveling entries are allocated at init time and destroyed >>> when you detach UBI. >> >> The reference to it in the RB-tree (free) was removed. Is there another >> reference to it? > > UBI supports only single references. > Everything else would be a bug. So if there is no reference to e2 which was just removed from the RB-tree free and do_sync_erase() can't kmalloc() then we leak e2, correct? > Thanks, > //richard > Sebastian
Am 24.11.2015 um 10:07 schrieb Sebastian Andrzej Siewior: > On 11/24/2015 10:02 AM, Richard Weinberger wrote: >> Am 24.11.2015 um 09:42 schrieb Sebastian Andrzej Siewior: >>> On 11/24/2015 09:39 AM, Richard Weinberger wrote: >>>>>>> + } else { >>>>>>> + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); >>>>>>> + if (err) { >>>>>>> + wl_entry_destroy(ubi, e2); >>>>>> >>>>>> Why that? The erase_worker will free e2 if it encounters >>>>>> a fatal error and gives up this PEB. You're introducing a double free. >>>>> >>>>> Hmmm. That is real bad error handling you have there. So you invoke >>>>> do_sync_erase(), the kmalloc() fails and how exactly you free e2 here? >>>> >>>> Why do you want to free e2? We free an erase entry only if we give >>>> it up. wear leveling entries are allocated at init time and destroyed >>>> when you detach UBI. >>> >>> The reference to it in the RB-tree (free) was removed. Is there another >>> reference to it? >> >> UBI supports only single references. >> Everything else would be a bug. > > So if there is no reference to e2 which was just removed from the > RB-tree free and do_sync_erase() can't kmalloc() then we leak e2, > correct? Yes, you are right. That definitely needs improvement. A possible solution would be iterating over ubi->lookuptbl upon detach time and call wl_entry_destroy() on every non-NULL entry. ...or rework do_sync_erase(), currently it calls the erase worker directly. The erase worker destroys a failed wl entry upon failure. But from the return code of do_sync_erase() we cannot know whether the erase worker destroyed it already. Thanks, //richard
On Mon, 2015-11-23 at 19:09 +0100, Sebastian Andrzej Siewior wrote: > wear_leveling_worker() currently unconditionally puts a PEB on erase > in > the error case even it just been taken from the free_list and never > used. > In case the PEB was never used it can be put back on the free list > saving an erase cycle. Did you think about putting LEBs like that to the protection queue instead of playing tricks with scheduler? The protection queue is there in order to protect eraseblocks from the wear-leveling subsystem (not the best choice of words, but terminology could be improved separately) And this is what you need - you want the wear-leveling subsystem to leave the eraseblock alone for some time, right? The protection queue uses the erase cycles counts instead of the actual time, but this seems OK for the situation you described. Just to remind why this protection queue exists - when the WL subsystem gives an eraseblock to the user, this may be an eraseblock with a high erase counter, and it may be a candidate for being moved, the WL subsystem just did not have a chance to do this yet. But if we give the eraseblock to the user, the user will probably write something there soon, and we do not want the WL subsystem to initiate data relocation while the user is writing the data there. Instead, we want to wait a little, and then move the data in background without interfering with the user. Back to my idea - what if you add the MOVE_RETRY eraseblocks to the protection queue. May be not to the tail, may be to the head.
On 11/24/2015 01:58 PM, Artem Bityutskiy wrote: Hello Artem, > On Mon, 2015-11-23 at 19:09 +0100, Sebastian Andrzej Siewior wrote: >> wear_leveling_worker() currently unconditionally puts a PEB on erase >> in >> the error case even it just been taken from the free_list and never >> used. >> In case the PEB was never used it can be put back on the free list >> saving an erase cycle. > > Did you think about putting LEBs like that to the protection queue > instead of playing tricks with scheduler? Why am I playing tricks with the scheduler? > The protection queue is there in order to protect eraseblocks from the > wear-leveling subsystem (not the best choice of words, but terminology > could be improved separately) > > And this is what you need - you want the wear-leveling subsystem to > leave the eraseblock alone for some time, right? The empty eraseblock is not the problem. The src-LEB is the problem because it can not be moved due to lock contention. Ideally I would do nothing here (except putting it back to the free list) and on unlock of the SRC-LEB it would trigger a wear-leveling cycle. > The protection queue uses the erase cycles counts instead of the actual > time, but this seems OK for the situation you described. > > Just to remind why this protection queue exists - when the WL subsystem > gives an eraseblock to the user, this may be an eraseblock with a high > erase counter, and it may be a candidate for being moved, the WL > subsystem just did not have a chance to do this yet. But if we give the > eraseblock to the user, the user will probably write something there > soon, and we do not want the WL subsystem to initiate data relocation > while the user is writing the data there. Instead, we want to wait a > little, and then move the data in background without interfering with > the user. > > Back to my idea - what if you add the MOVE_RETRY eraseblocks to the > protection queue. May be not to the tail, may be to the head. Hmm. About which erase blocks are you talking about? The e1 which is the src EB and will be relocated _or_ the e2 which is the destination EB where the data will be written to? From what you explain it does not make sense to put e2 on the protect list. I just try to save here an erase cycle here. e1 on the other hand might be a candidate. So e1 has a low EC value and WL-subsystem decides to move it to a an empty block with a high EC value. This fails due to MOVE_RETRY. I add e2 on to free-RB-tree, e1 on the protect-list. No ensure_wear_leveling() invocation. What will happen next? When will e1 be removed from the protection list? Sebastian
On Tue, 2015-11-24 at 14:33 +0100, Sebastian Andrzej Siewior wrote: > What will happen next? When will e1 > be removed from the protection list? If my memory still serves me, the answer is: roughly speaking, after a number of erase operation, which is currently defined as /* * Length of the protection queue. The length is effectively equivalent t o the * number of (global) erase cycles PEBs are protected from the wear-leveling * worker. */ #define UBI_PROT_QUEUE_LEN 10 But if you put it to the head of the protection queue, it'll be removed from there as soon as any LEB is "put", which effectively means "scheduled for erasure". Artem.
A follow-up... On Tue, 2015-11-24 at 14:33 +0100, Sebastian Andrzej Siewior wrote: > > Did you think about putting LEBs like that to the protection queue > > instead of playing tricks with scheduler? > > Why am I playing tricks with the scheduler? I should have replied the "1/2" e-mail, not this one, sorry. Because this is what you seem to try ding in 1/2. > Hmm. About which erase blocks are you talking about? The e1 which is > the src EB and will be relocated _or_ the e2 which is the destination I think the one which is currently unavailable. Or may be even both. I suggest you to consider pros an cons :-) > From what you explain it does not make sense to put e2 on the protect > list. I just try to save here an erase cycle here. I think if you put to the head of the PQ, that will be it.
On 11/24/2015 01:58 PM, Artem Bityutskiy wrote: > On Mon, 2015-11-23 at 19:09 +0100, Sebastian Andrzej Siewior wrote: > Back to my idea - what if you add the MOVE_RETRY eraseblocks to the > protection queue. May be not to the tail, may be to the head. I've been thinking about this. The protected list protects EBs from the WL worker. There are 10 slots and after one successful erase process all EB from this slot will be moved back to ->used list. So e2 can't be placed there because it belongs on the ->free list. e1 could be added there so it is protected from the WL worker until at least one erase cycle happened. This is no guarantee that MOVE_RETRY does not happen again but an optimistic try :) However once the protection time ends, it won't be added to the ->scrub list but the ->used list instead. The difference is that if it has an average number of erase cycles it won't be considered by the WL worker any time soon. So if it was added to the ->scrub list because a bitflip was detected on read then you lose this information. For that reason I think it is not worth putting it on the protected list. Sebastian
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index eb4489f9082f..709ca27e103c 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -631,6 +631,7 @@ static int do_sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, return erase_worker(ubi, wl_wrk, 0); } +static int ensure_wear_leveling(struct ubi_device *ubi, int nested); /** * wear_leveling_worker - wear-leveling worker function. * @ubi: UBI device description object @@ -652,6 +653,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, #endif struct ubi_wl_entry *e1, *e2; struct ubi_vid_hdr *vid_hdr; + int to_leb_clean = 0; kfree(wrk); if (shutdown) @@ -756,6 +758,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); if (err && err != UBI_IO_BITFLIPS) { + to_leb_clean = 1; if (err == UBI_IO_FF) { /* * We are trying to move PEB without a VID header. UBI @@ -801,10 +804,12 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, * protection queue. */ protect = 1; + to_leb_clean = 1; goto out_not_moved; } if (err == MOVE_RETRY) { scrubbing = 1; + to_leb_clean = 1; goto out_not_moved; } if (err == MOVE_TARGET_BITFLIPS || err == MOVE_TARGET_WR_ERR || @@ -830,6 +835,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, ubi->erroneous_peb_count); goto out_error; } + to_leb_clean = 1; erroneous = 1; goto out_not_moved; } @@ -900,15 +906,26 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, wl_tree_add(e1, &ubi->scrub); else wl_tree_add(e1, &ubi->used); + if (to_leb_clean) { + wl_tree_add(e2, &ubi->free); + ubi->free_count++; + } + ubi_assert(!ubi->move_to_put); ubi->move_from = ubi->move_to = NULL; ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); ubi_free_vid_hdr(ubi, vid_hdr); - err = do_sync_erase(ubi, e2, vol_id, lnum, torture); - if (err) - goto out_ro; + if (to_leb_clean) { + ensure_wear_leveling(ubi, 0); + } else { + err = do_sync_erase(ubi, e2, vol_id, lnum, torture); + if (err) { + wl_entry_destroy(ubi, e2); + goto out_ro; + } + } mutex_unlock(&ubi->move_mutex); return 0;
wear_leveling_worker() currently unconditionally puts a PEB on erase in the error case even it just been taken from the free_list and never used. In case the PEB was never used it can be put back on the free list saving an erase cycle. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- drivers/mtd/ubi/wl.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-)