diff mbox

[6/7,RFC] UBI: Implement checkpointing support

Message ID 1336585125-127220-7-git-send-email-richard@nod.at
State RFC
Headers show

Commit Message

Richard Weinberger May 9, 2012, 5:38 p.m. UTC
Implements UBI checkpointing support.
It reduces the attaching time from O(N) to O(1).
Checkpoints are written on demand and upon changes of the volume layout.
If the recovery from a checkpoint fails we fall back to scanning mode.

Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/Kconfig      |    8 +
 drivers/mtd/ubi/Makefile     |    1 +
 drivers/mtd/ubi/checkpoint.c | 1128 ++++++++++++++++++++++++++++++++++++++++++
 drivers/mtd/ubi/scan.c       |   10 +-
 drivers/mtd/ubi/ubi.h        |   10 +-
 5 files changed, 1155 insertions(+), 2 deletions(-)
 create mode 100644 drivers/mtd/ubi/checkpoint.c
diff mbox

Patch

diff --git a/drivers/mtd/ubi/Kconfig b/drivers/mtd/ubi/Kconfig
index 4dcc752..3ba9978 100644
--- a/drivers/mtd/ubi/Kconfig
+++ b/drivers/mtd/ubi/Kconfig
@@ -51,6 +51,14 @@  config MTD_UBI_GLUEBI
 	   volume. This is handy to make MTD-oriented software (like JFFS2)
 	   work on top of UBI. Do not enable this unless you use legacy
 	   software.
+config MTD_UBI_CHECKPOINT
+	bool "UBIVIS (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	default n
+	help
+	   This option enables UBIVIS (AKA checkpointing).
+	   It allows attaching UBI devices without scanning the whole MTD
+	   device. Instead it extracts all needed information from a checkpoint.
 
 config MTD_UBI_DEBUG
 	bool "UBI debugging"
diff --git a/drivers/mtd/ubi/Makefile b/drivers/mtd/ubi/Makefile
index c9302a5..845312a 100644
--- a/drivers/mtd/ubi/Makefile
+++ b/drivers/mtd/ubi/Makefile
@@ -3,5 +3,6 @@  obj-$(CONFIG_MTD_UBI) += ubi.o
 ubi-y += vtbl.o vmt.o upd.o build.o cdev.o kapi.o eba.o io.o wl.o scan.o
 ubi-y += misc.o
 
+ubi-$(CONFIG_MTD_UBI_CHECKPOINT) += checkpoint.o
 ubi-$(CONFIG_MTD_UBI_DEBUG) += debug.o
 obj-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
diff --git a/drivers/mtd/ubi/checkpoint.c b/drivers/mtd/ubi/checkpoint.c
new file mode 100644
index 0000000..f43441c
--- /dev/null
+++ b/drivers/mtd/ubi/checkpoint.c
@@ -0,0 +1,1128 @@ 
+/*
+ * Copyright (c) 2012 Linutronix GmbH
+ * Author: Richard Weinberger <richard@nod.at>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ *
+ */
+
+#include <linux/crc32.h>
+#include "ubi.h"
+
+/**
+ * new_cp_vhdr - allocate a new volume header for checkpoint usage.
+ * @ubi: UBI device description object
+ * @vol_id: the VID of the new header
+ */
+static struct ubi_vid_hdr *new_cp_vhdr(struct ubi_device *ubi, int vol_id)
+{
+	struct ubi_vid_hdr *new;
+
+	new = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
+	if (!new)
+		goto out;
+
+	new->vol_type = UBI_VID_DYNAMIC;
+	new->vol_id = cpu_to_be32(vol_id);
+
+	/* the checkpoint has be deleted on older kernels */
+	new->compat = UBI_COMPAT_DELETE;
+
+out:
+	return new;
+}
+
+/**
+ * add_seb - create and add a scan erase block to a given list.
+ * @si: UBI scan info object
+ * @list: the target list
+ * @pnum: PEB number of the new scan erase block
+ * @ec: erease counter of the new SEB
+ */
+static int add_seb(struct ubi_scan_info *si, struct list_head *list,
+		   int pnum, int ec)
+{
+	struct ubi_scan_leb *seb;
+
+	seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL);
+	if (!seb)
+		return -ENOMEM;
+
+	seb->pnum = pnum;
+	seb->ec = ec;
+	seb->lnum = -1;
+	seb->scrub = seb->copy_flag = seb->sqnum = 0;
+
+	si->ec_sum += seb->ec;
+	si->ec_count++;
+
+	if (si->max_ec < seb->ec)
+		si->max_ec = seb->ec;
+
+	if (si->min_ec > seb->ec)
+		si->min_ec = seb->ec;
+
+	list_add_tail(&seb->u.list, list);
+
+	return 0;
+}
+
+/**
+ * add_vol - create and add a new scan volume to ubi_scan_info.
+ * @si: ubi_scan_info object
+ * @vol_id: VID of the new volume
+ * @used_ebs: number of used EBS
+ * @data_pad: data padding value of the new volume
+ * @vol_type: volume type
+ * @last_eb_bytes: number of bytes in the last LEB
+ */
+static struct ubi_scan_volume *add_vol(struct ubi_scan_info *si, int vol_id,
+				       int used_ebs, int data_pad, u8 vol_type,
+				       int last_eb_bytes)
+{
+	struct ubi_scan_volume *sv;
+	struct rb_node **p = &si->volumes.rb_node, *parent = NULL;
+
+	while (*p) {
+		parent = *p;
+		sv = rb_entry(parent, struct ubi_scan_volume, rb);
+
+		if (vol_id > sv->vol_id)
+			p = &(*p)->rb_left;
+		else if (vol_id > sv->vol_id)
+			p = &(*p)->rb_right;
+	}
+
+	sv = kmalloc(sizeof(struct ubi_scan_volume), GFP_KERNEL);
+	if (!sv)
+		goto out;
+
+	sv->highest_lnum = sv->leb_count = 0;
+	sv->vol_id = vol_id;
+	sv->used_ebs = used_ebs;
+	sv->data_pad = data_pad;
+	sv->last_data_size = last_eb_bytes;
+	sv->compat = 0;
+	sv->vol_type = vol_type;
+	sv->root = RB_ROOT;
+
+	rb_link_node(&sv->rb, parent, p);
+	rb_insert_color(&sv->rb, &si->volumes);
+
+out:
+	return sv;
+}
+
+/**
+ * assign_seb_to_sv - assigns a SEB to a given scan_volume and removes it
+ * from it's original list.
+ * @si: ubi_scan_info object
+ * @seb: the to be assigned SEB
+ * @sv: target scan volume
+ */
+static void assign_seb_to_sv(struct ubi_scan_info *si,
+			     struct ubi_scan_leb *seb,
+			     struct ubi_scan_volume *sv)
+{
+	struct ubi_scan_leb *tmp_seb;
+	struct rb_node **p = &si->volumes.rb_node, *parent = NULL;
+
+	p = &sv->root.rb_node;
+	while (*p) {
+		parent = *p;
+
+		tmp_seb = rb_entry(parent, struct ubi_scan_leb, u.rb);
+		if (seb->lnum != tmp_seb->lnum) {
+			if (seb->lnum < tmp_seb->lnum)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+
+			continue;
+		} else
+			break;
+	}
+
+	list_del(&seb->u.list);
+	sv->leb_count++;
+
+	rb_link_node(&seb->u.rb, parent, p);
+	rb_insert_color(&seb->u.rb, &sv->root);
+}
+
+/**
+ * update_vol - inserts or updates a LEB which was found a pool.
+ * @ubi: the UBI device object
+ * @si: scan info object
+ * @sv: the scan volume where this LEB belongs to
+ * @new_vh: the volume header derived from new_seb
+ * @new_seb: the SEB to be examined
+ */
+static int update_vol(struct ubi_device *ubi, struct ubi_scan_info *si,
+		      struct ubi_scan_volume *sv, struct ubi_vid_hdr *new_vh,
+		      struct ubi_scan_leb *new_seb)
+{
+	struct rb_node **p = &sv->root.rb_node, *parent = NULL;
+	struct ubi_scan_leb *seb, *victim;
+	int cmp_res;
+
+	while (*p) {
+		parent = *p;
+		seb = rb_entry(parent, struct ubi_scan_leb, u.rb);
+
+		if (be32_to_cpu(new_vh->lnum) != seb->lnum) {
+			if (be32_to_cpu(new_vh->lnum) < seb->lnum)
+				p = &(*p)->rb_left;
+			else
+				p = &(*p)->rb_right;
+
+			continue;
+		}
+
+		/* A nasty corner case:
+		 *
+		 * As we have three checkpoint pools (short, long and
+		 * unknown term) it can happen that a PEB is checkpointed
+		 * (in the EBA table of the checkpoint) and sits in one of the
+		 * thee pools. E.g. PEB P get's requests from WL subsystem for
+		 * short term usage, P goes into the short term checkpoint pool
+		 * and UBI assigns a LEB L to P. Therefore P is also known in
+		 * the EBA table.
+		 * If the long term or unknown pool is full a new checkpoint
+		 * is written.
+		 * --> P is in the short term pool and the EBA.
+		 * While reading the checkpoint we see P twice.
+		 *
+		 * If we had only one pool this must not happen.
+		 */
+		if (seb->pnum == new_seb->pnum) {
+			kmem_cache_free(si->scan_leb_slab, new_seb);
+
+			return 0;
+		}
+
+		cmp_res = ubi_compare_lebs(ubi, seb, new_seb->pnum, new_vh);
+		if (cmp_res < 0)
+			return cmp_res;
+
+		/* new_seb is newer */
+		if (cmp_res & 1) {
+			victim = kmem_cache_alloc(si->scan_leb_slab,
+				GFP_KERNEL);
+			if (!victim)
+				return -ENOMEM;
+
+			victim->ec = seb->ec;
+			victim->pnum = seb->pnum;
+			list_add_tail(&victim->u.list, &si->erase);
+
+			seb->ec = new_seb->ec;
+			seb->pnum = new_seb->pnum;
+			seb->copy_flag = new_vh->copy_flag;
+			kmem_cache_free(si->scan_leb_slab, new_seb);
+
+		/* new_seb is older */
+		} else {
+			ubi_msg("Vol %i: LEB %i's PEB %i is old, dropping it\n",
+				sv->vol_id, seb->lnum, new_seb->pnum);
+			list_add_tail(&new_seb->u.list, &si->erase);
+		}
+
+		return 0;
+	}
+
+	/* This LEB is new, let's add it to the volume */
+	dbg_bld("Vol %i (type = %i): SEB %i is new, adding it!\n", sv->vol_type,
+		sv->vol_id, new_seb->lnum);
+
+	if (sv->vol_type == UBI_STATIC_VOLUME)
+		sv->used_ebs++;
+
+	sv->leb_count++;
+
+	rb_link_node(&new_seb->u.rb, parent, p);
+	rb_insert_color(&new_seb->u.rb, &sv->root);
+
+	return 0;
+}
+
+/**
+ * process_pool_seb - we found a non-empty PEB in a pool
+ * @ubi: UBI device object
+ * @si: scan info object
+ * @new_vh: the volume header derived from new_seb
+ * @new_seb: the SEB to be examined
+ */
+static int process_pool_seb(struct ubi_device *ubi, struct ubi_scan_info *si,
+			    struct ubi_vid_hdr *new_vh,
+			    struct ubi_scan_leb *new_seb)
+{
+	struct ubi_scan_volume *sv, *tmp_sv = NULL;
+	struct rb_node **p = &si->volumes.rb_node, *parent = NULL;
+	int found = 0;
+
+	if (be32_to_cpu(new_vh->vol_id) == UBI_CP_SB_VOLUME_ID ||
+		be32_to_cpu(new_vh->vol_id) == UBI_CP_DATA_VOLUME_ID) {
+		kmem_cache_free(si->scan_leb_slab, new_seb);
+
+		return 0;
+	}
+
+	/* Find the volume this SEB belongs to */
+	while (*p) {
+		parent = *p;
+		tmp_sv = rb_entry(parent, struct ubi_scan_volume, rb);
+
+		if (be32_to_cpu(new_vh->vol_id) > tmp_sv->vol_id)
+			p = &(*p)->rb_left;
+		else if (be32_to_cpu(new_vh->vol_id) < tmp_sv->vol_id)
+			p = &(*p)->rb_right;
+		else {
+			found = 1;
+			break;
+		}
+	}
+
+	if (found)
+		sv = tmp_sv;
+	else {
+		ubi_err("Orphaned volume in checkpoint pool!");
+
+		return -EINVAL;
+	}
+
+	ubi_assert(be32_to_cpu(new_vh->vol_id) == sv->vol_id);
+
+	return update_vol(ubi, si, sv, new_vh, new_seb);
+}
+
+/**
+ * scan_pool - scans a pool for changed (no longer empty PEBs)
+ * @ubi: UBI device object
+ * @si: scan info object
+ * @pebs: an array of all PEB numbers in the to be scanned pool
+ * @pool_size: size of the pool (number of entries in @pebs)
+ * @max_sqnum2: pointer to the maximal sequence number
+ */
+static int scan_pool(struct ubi_device *ubi, struct ubi_scan_info *si,
+	int *pebs, int pool_size, unsigned long long *max_sqnum2)
+{
+	struct ubi_vid_hdr *vh;
+	struct ubi_scan_leb *new_seb;
+	int i;
+	int pnum;
+	int err;
+
+	vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
+	if (!vh)
+		return -ENOMEM;
+
+	/*
+	 * Now scan all PEBs in the pool to find changes which have been made
+	 * after the creation of the checkpoint
+	 */
+	for (i = 0; i < pool_size; i++) {
+		pnum = be32_to_cpu(pebs[i]);
+		err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0);
+
+		if (err == UBI_IO_FF)
+			continue;
+		else if (err == 0) {
+			dbg_bld("PEB %i is no longer free, scanning it!", pnum);
+
+			new_seb = kmem_cache_alloc(si->scan_leb_slab,
+				GFP_KERNEL);
+			if (!new_seb) {
+				ubi_free_vid_hdr(ubi, vh);
+
+				return -ENOMEM;
+			}
+
+			new_seb->ec = -1;
+			new_seb->pnum = pnum;
+			new_seb->lnum = be32_to_cpu(vh->lnum);
+			new_seb->sqnum = be64_to_cpu(vh->sqnum);
+			new_seb->copy_flag = vh->copy_flag;
+			new_seb->scrub = 0;
+
+			err = process_pool_seb(ubi, si, vh, new_seb);
+			if (err) {
+				ubi_free_vid_hdr(ubi, vh);
+				return err;
+			}
+
+			if (*max_sqnum2 < new_seb->sqnum)
+				*max_sqnum2 = new_seb->sqnum;
+		} else {
+			/* We are paranoid and fall back to scanning mode */
+			ubi_err("Checkpoint pool PEBs contains damaged PEBs!");
+			ubi_free_vid_hdr(ubi, vh);
+			return err;
+		}
+
+	}
+	ubi_free_vid_hdr(ubi, vh);
+
+	return 0;
+}
+
+/**
+ * ubi_scan_checkpoint - creates ubi_scan_info from a checkpoint.
+ * @ubi: UBI device object
+ * @cp_raw: the checkpoint it self al byte array
+ * @cp_size: size of the checkpoint in bytes
+ */
+struct ubi_scan_info *ubi_scan_checkpoint(struct ubi_device *ubi,
+					  char *cp_raw,
+					  size_t cp_size)
+{
+	struct list_head used;
+	struct ubi_scan_volume *sv;
+	struct ubi_scan_leb *seb, *tmp_seb, *_tmp_seb;
+	struct ubi_scan_info *si;
+	int i, j;
+
+	size_t cp_pos = 0;
+	struct ubi_cp_sb *cpsb;
+	struct ubi_cp_hdr *cphdr;
+	struct ubi_cp_long_pool *cplpl;
+	struct ubi_cp_short_pool *cpspl;
+	struct ubi_cp_unk_pool *cpupl;
+	struct ubi_cp_ec *cpec;
+	struct ubi_cp_volhdr *cpvhdr;
+	struct ubi_cp_eba *cp_eba;
+
+	unsigned long long max_sqnum2 = 0;
+
+	si = kzalloc(sizeof(struct ubi_scan_info), GFP_KERNEL);
+	if (!si)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&used);
+	INIT_LIST_HEAD(&si->corr);
+	INIT_LIST_HEAD(&si->free);
+	INIT_LIST_HEAD(&si->erase);
+	INIT_LIST_HEAD(&si->alien);
+	si->volumes = RB_ROOT;
+	si->min_ec = UBI_MAX_ERASECOUNTER;
+
+	si->scan_leb_slab = kmem_cache_create("ubi_scan_leb_slab",
+					      sizeof(struct ubi_scan_leb),
+					      0, 0, NULL);
+	if (!si->scan_leb_slab)
+		goto fail;
+
+	cpsb = (struct ubi_cp_sb *)(cp_raw);
+	si->max_sqnum = cpsb->sqnum;
+	cp_pos += sizeof(struct ubi_cp_sb);
+	if (cp_pos >= cp_size)
+		goto fail;
+
+	cphdr = (struct ubi_cp_hdr *)(cp_raw + cp_pos);
+	cp_pos += sizeof(*cphdr);
+
+	if (cphdr->magic != UBI_CP_HDR_MAGIC)
+		goto fail;
+
+	cplpl = (struct ubi_cp_long_pool *)(cp_raw + cp_pos);
+	cp_pos += sizeof(*cplpl);
+	if (cplpl->magic != UBI_CP_LPOOL_MAGIC)
+		goto fail;
+
+	cpspl = (struct ubi_cp_short_pool *)(cp_raw + cp_pos);
+	cp_pos += sizeof(*cpspl);
+	if (cpspl->magic != UBI_CP_SPOOL_MAGIC)
+		goto fail;
+
+	cpupl = (struct ubi_cp_unk_pool *)(cp_raw + cp_pos);
+	cp_pos += sizeof(*cpupl);
+	if (cpupl->magic != UBI_CP_UPOOL_MAGIC)
+		goto fail;
+
+	/* read EC values from free list */
+	for (i = 0; i < be32_to_cpu(cphdr->nfree); i++) {
+		cpec = (struct ubi_cp_ec *)(cp_raw + cp_pos);
+		cp_pos += sizeof(*cpec);
+		if (cp_pos >= cp_size)
+			goto fail;
+
+		add_seb(si, &si->free, be32_to_cpu(cpec->pnum),
+			be32_to_cpu(cpec->ec));
+	}
+
+	/* read EC values from used list */
+	for (i = 0; i < be32_to_cpu(cphdr->nused); i++) {
+		cpec = (struct ubi_cp_ec *)(cp_raw + cp_pos);
+		cp_pos += sizeof(*cpec);
+		if (cp_pos >= cp_size)
+			goto fail;
+
+		add_seb(si, &used, be32_to_cpu(cpec->pnum),
+			be32_to_cpu(cpec->ec));
+	}
+
+	si->mean_ec = div_u64(si->ec_sum, si->ec_count);
+
+	/* Iterate over all volumes and read their EBA table */
+	for (i = 0; i < be32_to_cpu(cphdr->nvol); i++) {
+		cpvhdr = (struct ubi_cp_volhdr *)(cp_raw + cp_pos);
+		cp_pos += sizeof(*cpvhdr);
+
+		if (cpvhdr->magic != UBI_CP_VHDR_MAGIC)
+			goto fail;
+
+		sv = add_vol(si, be32_to_cpu(cpvhdr->vol_id),
+			be32_to_cpu(cpvhdr->used_ebs),
+			be32_to_cpu(cpvhdr->data_pad),
+			cpvhdr->vol_type, be32_to_cpu(cpvhdr->last_eb_bytes));
+
+		if (!sv)
+			goto fail;
+
+		si->vols_found++;
+		if (si->highest_vol_id < be32_to_cpu(cpvhdr->vol_id))
+			si->highest_vol_id = be32_to_cpu(cpvhdr->vol_id);
+
+		for (j = 0; j < be32_to_cpu(cpvhdr->used_ebs); j++) {
+			cp_eba = (struct ubi_cp_eba *)(cp_raw + cp_pos);
+			cp_pos += sizeof(*cp_eba);
+			if (cp_pos >= cp_size)
+				goto fail;
+
+			if ((int)be32_to_cpu(cp_eba->pnum) < 0)
+				continue;
+
+			seb = NULL;
+			list_for_each_entry(tmp_seb, &used, u.list) {
+				if (tmp_seb->pnum == be32_to_cpu(cp_eba->pnum))
+					seb = tmp_seb;
+			}
+
+			/* Not good, a EBA entry points to a PEB which is not
+			 * n our used list */
+			if (!seb)
+				goto fail;
+
+			seb->lnum = be32_to_cpu(cp_eba->lnum);
+			assign_seb_to_sv(si, seb, sv);
+
+			dbg_bld("Inserting pnum %i (leb %i) to vol %i",
+				seb->pnum, seb->lnum, sv->vol_id);
+		}
+	}
+
+	/*
+	 * The remainning PEB in the used list are not used.
+	 * They lived in the checkpoint pool but got never used.
+	 */
+	list_for_each_entry_safe(tmp_seb, _tmp_seb, &used, u.list) {
+		list_del(&tmp_seb->u.list);
+		list_add_tail(&tmp_seb->u.list, &si->free);
+	}
+
+	if (scan_pool(ubi, si, cplpl->pebs, be32_to_cpu(cplpl->size),
+			&max_sqnum2) < 0)
+		goto fail;
+	if (scan_pool(ubi, si, cpspl->pebs, be32_to_cpu(cpspl->size),
+			&max_sqnum2) < 0)
+		goto fail;
+	if (scan_pool(ubi, si, cpupl->pebs, be32_to_cpu(cpupl->size),
+			&max_sqnum2) < 0)
+		goto fail;
+
+	if (max_sqnum2 > si->max_sqnum)
+		si->max_sqnum = max_sqnum2;
+
+	return si;
+
+fail:
+	ubi_scan_destroy_si(si);
+	return NULL;
+}
+
+/**
+ * ubi_read_checkpoint - read the checkpoint
+ * @ubi: UBI device object
+ * @cb_sb_pnum: PEB number of the checkpoint super block
+ */
+struct ubi_scan_info *ubi_read_checkpoint(struct ubi_device *ubi,
+					  int cb_sb_pnum)
+{
+	struct ubi_cp_sb *cpsb;
+	struct ubi_vid_hdr *vh;
+	int ret, i, nblocks;
+	char *cp_raw;
+	size_t cp_size;
+	__be32 data_crc;
+	unsigned long long sqnum = 0;
+	struct ubi_scan_info *si = NULL;
+
+	cpsb = kmalloc(sizeof(*cpsb), GFP_KERNEL);
+	if (!cpsb) {
+		si = ERR_PTR(-ENOMEM);
+
+		goto out;
+	}
+
+	ret = ubi_io_read(ubi, cpsb, cb_sb_pnum, ubi->leb_start, sizeof(*cpsb));
+	if (ret) {
+		ubi_err("Unable to read checkpoint super block");
+		si = ERR_PTR(ret);
+		kfree(cpsb);
+
+		goto out;
+	}
+
+	if (cpsb->magic != UBI_CP_SB_MAGIC) {
+		ubi_err("Super block magic does not match");
+		si = ERR_PTR(-EINVAL);
+		kfree(cpsb);
+
+		goto out;
+	}
+
+	if (cpsb->version != UBI_CP_FMT_VERSION) {
+		ubi_err("Unknown checkpoint format version!");
+		si = ERR_PTR(-EINVAL);
+		kfree(cpsb);
+
+		goto out;
+	}
+
+	nblocks = be32_to_cpu(cpsb->nblocks);
+
+	if (nblocks > UBI_CP_MAX_BLOCKS || nblocks < 1) {
+		ubi_err("Number of checkpoint blocks is invalid");
+		si = ERR_PTR(-EINVAL);
+		kfree(cpsb);
+
+		goto out;
+	}
+
+	cp_size = ubi->leb_size * nblocks;
+	/* cp_raw will contain the whole checkpoint */
+	cp_raw = vzalloc(cp_size);
+	if (!cp_raw) {
+		si = ERR_PTR(-ENOMEM);
+		kfree(cpsb);
+	}
+
+	vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
+	if (!vh) {
+		si = ERR_PTR(-ENOMEM);
+		kfree(cpsb);
+
+		goto free_raw;
+	}
+
+	for (i = 0; i < nblocks; i++) {
+		ret = ubi_io_read_vid_hdr(ubi, be32_to_cpu(cpsb->block_loc[i]),
+			vh, 0);
+		if (ret) {
+			ubi_err("Unable to read checkpoint block# %i (PEB: %i)",
+				i, be32_to_cpu(cpsb->block_loc[i]));
+			si = ERR_PTR(ret);
+
+			goto free_vhdr;
+		}
+
+		if (i == 0) {
+			if (be32_to_cpu(vh->vol_id) != UBI_CP_SB_VOLUME_ID) {
+				si = ERR_PTR(-EINVAL);
+
+				goto free_vhdr;
+			}
+		} else {
+			if (be32_to_cpu(vh->vol_id) != UBI_CP_DATA_VOLUME_ID) {
+				goto free_vhdr;
+
+				si = ERR_PTR(-EINVAL);
+			}
+		}
+
+		if (sqnum < be64_to_cpu(vh->sqnum))
+			sqnum = be64_to_cpu(vh->sqnum);
+
+		ret = ubi_io_read(ubi, cp_raw + (ubi->leb_size * i),
+			be32_to_cpu(cpsb->block_loc[i]),
+			ubi->leb_start, ubi->leb_size);
+
+		if (ret) {
+			ubi_err("Unable to read checkpoint block# %i (PEB: %i)",
+				i, be32_to_cpu(cpsb->block_loc[i]));
+			si = ERR_PTR(ret);
+
+			goto free_vhdr;
+		}
+	}
+
+	kfree(cpsb);
+
+	cpsb = (struct ubi_cp_sb *)cp_raw;
+	data_crc = crc32_be(UBI_CRC32_INIT, cp_raw + sizeof(*cpsb),
+		cp_size - sizeof(*cpsb));
+	if (data_crc != cpsb->data_crc) {
+		ubi_err("Checkpoint data CRC is invalid");
+		si = ERR_PTR(-EINVAL);
+
+		goto free_vhdr;
+	}
+
+	cpsb->sqnum = sqnum;
+
+	si = ubi_scan_checkpoint(ubi, cp_raw, cp_size);
+	if (!si) {
+		si = ERR_PTR(-EINVAL);
+
+		goto free_vhdr;
+	}
+
+	/* Store the checkpoint position into the ubi_device struct */
+	ubi->cp = kmalloc(sizeof(struct ubi_checkpoint), GFP_KERNEL);
+	if (!ubi->cp) {
+		si = ERR_PTR(-ENOMEM);
+		ubi_scan_destroy_si(si);
+
+		goto free_vhdr;
+	}
+
+	ubi->cp->size = cp_size;
+	ubi->cp->used_blocks = nblocks;
+
+	for (i = 0; i < UBI_CP_MAX_BLOCKS; i++) {
+		if (i < nblocks) {
+			ubi->cp->peb[i] = be32_to_cpu(cpsb->block_loc[i]);
+			ubi->cp->ec[i] = be32_to_cpu(cpsb->block_ec[i]);
+		} else {
+			ubi->cp->peb[i] = -1;
+			ubi->cp->ec[i] = 0;
+		}
+	}
+
+free_vhdr:
+	ubi_free_vid_hdr(ubi, vh);
+free_raw:
+	vfree(cp_raw);
+out:
+	return si;
+}
+
+/**
+ * ubi_find_checkpoint - searches the first UBI_CP_MAX_START PEBs for the
+ * checkpoint super block.
+ * @ubi: UBI device object
+ */
+int ubi_find_checkpoint(struct ubi_device *ubi)
+{
+	int i, ret;
+	int cp_sb = -ENOENT;
+	struct ubi_vid_hdr *vhdr;
+
+	vhdr = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
+	if (!vhdr)
+		return -ENOMEM;
+
+	for (i = 0; i < UBI_CP_MAX_START; i++) {
+		ret = ubi_io_read_vid_hdr(ubi, i, vhdr, 0);
+		/* ignore read errors */
+		if (ret)
+			continue;
+
+		if (be32_to_cpu(vhdr->vol_id) == UBI_CP_SB_VOLUME_ID) {
+			cp_sb = i;
+			break;
+		}
+	}
+
+	ubi_free_vid_hdr(ubi, vhdr);
+	return cp_sb;
+}
+
+/**
+ * ubi_write_checkpoint - writes a checkpoint
+ * @ubi: UBI device object
+ * @new_cp: the to be written checkppoint
+ */
+static int ubi_write_checkpoint(struct ubi_device *ubi,
+				struct ubi_checkpoint *new_cp)
+{
+	int ret;
+	size_t cp_pos = 0;
+	char *cp_raw;
+	int i, j;
+
+	struct ubi_cp_sb *cpsb;
+	struct ubi_cp_hdr *cph;
+	struct ubi_cp_long_pool *cplpl;
+	struct ubi_cp_short_pool *cpspl;
+	struct ubi_cp_unk_pool *cpupl;
+	struct ubi_cp_ec *cec;
+	struct ubi_cp_volhdr *cvh;
+	struct ubi_cp_eba *ceba;
+
+	struct rb_node *node;
+	struct ubi_wl_entry *wl_e;
+	struct ubi_volume *vol;
+
+	struct ubi_vid_hdr *svhdr, *dvhdr;
+
+	int nfree, nused, nvol;
+
+	cp_raw = vzalloc(new_cp->size);
+	if (!cp_raw) {
+		ret = -ENOMEM;
+
+		goto out;
+	}
+
+	svhdr = new_cp_vhdr(ubi, UBI_CP_SB_VOLUME_ID);
+	if (!svhdr) {
+		ret = -ENOMEM;
+
+		goto out_vfree;
+	}
+
+	dvhdr = new_cp_vhdr(ubi, UBI_CP_DATA_VOLUME_ID);
+	if (!dvhdr) {
+		ret = -ENOMEM;
+
+		goto out_kfree;
+	}
+
+	ubi_flush_prot_queue(ubi);
+
+	spin_lock(&ubi->volumes_lock);
+	spin_lock(&ubi->wl_lock);
+
+	cpsb = (struct ubi_cp_sb *)cp_raw;
+	cp_pos += sizeof(*cpsb);
+	ubi_assert(cp_pos <= new_cp->size);
+
+	cph = (struct ubi_cp_hdr *)(cp_raw + cp_pos);
+	cp_pos += sizeof(*cph);
+	ubi_assert(cp_pos <= new_cp->size);
+
+	cpsb->magic = UBI_CP_SB_MAGIC;
+	cpsb->version = UBI_CP_FMT_VERSION;
+	cpsb->nblocks = cpu_to_be32(new_cp->used_blocks);
+	/* the max sqnum will be filled in while *reading* the checkpoint */
+	cpsb->sqnum = 0;
+
+	cph->magic = UBI_CP_HDR_MAGIC;
+	nfree = 0;
+	nused = 0;
+	nvol = 0;
+
+	cplpl = (struct ubi_cp_long_pool *)(cp_raw + cp_pos);
+	cp_pos += sizeof(*cplpl);
+	cplpl->magic = UBI_CP_LPOOL_MAGIC;
+	cplpl->size = cpu_to_be32(ubi->long_pool.size);
+
+	cpspl = (struct ubi_cp_short_pool *)(cp_raw + cp_pos);
+	cp_pos += sizeof(*cpspl);
+	cpspl->magic = UBI_CP_SPOOL_MAGIC;
+	cpspl->size = cpu_to_be32(ubi->short_pool.size);
+
+	cpupl = (struct ubi_cp_unk_pool *)(cp_raw + cp_pos);
+	cp_pos += sizeof(*cpupl);
+	cpupl->magic = UBI_CP_UPOOL_MAGIC;
+	cpupl->size = cpu_to_be32(ubi->unk_pool.size);
+
+	for (i = 0; i < ubi->long_pool.size; i++)
+		cplpl->pebs[i] = cpu_to_be32(ubi->long_pool.pebs[i]);
+
+	for (i = 0; i < ubi->short_pool.size; i++)
+		cpspl->pebs[i] = cpu_to_be32(ubi->short_pool.pebs[i]);
+
+	for (i = 0; i < ubi->unk_pool.size; i++)
+		cpupl->pebs[i] = cpu_to_be32(ubi->unk_pool.pebs[i]);
+
+	for (node = rb_first(&ubi->free); node; node = rb_next(node)) {
+		wl_e = rb_entry(node, struct ubi_wl_entry, u.rb);
+		cec = (struct ubi_cp_ec *)(cp_raw + cp_pos);
+
+		cec->pnum = cpu_to_be32(wl_e->pnum);
+		cec->ec = cpu_to_be32(wl_e->ec);
+
+		nfree++;
+		cp_pos += sizeof(*cec);
+		ubi_assert(cp_pos <= new_cp->size);
+	}
+	cph->nfree = cpu_to_be32(nfree);
+
+	for (node = rb_first(&ubi->used); node; node = rb_next(node)) {
+		wl_e = rb_entry(node, struct ubi_wl_entry, u.rb);
+		cec = (struct ubi_cp_ec *)(cp_raw + cp_pos);
+
+		cec->pnum = cpu_to_be32(wl_e->pnum);
+		cec->ec = cpu_to_be32(wl_e->ec);
+
+		nused++;
+		cp_pos += sizeof(*cec);
+		ubi_assert(cp_pos <= new_cp->size);
+	}
+	cph->nused = cpu_to_be32(nused);
+
+	for (i = 0; i < UBI_MAX_VOLUMES + UBI_INT_VOL_COUNT; i++) {
+		vol = ubi->volumes[i];
+
+		if (!vol)
+			continue;
+
+		nvol++;
+
+		cvh = (struct ubi_cp_volhdr *)(cp_raw + cp_pos);
+		cp_pos += sizeof(*cvh);
+		ubi_assert(cp_pos <= new_cp->size);
+
+		cvh->magic = UBI_CP_VHDR_MAGIC;
+		cvh->vol_id = cpu_to_be32(vol->vol_id);
+		cvh->vol_type = vol->vol_type;
+		cvh->used_ebs = cpu_to_be32(vol->used_ebs);
+		cvh->data_pad = cpu_to_be32(vol->data_pad);
+		cvh->last_eb_bytes = cpu_to_be32(vol->last_eb_bytes);
+
+		ubi_assert(vol->vol_type == UBI_DYNAMIC_VOLUME ||
+			vol->vol_type == UBI_STATIC_VOLUME);
+
+		for (j = 0; j < vol->used_ebs; j++) {
+			ceba = (struct ubi_cp_eba *)(cp_raw + cp_pos);
+
+			ceba->lnum = cpu_to_be32(j);
+			ceba->pnum = cpu_to_be32(vol->eba_tbl[j]);
+
+			cp_pos += sizeof(*ceba);
+			ubi_assert(cp_pos <= new_cp->size);
+		}
+	}
+	cph->nvol = cpu_to_be32(nvol);
+
+	svhdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
+	svhdr->lnum = 0;
+
+	spin_unlock(&ubi->wl_lock);
+	spin_unlock(&ubi->volumes_lock);
+
+	dbg_bld("Writing checkpoint SB to PEB %i\n", new_cp->peb[0]);
+	ret = ubi_io_write_vid_hdr(ubi, new_cp->peb[0], svhdr);
+	if (ret) {
+		ubi_err("Unable to write vid_hdr to checkpoint SB!\n");
+
+		goto out_kfree;
+	}
+
+	for (i = 0; i < UBI_CP_MAX_BLOCKS; i++) {
+		cpsb->block_loc[i] = cpu_to_be32(new_cp->peb[i]);
+		cpsb->block_ec[i] = cpu_to_be32(new_cp->ec[i]);
+	}
+
+	cpsb->data_crc = 0;
+	cpsb->data_crc = crc32_be(UBI_CRC32_INIT, cp_raw + sizeof(*cpsb),
+		new_cp->size - sizeof(*cpsb));
+
+	for (i = 1; i < new_cp->used_blocks; i++) {
+		dvhdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
+		dvhdr->lnum = cpu_to_be32(i);
+		dbg_bld("Writing checkpoint data to PEB %i sqnum %llu\n",
+			new_cp->peb[i], be64_to_cpu(dvhdr->sqnum));
+		ret = ubi_io_write_vid_hdr(ubi, new_cp->peb[i], dvhdr);
+		if (ret) {
+			ubi_err("Unable to write vid_hdr to PEB %i!\n",
+				new_cp->peb[i]);
+
+			goto out_kfree;
+		}
+	}
+
+	for (i = 0; i < new_cp->used_blocks; i++) {
+		ret = ubi_io_write(ubi, cp_raw + (i * ubi->leb_size),
+			new_cp->peb[i], ubi->leb_start, ubi->leb_size);
+		if (ret) {
+			ubi_err("Unable to write checkpoint to PEB %i!\n",
+				new_cp->peb[i]);
+
+			goto out_kfree;
+		}
+	}
+
+	ubi_assert(new_cp);
+	ubi->cp = new_cp;
+
+	dbg_bld("Checkpoint written!");
+
+out_kfree:
+	kfree(svhdr);
+out_vfree:
+	vfree(cp_raw);
+out:
+	return ret;
+}
+
+/**
+ * get_ec - returns the erase counter of a given PEB
+ * @ubi: UBI device object
+ * @pnum: PEB number
+ */
+static int get_ec(struct ubi_device *ubi, int pnum)
+{
+	struct ubi_wl_entry *e;
+
+	e = ubi->lookuptbl[pnum];
+
+	/* can this really happen? */
+	if (!e)
+		return ubi->mean_ec ?: 1;
+	else
+		return e->ec;
+}
+
+/**
+ * ubi_update_checkpoint - will be called by UBI if a volume changes or
+ * a checkpoint pool becomes full.
+ * @ubi: UBI device object
+ */
+int ubi_update_checkpoint(struct ubi_device *ubi)
+{
+	int ret, i;
+	struct ubi_checkpoint *new_cp;
+
+	if (ubi->ro_mode)
+		return 0;
+
+	new_cp = kmalloc(sizeof(*new_cp), GFP_KERNEL);
+	if (!new_cp)
+		return -ENOMEM;
+
+	ubi->old_cp = ubi->cp;
+	ubi->cp = NULL;
+
+	if (ubi->old_cp) {
+		new_cp->peb[0] = ubi_wl_get_cp_peb(ubi, UBI_CP_MAX_START);
+		/* no fresh early PEB was found, reuse the old one */
+		if (new_cp->peb[0] < 0) {
+			struct ubi_ec_hdr *ec_hdr;
+
+			ec_hdr = kmalloc(sizeof(*ec_hdr), GFP_KERNEL);
+			if (!ec_hdr) {
+				kfree(new_cp);
+				return -ENOMEM;
+			}
+
+			/* we have to erase the block by hand */
+
+			ret = ubi_io_read_ec_hdr(ubi, ubi->old_cp->peb[0],
+				ec_hdr, 0);
+			if (ret) {
+				ubi_err("Unable to read EC header");
+
+				kfree(new_cp);
+				kfree(ec_hdr);
+				return -EINVAL;
+			}
+
+			ret = ubi_io_sync_erase(ubi, ubi->old_cp->peb[0], 0);
+			if (ret < 0) {
+				ubi_err("Unable to erase old SB");
+
+				kfree(new_cp);
+				kfree(ec_hdr);
+				return -EINVAL;
+			}
+
+			ec_hdr->ec += ret;
+			if (ret > UBI_MAX_ERASECOUNTER) {
+				ubi_err("Erase counter overflow!");
+				kfree(new_cp);
+				kfree(ec_hdr);
+				return -EINVAL;
+			}
+
+			ret = ubi_io_write_ec_hdr(ubi, ubi->old_cp->peb[0],
+				ec_hdr);
+			kfree(ec_hdr);
+			if (ret) {
+				ubi_err("Unable to write new EC header");
+				kfree(new_cp);
+				return -EINVAL;
+			}
+
+			new_cp->peb[0] = ubi->old_cp->peb[0];
+			new_cp->ec[0] = ubi->old_cp->ec[0];
+		} else {
+			/* we've got a new early PEB, return the old one */
+			ubi_wl_put_cp_peb(ubi, ubi->old_cp->peb[0], 0);
+			new_cp->ec[0] = get_ec(ubi, new_cp->peb[0]);
+		}
+
+		/* return all other checkpoint block to the wl system */
+		for (i = 1; i < UBI_CP_MAX_BLOCKS; i++) {
+			if (ubi->old_cp->peb[i] >= 0)
+				ubi_wl_put_cp_peb(ubi, ubi->old_cp->peb[i], 0);
+			else
+				break;
+		}
+	} else {
+		new_cp->peb[0] = ubi_wl_get_cp_peb(ubi, UBI_CP_MAX_START);
+		if (new_cp->peb[0] < 0) {
+			ubi_err("Could not find an early PEB");
+			kfree(new_cp);
+			return -ENOSPC;
+		}
+		new_cp->ec[0] = get_ec(ubi, new_cp->peb[0]);
+	}
+
+	new_cp->size = sizeof(struct ubi_cp_hdr) + \
+			sizeof(struct ubi_cp_long_pool) + \
+			sizeof(struct ubi_cp_short_pool) + \
+			sizeof(struct ubi_cp_unk_pool) + \
+			ubi->peb_count * (sizeof(struct ubi_cp_ec) + \
+			sizeof(struct ubi_cp_eba)) + \
+			sizeof(struct ubi_cp_volhdr) * UBI_MAX_VOLUMES;
+	new_cp->size = roundup(new_cp->size, ubi->leb_size);
+
+	new_cp->used_blocks = new_cp->size / ubi->leb_size;
+
+	if (new_cp->used_blocks > UBI_CP_MAX_BLOCKS) {
+		ubi_err("Checkpoint too large");
+		kfree(new_cp);
+
+		return -ENOSPC;
+	}
+
+	/* give the wl subsystem a chance to produce some free blocks */
+	cond_resched();
+
+	for (i = 1; i < UBI_CP_MAX_BLOCKS; i++) {
+		if (i < new_cp->used_blocks) {
+			new_cp->peb[i] = ubi_wl_get_cp_peb(ubi, INT_MAX);
+			if (new_cp->peb[i] < 0) {
+				ubi_err("Could not get any free erase block");
+
+				while (i--)
+					ubi_wl_put_cp_peb(ubi, new_cp->peb[i],
+						0);
+
+				kfree(new_cp);
+
+				return -ENOSPC;
+			}
+
+			new_cp->ec[i] = get_ec(ubi, new_cp->peb[i]);
+		} else {
+			new_cp->peb[i] = -1;
+			new_cp->ec[i] = 0;
+		}
+	}
+
+	kfree(ubi->old_cp);
+	ubi->old_cp = NULL;
+
+	return ubi_write_checkpoint(ubi, new_cp);
+}
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index 5d4c1d3..7d04008 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -1011,7 +1011,15 @@  static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
 	}
 
 	vol_id = be32_to_cpu(vidh->vol_id);
-	if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID) {
+#ifdef CONFIG_MTD_UBI_CHECKPOINT
+	if (vol_id > UBI_MAX_VOLUMES &&
+		vol_id != UBI_LAYOUT_VOLUME_ID &&
+		vol_id != UBI_CP_SB_VOLUME_ID &&
+		vol_id != UBI_CP_DATA_VOLUME_ID)
+#else
+	if (vol_id > UBI_MAX_VOLUMES && vol_id != UBI_LAYOUT_VOLUME_ID)
+#endif
+	{
 		int lnum = be32_to_cpu(vidh->lnum);
 
 		/* Unsupported internal volume */
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index df267bb..8d44152 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -625,11 +625,19 @@  int ubi_enumerate_volumes(struct notifier_block *nb);
 void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di);
 void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
 			    struct ubi_volume_info *vi);
-
 /* scan.c */
 int ubi_compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
 		      int pnum, const struct ubi_vid_hdr *vid_hdr);
 
+#ifdef CONFIG_MTD_UBI_CHECKPOINT
+/* checkpoint.c */
+int ubi_update_checkpoint(struct ubi_device *ubi);
+struct ubi_scan_info *ubi_read_checkpoint(struct ubi_device *ubi,
+	int cb_sb_pnum);
+int ubi_update_checkpoint(struct ubi_device *ubi);
+int ubi_find_checkpoint(struct ubi_device *ubi);
+#endif
+
 /*
  * ubi_rb_for_each_entry - walk an RB-tree.
  * @rb: a pointer to type 'struct rb_node' to use as a loop counter