From: Dennis Groenen Date: Fri, 30 Sep 2011 19:46:20 +0000 (+0200) Subject: Backported UBIFS patches by Peter Hunt X-Git-Url: https://vcs.maemo.org/git/?p=kernel-bfs;a=commitdiff_plain;h=48566248ca4b3068029c3b408ad775dcc8ba9596 Backported UBIFS patches by Peter Hunt --- diff --git a/kernel-bfs-2.6.28/debian/patches/extra/ubifs.diff b/kernel-bfs-2.6.28/debian/patches/extra/ubifs.diff new file mode 100644 index 0000000..81bfa46 --- /dev/null +++ b/kernel-bfs-2.6.28/debian/patches/extra/ubifs.diff @@ -0,0 +1,15327 @@ +diff -uprN linux-2.6.28/arch/x86/include/asm/proto.h ubifs-v2.6.28/arch/x86/include/asm/proto.h +--- linux-2.6.28/arch/x86/include/asm/proto.h 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/arch/x86/include/asm/proto.h 2011-06-15 14:22:06.000000000 -0400 +@@ -26,7 +26,4 @@ static const int reboot_force = 0; + + long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); + +-#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1)) +-#define round_down(x, y) ((x) & ~((y) - 1)) +- + #endif /* _ASM_X86_PROTO_H */ +diff -uprN linux-2.6.28/Documentation/filesystems/ubifs.txt ubifs-v2.6.28/Documentation/filesystems/ubifs.txt +--- linux-2.6.28/Documentation/filesystems/ubifs.txt 2011-06-15 15:12:26.000000000 -0400 ++++ ubifs-v2.6.28/Documentation/filesystems/ubifs.txt 2011-06-15 14:22:04.000000000 -0400 +@@ -82,12 +82,12 @@ Mount options + bulk_read read more in one go to take advantage of flash + media that read faster sequentially + no_bulk_read (*) do not bulk-read +-no_chk_data_crc skip checking of CRCs on data nodes in order to ++no_chk_data_crc (*) skip checking of CRCs on data nodes in order to + improve read performance. Use this option only + if the flash media is highly reliable. The effect + of this option is that corruption of the contents + of a file can go unnoticed. 
+-chk_data_crc (*) do not skip checking CRCs on data nodes ++chk_data_crc do not skip checking CRCs on data nodes + compr=none override default compressor and set it to "none" + compr=lzo override default compressor and set it to "lzo" + compr=zlib override default compressor and set it to "zlib" +@@ -115,28 +115,8 @@ ubi.mtd=0 root=ubi0:rootfs rootfstype=ub + Module Parameters for Debugging + =============================== + +-When UBIFS has been compiled with debugging enabled, there are 3 module ++When UBIFS has been compiled with debugging enabled, there are 2 module + parameters that are available to control aspects of testing and debugging. +-The parameters are unsigned integers where each bit controls an option. +-The parameters are: +- +-debug_msgs Selects which debug messages to display, as follows: +- +- Message Type Flag value +- +- General messages 1 +- Journal messages 2 +- Mount messages 4 +- Commit messages 8 +- LEB search messages 16 +- Budgeting messages 32 +- Garbage collection messages 64 +- Tree Node Cache (TNC) messages 128 +- LEB properties (lprops) messages 256 +- Input/output messages 512 +- Log messages 1024 +- Scan messages 2048 +- Recovery messages 4096 + + debug_chks Selects extra checks that UBIFS can do while running: + +@@ -154,11 +134,9 @@ debug_tsts Selects a mode of testing, as + + Test mode Flag value + +- Force in-the-gaps method 2 + Failure mode for recovery testing 4 + +-For example, set debug_msgs to 5 to display General messages and Mount +-messages. ++For example, set debug_chks to 3 to enable general and TNC checks. 
+ + + References +diff -uprN linux-2.6.28/drivers/char/random.c ubifs-v2.6.28/drivers/char/random.c +--- linux-2.6.28/drivers/char/random.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/char/random.c 2011-06-15 15:16:03.000000000 -0400 +@@ -1018,12 +1018,6 @@ random_read(struct file *file, char __us + /* like a named pipe */ + } + +- /* +- * If we gave the user some bytes, update the access time. +- */ +- if (count) +- file_accessed(file); +- + return (count ? count : retval); + } + +@@ -1074,7 +1068,6 @@ static ssize_t random_write(struct file + size_t count, loff_t *ppos) + { + size_t ret; +- struct inode *inode = file->f_path.dentry->d_inode; + + ret = write_pool(&blocking_pool, buffer, count); + if (ret) +@@ -1083,8 +1076,6 @@ static ssize_t random_write(struct file + if (ret) + return ret; + +- inode->i_mtime = current_fs_time(inode->i_sb); +- mark_inode_dirty(inode); + return (ssize_t)count; + } + +diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c +--- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c 2011-06-15 14:22:07.000000000 -0400 +@@ -421,6 +421,7 @@ struct mtd_info *cfi_cmdset_0001(struct + mtd->flags = MTD_CAP_NORFLASH; + mtd->name = map->name; + mtd->writesize = 1; ++ mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize; + + mtd->reboot_notifier.notifier_call = cfi_intelext_reboot; + +diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c +--- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c 2011-06-15 14:22:07.000000000 -0400 +@@ -346,6 +346,10 @@ struct mtd_info *cfi_cmdset_0002(struct + mtd->flags = MTD_CAP_NORFLASH; + mtd->name = map->name; + mtd->writesize = 1; ++ mtd->writebufsize = cfi_interleave(cfi) << 
cfi->cfiq->MaxBufWriteSize; ++ ++ DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): write buffer size %d\n", ++ __func__, mtd->writebufsize); + + if (cfi->cfi_mode==CFI_MODE_CFI){ + unsigned char bootloc; +diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c +--- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c 2011-06-15 14:22:07.000000000 -0400 +@@ -239,6 +239,7 @@ static struct mtd_info *cfi_staa_setup(s + mtd->resume = cfi_staa_resume; + mtd->flags = MTD_CAP_NORFLASH & ~MTD_BIT_WRITEABLE; + mtd->writesize = 8; /* FIXME: Should be 0 for STMicro flashes w/out ECC */ ++ mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize; + map->fldrv = &cfi_staa_chipdrv; + __module_get(THIS_MODULE); + mtd->name = map->name; +diff -uprN linux-2.6.28/drivers/mtd/devices/mtdram.c ubifs-v2.6.28/drivers/mtd/devices/mtdram.c +--- linux-2.6.28/drivers/mtd/devices/mtdram.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/devices/mtdram.c 2011-06-15 14:22:07.000000000 -0400 +@@ -109,6 +109,7 @@ int mtdram_init_device(struct mtd_info * + mtd->flags = MTD_CAP_RAM; + mtd->size = size; + mtd->writesize = 1; ++ mtd->writebufsize = 64; /* Mimic CFI NOR flashes */ + mtd->erasesize = MTDRAM_ERASE_SIZE; + mtd->priv = mapped_address; + +diff -uprN linux-2.6.28/drivers/mtd/mtd_blkdevs.c ubifs-v2.6.28/drivers/mtd/mtd_blkdevs.c +--- linux-2.6.28/drivers/mtd/mtd_blkdevs.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/mtd_blkdevs.c 2011-06-15 14:22:07.000000000 -0400 +@@ -139,7 +139,7 @@ static int blktrans_open(struct block_de + struct mtd_blktrans_ops *tr = dev->tr; + int ret = -ENODEV; + +- if (!try_module_get(dev->mtd->owner)) ++ if (!get_mtd_device(NULL, dev->mtd->index)) + goto out; + + if (!try_module_get(tr->owner)) +@@ -153,7 +153,7 @@ static int blktrans_open(struct block_de + ret = 0; + if (tr->open && 
(ret = tr->open(dev))) { + dev->mtd->usecount--; +- module_put(dev->mtd->owner); ++ put_mtd_device(dev->mtd); + out_tr: + module_put(tr->owner); + } +@@ -172,7 +172,7 @@ static int blktrans_release(struct gendi + + if (!ret) { + dev->mtd->usecount--; +- module_put(dev->mtd->owner); ++ put_mtd_device(dev->mtd); + module_put(tr->owner); + } + +diff -uprN linux-2.6.28/drivers/mtd/mtdconcat.c ubifs-v2.6.28/drivers/mtd/mtdconcat.c +--- linux-2.6.28/drivers/mtd/mtdconcat.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/mtdconcat.c 2011-06-15 14:22:07.000000000 -0400 +@@ -698,6 +698,7 @@ struct mtd_info *mtd_concat_create(struc + struct mtd_concat *concat; + u_int32_t max_erasesize, curr_erasesize; + int num_erase_region; ++ int max_writebufsize = 0; + + printk(KERN_NOTICE "Concatenating MTD devices:\n"); + for (i = 0; i < num_devs; i++) +@@ -724,6 +725,12 @@ struct mtd_info *mtd_concat_create(struc + concat->mtd.size = subdev[0]->size; + concat->mtd.erasesize = subdev[0]->erasesize; + concat->mtd.writesize = subdev[0]->writesize; ++ ++ for (i = 0; i < num_devs; i++) ++ if (max_writebufsize < subdev[i]->writebufsize) ++ max_writebufsize = subdev[i]->writebufsize; ++ concat->mtd.writebufsize = max_writebufsize; ++ + concat->mtd.subpage_sft = subdev[0]->subpage_sft; + concat->mtd.oobsize = subdev[0]->oobsize; + concat->mtd.oobavail = subdev[0]->oobavail; +diff -uprN linux-2.6.28/drivers/mtd/mtdpart.c ubifs-v2.6.28/drivers/mtd/mtdpart.c +--- linux-2.6.28/drivers/mtd/mtdpart.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/mtdpart.c 2011-06-15 15:16:03.000000000 -0400 +@@ -363,6 +363,7 @@ static struct mtd_part *add_one_partitio + slave->mtd.flags = master->flags & ~part->mask_flags; + slave->mtd.size = part->size; + slave->mtd.writesize = master->writesize; ++ slave->mtd.writebufsize = master->writebufsize; + slave->mtd.oobsize = master->oobsize; + slave->mtd.oobavail = master->oobavail; + slave->mtd.subpage_sft = master->subpage_sft; 
+diff -uprN linux-2.6.28/drivers/mtd/nand/nand_base.c ubifs-v2.6.28/drivers/mtd/nand/nand_base.c +--- linux-2.6.28/drivers/mtd/nand/nand_base.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/nand/nand_base.c 2011-06-15 14:22:07.000000000 -0400 +@@ -1084,7 +1084,8 @@ static int nand_do_read_ops(struct mtd_i + + /* Transfer not aligned data */ + if (!aligned) { +- if (!NAND_SUBPAGE_READ(chip) && !oob) ++ if (!NAND_SUBPAGE_READ(chip) && !oob && ++ !(mtd->ecc_stats.failed - stats.failed)) + chip->pagebuf = realpage; + memcpy(buf, chip->buffers->databuf + col, bytes); + } +@@ -2703,6 +2704,7 @@ int nand_scan_tail(struct mtd_info *mtd) + mtd->resume = nand_resume; + mtd->block_isbad = nand_block_isbad; + mtd->block_markbad = nand_block_markbad; ++ mtd->writebufsize = mtd->writesize; + + /* propagate ecc.layout to mtd_info */ + mtd->ecclayout = chip->ecc.layout; +diff -uprN linux-2.6.28/drivers/mtd/nand/nandsim.c ubifs-v2.6.28/drivers/mtd/nand/nandsim.c +--- linux-2.6.28/drivers/mtd/nand/nandsim.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/nand/nandsim.c 2011-06-15 14:22:07.000000000 -0400 +@@ -1736,13 +1736,17 @@ static void ns_nand_write_byte(struct mt + + /* Check if chip is expecting command */ + if (NS_STATE(ns->nxstate) != STATE_UNKNOWN && !(ns->nxstate & STATE_CMD_MASK)) { +- /* +- * We are in situation when something else (not command) +- * was expected but command was input. In this case ignore +- * previous command(s)/state(s) and accept the last one. +- */ +- NS_WARN("write_byte: command (%#x) wasn't expected, expected state is %s, " +- "ignore previous states\n", (uint)byte, get_state_name(ns->nxstate)); ++ /* Do not warn if only 2 id bytes are read */ ++ if (!(ns->regs.command == NAND_CMD_READID && ++ NS_STATE(ns->state) == STATE_DATAOUT_ID && ns->regs.count == 2)) { ++ /* ++ * We are in situation when something else (not command) ++ * was expected but command was input. 
In this case ignore ++ * previous command(s)/state(s) and accept the last one. ++ */ ++ NS_WARN("write_byte: command (%#x) wasn't expected, expected state is %s, " ++ "ignore previous states\n", (uint)byte, get_state_name(ns->nxstate)); ++ } + switch_to_ready_state(ns, NS_STATUS_FAILED(ns)); + } + +diff -uprN linux-2.6.28/drivers/mtd/onenand/onenand_base.c ubifs-v2.6.28/drivers/mtd/onenand/onenand_base.c +--- linux-2.6.28/drivers/mtd/onenand/onenand_base.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/onenand/onenand_base.c 2011-06-15 15:16:03.000000000 -0400 +@@ -2858,6 +2858,7 @@ int onenand_scan(struct mtd_info *mtd, i + mtd->block_isbad = onenand_block_isbad; + mtd->block_markbad = onenand_block_markbad; + mtd->owner = THIS_MODULE; ++ mtd->writebufsize = mtd->writesize; + + /* Unlock whole block */ + onenand_unlock_all(mtd); +diff -uprN linux-2.6.28/drivers/mtd/ubi/build.c ubifs-v2.6.28/drivers/mtd/ubi/build.c +--- linux-2.6.28/drivers/mtd/ubi/build.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/build.c 2011-06-15 14:22:07.000000000 -0400 +@@ -37,6 +37,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -46,9 +47,16 @@ + /* Maximum length of the 'mtd=' parameter */ + #define MTD_PARAM_LEN_MAX 64 + ++#ifdef CONFIG_MTD_UBI_MODULE ++#define ubi_is_module() 1 ++#else ++#define ubi_is_module() 0 ++#endif ++ + /** + * struct mtd_dev_param - MTD device parameter description data structure. 
+- * @name: MTD device name or number string ++ * @name: MTD character device node path, MTD device name, or MTD device number ++ * string + * @vid_hdr_offs: VID header offset + */ + struct mtd_dev_param { +@@ -57,10 +65,10 @@ struct mtd_dev_param { + }; + + /* Numbers of elements set in the @mtd_dev_param array */ +-static int mtd_devs; ++static int __initdata mtd_devs; + + /* MTD devices specification parameters */ +-static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES]; ++static struct mtd_dev_param __initdata mtd_dev_param[UBI_MAX_DEVICES]; + + /* Root UBI "class" object (corresponds to '//class/ubi/') */ + struct class *ubi_class; +@@ -122,6 +130,94 @@ static struct device_attribute dev_mtd_n + __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL); + + /** ++ * ubi_volume_notify - send a volume change notification. ++ * @ubi: UBI device description object ++ * @vol: volume description object of the changed volume ++ * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) ++ * ++ * This is a helper function which notifies all subscribers about a volume ++ * change event (creation, removal, re-sizing, re-naming, updating). Returns ++ * zero in case of success and a negative error code in case of failure. ++ */ ++int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, int ntype) ++{ ++ struct ubi_notification nt; ++ ++ ubi_do_get_device_info(ubi, &nt.di); ++ ubi_do_get_volume_info(ubi, vol, &nt.vi); ++ return blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt); ++} ++ ++/** ++ * ubi_notify_all - send a notification to all volumes. ++ * @ubi: UBI device description object ++ * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc) ++ * @nb: the notifier to call ++ * ++ * This function walks all volumes of UBI device @ubi and sends the @ntype ++ * notification for each volume. If @nb is %NULL, then all registered notifiers ++ * are called, otherwise only the @nb notifier is called. Returns the number of ++ * sent notifications. 
++ */ ++int ubi_notify_all(struct ubi_device *ubi, int ntype, struct notifier_block *nb) ++{ ++ struct ubi_notification nt; ++ int i, count = 0; ++ ++ ubi_do_get_device_info(ubi, &nt.di); ++ ++ mutex_lock(&ubi->device_mutex); ++ for (i = 0; i < ubi->vtbl_slots; i++) { ++ /* ++ * Since the @ubi->device is locked, and we are not going to ++ * change @ubi->volumes, we do not have to lock ++ * @ubi->volumes_lock. ++ */ ++ if (!ubi->volumes[i]) ++ continue; ++ ++ ubi_do_get_volume_info(ubi, ubi->volumes[i], &nt.vi); ++ if (nb) ++ nb->notifier_call(nb, ntype, &nt); ++ else ++ blocking_notifier_call_chain(&ubi_notifiers, ntype, ++ &nt); ++ count += 1; ++ } ++ mutex_unlock(&ubi->device_mutex); ++ ++ return count; ++} ++ ++/** ++ * ubi_enumerate_volumes - send "add" notification for all existing volumes. ++ * @nb: the notifier to call ++ * ++ * This function walks all UBI devices and volumes and sends the ++ * %UBI_VOLUME_ADDED notification for each volume. If @nb is %NULL, then all ++ * registered notifiers are called, otherwise only the @nb notifier is called. ++ * Returns the number of sent notifications. ++ */ ++int ubi_enumerate_volumes(struct notifier_block *nb) ++{ ++ int i, count = 0; ++ ++ /* ++ * Since the @ubi_devices_mutex is locked, and we are not going to ++ * change @ubi_devices, we do not have to lock @ubi_devices_lock. ++ */ ++ for (i = 0; i < UBI_MAX_DEVICES; i++) { ++ struct ubi_device *ubi = ubi_devices[i]; ++ ++ if (!ubi) ++ continue; ++ count += ubi_notify_all(ubi, UBI_VOLUME_ADDED, nb); ++ } ++ ++ return count; ++} ++ ++/** + * ubi_get_device - get UBI device. 
+ * @ubi_num: UBI device number + * +@@ -263,17 +359,23 @@ static ssize_t dev_attribute_show(struct + return ret; + } + +-/* Fake "release" method for UBI devices */ +-static void dev_release(struct device *dev) { } ++static void dev_release(struct device *dev) ++{ ++ struct ubi_device *ubi = container_of(dev, struct ubi_device, dev); ++ ++ kfree(ubi); ++} + + /** + * ubi_sysfs_init - initialize sysfs for an UBI device. + * @ubi: UBI device description object ++ * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was ++ * taken + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +-static int ubi_sysfs_init(struct ubi_device *ubi) ++static int ubi_sysfs_init(struct ubi_device *ubi, int *ref) + { + int err; + +@@ -285,6 +387,7 @@ static int ubi_sysfs_init(struct ubi_dev + if (err) + return err; + ++ *ref = 1; + err = device_create_file(&ubi->dev, &dev_eraseblock_size); + if (err) + return err; +@@ -340,7 +443,7 @@ static void ubi_sysfs_close(struct ubi_d + } + + /** +- * kill_volumes - destroy all volumes. ++ * kill_volumes - destroy all user volumes. + * @ubi: UBI device description object + */ + static void kill_volumes(struct ubi_device *ubi) +@@ -353,36 +456,29 @@ static void kill_volumes(struct ubi_devi + } + + /** +- * free_user_volumes - free all user volumes. +- * @ubi: UBI device description object +- * +- * Normally the volumes are freed at the release function of the volume device +- * objects. However, on error paths the volumes have to be freed before the +- * device objects have been initialized. +- */ +-static void free_user_volumes(struct ubi_device *ubi) +-{ +- int i; +- +- for (i = 0; i < ubi->vtbl_slots; i++) +- if (ubi->volumes[i]) { +- kfree(ubi->volumes[i]->eba_tbl); +- kfree(ubi->volumes[i]); +- } +-} +- +-/** + * uif_init - initialize user interfaces for an UBI device. 
+ * @ubi: UBI device description object ++ * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was ++ * taken, otherwise set to %0 ++ * ++ * This function initializes various user interfaces for an UBI device. If the ++ * initialization fails at an early stage, this function frees all the ++ * resources it allocated, returns an error, and @ref is set to %0. However, ++ * if the initialization fails after the UBI device was registered in the ++ * driver core subsystem, this function takes a reference to @ubi->dev, because ++ * otherwise the release function ('dev_release()') would free whole @ubi ++ * object. The @ref argument is set to %1 in this case. The caller has to put ++ * this reference. + * + * This function returns zero in case of success and a negative error code in +- * case of failure. Note, this function destroys all volumes if it failes. ++ * case of failure. + */ +-static int uif_init(struct ubi_device *ubi) ++static int uif_init(struct ubi_device *ubi, int *ref) + { +- int i, err, do_free = 0; ++ int i, err; + dev_t dev; + ++ *ref = 0; + sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num); + + /* +@@ -410,7 +506,7 @@ static int uif_init(struct ubi_device *u + goto out_unreg; + } + +- err = ubi_sysfs_init(ubi); ++ err = ubi_sysfs_init(ubi, ref); + if (err) + goto out_sysfs; + +@@ -427,13 +523,12 @@ static int uif_init(struct ubi_device *u + + out_volumes: + kill_volumes(ubi); +- do_free = 0; + out_sysfs: ++ if (*ref) ++ get_device(&ubi->dev); + ubi_sysfs_close(ubi); + cdev_del(&ubi->cdev); + out_unreg: +- if (do_free) +- free_user_volumes(ubi); + unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1); + ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err); + return err; +@@ -493,8 +588,10 @@ static int attach_by_scanning(struct ubi + + ubi->bad_peb_count = si->bad_peb_count; + ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count; ++ ubi->corr_peb_count = si->corr_peb_count; + ubi->max_ec = si->max_ec; 
+ ubi->mean_ec = si->mean_ec; ++ ubi_msg("max. sequence number: %llu", si->max_sqnum); + + err = ubi_read_volume_table(ubi, si); + if (err) +@@ -567,6 +664,11 @@ static int io_init(struct ubi_device *ub + if (ubi->mtd->block_isbad && ubi->mtd->block_markbad) + ubi->bad_allowed = 1; + ++ if (ubi->mtd->type == MTD_NORFLASH) { ++ ubi_assert(ubi->mtd->writesize == 1); ++ ubi->nor_flash = 1; ++ } ++ + ubi->min_io_size = ubi->mtd->writesize; + ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft; + +@@ -585,11 +687,25 @@ static int io_init(struct ubi_device *ub + ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size); + ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0); + ++ ubi->max_write_size = ubi->mtd->writebufsize; ++ /* ++ * Maximum write size has to be greater or equivalent to min. I/O ++ * size, and be multiple of min. I/O size. ++ */ ++ if (ubi->max_write_size < ubi->min_io_size || ++ ubi->max_write_size % ubi->min_io_size || ++ !is_power_of_2(ubi->max_write_size)) { ++ ubi_err("bad write buffer size %d for %d min. 
I/O unit", ++ ubi->max_write_size, ubi->min_io_size); ++ return -EINVAL; ++ } ++ + /* Calculate default aligned sizes of EC and VID headers */ + ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); + ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); + + dbg_msg("min_io_size %d", ubi->min_io_size); ++ dbg_msg("max_write_size %d", ubi->max_write_size); + dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size); + dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize); + dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize); +@@ -606,7 +722,7 @@ static int io_init(struct ubi_device *ub + } + + /* Similar for the data offset */ +- ubi->leb_start = ubi->vid_hdr_offset + UBI_EC_HDR_SIZE; ++ ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE; + ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); + + dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset); +@@ -751,7 +867,7 @@ static int autoresize(struct ubi_device + int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset) + { + struct ubi_device *ubi; +- int i, err, do_free = 1; ++ int i, err, ref = 0; + + /* + * Check if we already have the same MTD device attached. 
+@@ -814,11 +930,12 @@ int ubi_attach_mtd_dev(struct mtd_info * + + mutex_init(&ubi->buf_mutex); + mutex_init(&ubi->ckvol_mutex); +- mutex_init(&ubi->mult_mutex); +- mutex_init(&ubi->volumes_mutex); ++ mutex_init(&ubi->device_mutex); + spin_lock_init(&ubi->volumes_lock); + + ubi_msg("attaching mtd%d to ubi%d", mtd->index, ubi_num); ++ dbg_msg("sizeof(struct ubi_scan_leb) %zu", sizeof(struct ubi_scan_leb)); ++ dbg_msg("sizeof(struct ubi_wl_entry) %zu", sizeof(struct ubi_wl_entry)); + + err = io_init(ubi); + if (err) +@@ -833,13 +950,6 @@ int ubi_attach_mtd_dev(struct mtd_info * + if (!ubi->peb_buf2) + goto out_free; + +-#ifdef CONFIG_MTD_UBI_DEBUG +- mutex_init(&ubi->dbg_buf_mutex); +- ubi->dbg_peb_buf = vmalloc(ubi->peb_size); +- if (!ubi->dbg_peb_buf) +- goto out_free; +-#endif +- + err = attach_by_scanning(ubi); + if (err) { + dbg_err("failed to attach by scanning, error %d", err); +@@ -852,9 +962,9 @@ int ubi_attach_mtd_dev(struct mtd_info * + goto out_detach; + } + +- err = uif_init(ubi); ++ err = uif_init(ubi, &ref); + if (err) +- goto out_nofree; ++ goto out_detach; + + ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); + if (IS_ERR(ubi->bgt_thread)) { +@@ -869,6 +979,7 @@ int ubi_attach_mtd_dev(struct mtd_info * + ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20); + ubi_msg("number of good PEBs: %d", ubi->good_peb_count); + ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count); ++ ubi_msg("number of corrupted PEBs: %d", ubi->corr_peb_count); + ubi_msg("max. 
allowed volumes: %d", ubi->vtbl_slots); + ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD); + ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT); +@@ -879,32 +990,34 @@ int ubi_attach_mtd_dev(struct mtd_info * + ubi_msg("number of PEBs reserved for bad PEB handling: %d", + ubi->beb_rsvd_pebs); + ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec); +- ubi_msg("image sequence number: %d", ubi->image_seq); ++ ubi_msg("image sequence number: %d", ubi->image_seq); + +- if (!DBG_DISABLE_BGT) +- ubi->thread_enabled = 1; ++ /* ++ * The below lock makes sure we do not race with 'ubi_thread()' which ++ * checks @ubi->thread_enabled. Otherwise we may fail to wake it up. ++ */ ++ spin_lock(&ubi->wl_lock); ++ ubi->thread_enabled = 1; + wake_up_process(ubi->bgt_thread); ++ spin_unlock(&ubi->wl_lock); + + ubi_devices[ubi_num] = ubi; ++ ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL); + return ubi_num; + + out_uif: + uif_close(ubi); +-out_nofree: +- do_free = 0; + out_detach: + ubi_wl_close(ubi); +- if (do_free) +- free_user_volumes(ubi); + free_internal_volumes(ubi); + vfree(ubi->vtbl); + out_free: + vfree(ubi->peb_buf1); + vfree(ubi->peb_buf2); +-#ifdef CONFIG_MTD_UBI_DEBUG +- vfree(ubi->dbg_peb_buf); +-#endif +- kfree(ubi); ++ if (ref) ++ put_device(&ubi->dev); ++ else ++ kfree(ubi); + return err; + } + +@@ -928,13 +1041,13 @@ int ubi_detach_mtd_dev(int ubi_num, int + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) + return -EINVAL; + +- spin_lock(&ubi_devices_lock); +- ubi = ubi_devices[ubi_num]; +- if (!ubi) { +- spin_unlock(&ubi_devices_lock); ++ ubi = ubi_get_device(ubi_num); ++ if (!ubi) + return -EINVAL; +- } + ++ spin_lock(&ubi_devices_lock); ++ put_device(&ubi->dev); ++ ubi->ref_count -= 1; + if (ubi->ref_count) { + if (!anyway) { + spin_unlock(&ubi_devices_lock); +@@ -948,6 +1061,7 @@ int ubi_detach_mtd_dev(int ubi_num, int + spin_unlock(&ubi_devices_lock); + + ubi_assert(ubi_num == ubi->ubi_num); ++ ubi_notify_all(ubi, 
UBI_VOLUME_REMOVED, NULL); + dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num); + + /* +@@ -957,6 +1071,12 @@ int ubi_detach_mtd_dev(int ubi_num, int + if (ubi->bgt_thread) + kthread_stop(ubi->bgt_thread); + ++ /* ++ * Get a reference to the device in order to prevent 'dev_release()' ++ * from freeing the @ubi object. ++ */ ++ get_device(&ubi->dev); ++ + uif_close(ubi); + ubi_wl_close(ubi); + free_internal_volumes(ubi); +@@ -964,22 +1084,56 @@ int ubi_detach_mtd_dev(int ubi_num, int + put_mtd_device(ubi->mtd); + vfree(ubi->peb_buf1); + vfree(ubi->peb_buf2); +-#ifdef CONFIG_MTD_UBI_DEBUG +- vfree(ubi->dbg_peb_buf); +-#endif + ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num); +- kfree(ubi); ++ put_device(&ubi->dev); + return 0; + } + + /** +- * find_mtd_device - open an MTD device by its name or number. +- * @mtd_dev: name or number of the device ++ * open_mtd_by_chdev - open an MTD device by its character device node path. ++ * @mtd_dev: MTD character device node path ++ * ++ * This helper function opens an MTD device by its character node device path. ++ * Returns MTD device description object in case of success and a negative ++ * error code in case of failure. ++ */ ++static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) ++{ ++ int err, major, minor, mode; ++ struct path path; ++ ++ /* Probably this is an MTD character device node path */ ++ err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path); ++ if (err) ++ return ERR_PTR(err); ++ ++ /* MTD device number is defined by the major / minor numbers */ ++ major = imajor(path.dentry->d_inode); ++ minor = iminor(path.dentry->d_inode); ++ mode = path.dentry->d_inode->i_mode; ++ path_put(&path); ++ if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode)) ++ return ERR_PTR(-EINVAL); ++ ++ if (minor & 1) ++ /* ++ * Just do not think the "/dev/mtdrX" devices support is need, ++ * so do not support them to avoid doing extra work. 
++ */ ++ return ERR_PTR(-EINVAL); ++ ++ return get_mtd_device(NULL, minor / 2); ++} ++ ++/** ++ * open_mtd_device - open MTD device by name, character device path, or number. ++ * @mtd_dev: name, character device node path, or MTD device device number + * + * This function tries to open and MTD device described by @mtd_dev string, +- * which is first treated as an ASCII number, and if it is not true, it is +- * treated as MTD device name. Returns MTD device description object in case of +- * success and a negative error code in case of failure. ++ * which is first treated as ASCII MTD device number, and if it is not true, it ++ * is treated as MTD device name, and if that is also not true, it is treated ++ * as MTD character device node path. Returns MTD device description object in ++ * case of success and a negative error code in case of failure. + */ + static struct mtd_info * __init open_mtd_device(const char *mtd_dev) + { +@@ -994,6 +1148,9 @@ static struct mtd_info * __init open_mtd + * MTD device name. + */ + mtd = get_mtd_device_nm(mtd_dev); ++ if (IS_ERR(mtd) && PTR_ERR(mtd) == -ENODEV) ++ /* Probably this is an MTD character device node path */ ++ mtd = open_mtd_by_chdev(mtd_dev); + } else + mtd = get_mtd_device(NULL, mtd_num); + +@@ -1057,9 +1214,24 @@ static int __init ubi_init(void) + p->vid_hdr_offs); + mutex_unlock(&ubi_devices_mutex); + if (err < 0) { +- put_mtd_device(mtd); + ubi_err("cannot attach mtd%d", mtd->index); +- goto out_detach; ++ put_mtd_device(mtd); ++ ++ /* ++ * Originally UBI stopped initializing on any error. ++ * However, later on it was found out that this ++ * behavior is not very good when UBI is compiled into ++ * the kernel and the MTD devices to attach are passed ++ * through the command line. Indeed, UBI failure ++ * stopped whole boot sequence. ++ * ++ * To fix this, we changed the behavior for the ++ * non-module case, but preserved the old behavior for ++ * the module case, just for compatibility. 
This is a ++ * little inconsistent, though. ++ */ ++ if (ubi_is_module()) ++ goto out_detach; + } + } + +@@ -1209,13 +1381,15 @@ static int __init ubi_mtd_param_parse(co + + module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000); + MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: " +- "mtd=[,].\n" ++ "mtd=[,].\n" + "Multiple \"mtd\" parameters may be specified.\n" +- "MTD devices may be specified by their number or name.\n" ++ "MTD devices may be specified by their number, name, or " ++ "path to the MTD character device node.\n" + "Optional \"vid_hdr_offs\" parameter specifies UBI VID " +- "header position and data starting position to be used " +- "by UBI.\n" +- "Example: mtd=content,1984 mtd=4 - attach MTD device" ++ "header position to be used by UBI.\n" ++ "Example 1: mtd=/dev/mtd0 - attach MTD device " ++ "/dev/mtd0.\n" ++ "Example 2: mtd=content,1984 mtd=4 - attach MTD device " + "with name \"content\" using VID header offset 1984, and " + "MTD device number 4 with default VID header offset."); + +diff -uprN linux-2.6.28/drivers/mtd/ubi/cdev.c ubifs-v2.6.28/drivers/mtd/ubi/cdev.c +--- linux-2.6.28/drivers/mtd/ubi/cdev.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/cdev.c 2011-06-15 14:22:07.000000000 -0400 +@@ -40,9 +40,9 @@ + #include + #include + #include +-#include ++#include ++#include + #include +-#include + #include "ubi.h" + + /** +@@ -113,7 +113,8 @@ static int vol_cdev_open(struct inode *i + else + mode = UBI_READONLY; + +- dbg_gen("open volume %d, mode %d", vol_id, mode); ++ dbg_gen("open device %d, volume %d, mode %d", ++ ubi_num, vol_id, mode); + + desc = ubi_open_volume(ubi_num, vol_id, mode); + if (IS_ERR(desc)) +@@ -128,7 +129,8 @@ static int vol_cdev_release(struct inode + struct ubi_volume_desc *desc = file->private_data; + struct ubi_volume *vol = desc->vol; + +- dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode); ++ dbg_gen("release device %d, volume %d, mode %d", ++ 
vol->ubi->ubi_num, vol->vol_id, desc->mode); + + if (vol->updating) { + ubi_warn("update of volume %d not finished, volume is damaged", +@@ -155,7 +157,7 @@ static loff_t vol_cdev_llseek(struct fil + loff_t new_offset; + + if (vol->updating) { +- /* Update is in progress, seeking is prohibited */ ++ /* Update is in progress, seeking is prohibited */ + dbg_err("updating"); + return -EBUSY; + } +@@ -186,6 +188,16 @@ static loff_t vol_cdev_llseek(struct fil + return new_offset; + } + ++static int vol_cdev_fsync(struct file *file, struct dentry *dentry, ++ int datasync) ++{ ++ struct ubi_volume_desc *desc = file->private_data; ++ struct ubi_device *ubi = desc->vol->ubi; ++ ++ return ubi_sync(ubi->ubi_num); ++} ++ ++ + static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count, + loff_t *offp) + { +@@ -195,7 +207,6 @@ static ssize_t vol_cdev_read(struct file + int err, lnum, off, len, tbuf_size; + size_t count_save = count; + void *tbuf; +- uint64_t tmp; + + dbg_gen("read %zd bytes from offset %lld of volume %d", + count, *offp, vol->vol_id); +@@ -225,10 +236,7 @@ static ssize_t vol_cdev_read(struct file + return -ENOMEM; + + len = count > tbuf_size ? tbuf_size : count; +- +- tmp = *offp; +- off = do_div(tmp, vol->usable_leb_size); +- lnum = tmp; ++ lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); + + do { + cond_resched(); +@@ -263,12 +271,9 @@ static ssize_t vol_cdev_read(struct file + return err ? err : count_save - count; + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO +- + /* + * This function allows to directly write to dynamic UBI volumes, without +- * issuing the volume update operation. Available only as a debugging feature. +- * Very useful for testing UBI. ++ * issuing the volume update operation. 
+ */ + static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) +@@ -279,7 +284,9 @@ static ssize_t vol_cdev_direct_write(str + int lnum, off, len, tbuf_size, err = 0; + size_t count_save = count; + char *tbuf; +- uint64_t tmp; ++ ++ if (!vol->direct_writes) ++ return -EPERM; + + dbg_gen("requested: write %zd bytes to offset %lld of volume %u", + count, *offp, vol->vol_id); +@@ -287,10 +294,7 @@ static ssize_t vol_cdev_direct_write(str + if (vol->vol_type == UBI_STATIC_VOLUME) + return -EROFS; + +- tmp = *offp; +- off = do_div(tmp, vol->usable_leb_size); +- lnum = tmp; +- ++ lnum = div_u64_rem(*offp, vol->usable_leb_size, &off); + if (off & (ubi->min_io_size - 1)) { + dbg_err("unaligned position"); + return -EINVAL; +@@ -347,10 +351,6 @@ static ssize_t vol_cdev_direct_write(str + return err ? err : count_save - count; + } + +-#else +-#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM) +-#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */ +- + static ssize_t vol_cdev_write(struct file *file, const char __user *buf, + size_t count, loff_t *offp) + { +@@ -395,15 +395,15 @@ static ssize_t vol_cdev_write(struct fil + vol->corrupted = 1; + } + vol->checked = 1; +- ubi_gluebi_updated(vol); ++ ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED); + revoke_exclusive(desc, UBI_READWRITE); + } + + return count; + } + +-static int vol_cdev_ioctl(struct inode *inode, struct file *file, +- unsigned int cmd, unsigned long arg) ++static long vol_cdev_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) + { + int err = 0; + struct ubi_volume_desc *desc = file->private_data; +@@ -487,7 +487,6 @@ static int vol_cdev_ioctl(struct inode * + break; + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO + /* Logical eraseblock erasure command */ + case UBI_IOCEBER: + { +@@ -518,13 +517,77 @@ static int vol_cdev_ioctl(struct inode * + err = ubi_wl_flush(ubi); + break; + } +-#endif ++ ++ /* Logical eraseblock map command */ ++ 
case UBI_IOCEBMAP: ++ { ++ struct ubi_map_req req; ++ ++ err = copy_from_user(&req, argp, sizeof(struct ubi_map_req)); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ err = ubi_leb_map(desc, req.lnum, req.dtype); ++ break; ++ } ++ ++ /* Logical eraseblock un-map command */ ++ case UBI_IOCEBUNMAP: ++ { ++ int32_t lnum; ++ ++ err = get_user(lnum, (__user int32_t *)argp); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ err = ubi_leb_unmap(desc, lnum); ++ break; ++ } ++ ++ /* Check if logical eraseblock is mapped command */ ++ case UBI_IOCEBISMAP: ++ { ++ int32_t lnum; ++ ++ err = get_user(lnum, (__user int32_t *)argp); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ err = ubi_is_mapped(desc, lnum); ++ break; ++ } ++ ++ /* Set volume property command */ ++ case UBI_IOCSETVOLPROP: ++ { ++ struct ubi_set_vol_prop_req req; ++ ++ err = copy_from_user(&req, argp, ++ sizeof(struct ubi_set_vol_prop_req)); ++ if (err) { ++ err = -EFAULT; ++ break; ++ } ++ switch (req.property) { ++ case UBI_VOL_PROP_DIRECT_WRITE: ++ mutex_lock(&ubi->device_mutex); ++ desc->vol->direct_writes = !!req.value; ++ mutex_unlock(&ubi->device_mutex); ++ break; ++ default: ++ err = -EINVAL; ++ break; ++ } ++ break; ++ } + + default: + err = -ENOTTY; + break; + } +- + return err; + } + +@@ -735,23 +798,23 @@ static int rename_volumes(struct ubi_dev + goto out_free; + } + +- re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); +- if (!re) { ++ re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL); ++ if (!re1) { + err = -ENOMEM; + ubi_close_volume(desc); + goto out_free; + } + +- re->remove = 1; +- re->desc = desc; +- list_add(&re->list, &rename_list); ++ re1->remove = 1; ++ re1->desc = desc; ++ list_add(&re1->list, &rename_list); + dbg_msg("will remove volume %d, name \"%s\"", +- re->desc->vol->vol_id, re->desc->vol->name); ++ re1->desc->vol->vol_id, re1->desc->vol->name); + } + +- mutex_lock(&ubi->volumes_mutex); ++ mutex_lock(&ubi->device_mutex); + err = ubi_rename_volumes(ubi, 
&rename_list); +- mutex_unlock(&ubi->volumes_mutex); ++ mutex_unlock(&ubi->device_mutex); + + out_free: + list_for_each_entry_safe(re, re1, &rename_list, list) { +@@ -762,8 +825,8 @@ out_free: + return err; + } + +-static int ubi_cdev_ioctl(struct inode *inode, struct file *file, +- unsigned int cmd, unsigned long arg) ++static long ubi_cdev_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) + { + int err = 0; + struct ubi_device *ubi; +@@ -773,7 +836,7 @@ static int ubi_cdev_ioctl(struct inode * + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + +- ubi = ubi_get_by_major(imajor(inode)); ++ ubi = ubi_get_by_major(imajor(file->f_mapping->host)); + if (!ubi) + return -ENODEV; + +@@ -794,9 +857,9 @@ static int ubi_cdev_ioctl(struct inode * + if (err) + break; + +- mutex_lock(&ubi->volumes_mutex); ++ mutex_lock(&ubi->device_mutex); + err = ubi_create_volume(ubi, &req); +- mutex_unlock(&ubi->volumes_mutex); ++ mutex_unlock(&ubi->device_mutex); + if (err) + break; + +@@ -825,9 +887,9 @@ static int ubi_cdev_ioctl(struct inode * + break; + } + +- mutex_lock(&ubi->volumes_mutex); ++ mutex_lock(&ubi->device_mutex); + err = ubi_remove_volume(desc, 0); +- mutex_unlock(&ubi->volumes_mutex); ++ mutex_unlock(&ubi->device_mutex); + + /* + * The volume is deleted (unless an error occurred), and the +@@ -842,7 +904,6 @@ static int ubi_cdev_ioctl(struct inode * + case UBI_IOCRSVOL: + { + int pebs; +- uint64_t tmp; + struct ubi_rsvol_req req; + + dbg_gen("re-size volume"); +@@ -862,13 +923,12 @@ static int ubi_cdev_ioctl(struct inode * + break; + } + +- tmp = req.bytes; +- pebs = !!do_div(tmp, desc->vol->usable_leb_size); +- pebs += tmp; ++ pebs = div_u64(req.bytes + desc->vol->usable_leb_size - 1, ++ desc->vol->usable_leb_size); + +- mutex_lock(&ubi->volumes_mutex); ++ mutex_lock(&ubi->device_mutex); + err = ubi_resize_volume(desc, pebs); +- mutex_unlock(&ubi->volumes_mutex); ++ mutex_unlock(&ubi->device_mutex); + ubi_close_volume(desc); + break; + } +@@ -892,9 +952,7 
@@ static int ubi_cdev_ioctl(struct inode * + break; + } + +- mutex_lock(&ubi->mult_mutex); + err = rename_volumes(ubi, req); +- mutex_unlock(&ubi->mult_mutex); + kfree(req); + break; + } +@@ -908,8 +966,8 @@ static int ubi_cdev_ioctl(struct inode * + return err; + } + +-static int ctrl_cdev_ioctl(struct inode *inode, struct file *file, +- unsigned int cmd, unsigned long arg) ++static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) + { + int err = 0; + void __user *argp = (void __user *)arg; +@@ -985,26 +1043,61 @@ static int ctrl_cdev_ioctl(struct inode + return err; + } + +-/* UBI control character device operations */ +-struct file_operations ubi_ctrl_cdev_operations = { +- .ioctl = ctrl_cdev_ioctl, +- .owner = THIS_MODULE, ++#ifdef CONFIG_COMPAT ++static long vol_cdev_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ unsigned long translated_arg = (unsigned long)compat_ptr(arg); ++ ++ return vol_cdev_ioctl(file, cmd, translated_arg); ++} ++ ++static long ubi_cdev_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ unsigned long translated_arg = (unsigned long)compat_ptr(arg); ++ ++ return ubi_cdev_ioctl(file, cmd, translated_arg); ++} ++ ++static long ctrl_cdev_compat_ioctl(struct file *file, unsigned int cmd, ++ unsigned long arg) ++{ ++ unsigned long translated_arg = (unsigned long)compat_ptr(arg); ++ ++ return ctrl_cdev_ioctl(file, cmd, translated_arg); ++} ++#else ++#define vol_cdev_compat_ioctl NULL ++#define ubi_cdev_compat_ioctl NULL ++#define ctrl_cdev_compat_ioctl NULL ++#endif ++ ++/* UBI volume character device operations */ ++const struct file_operations ubi_vol_cdev_operations = { ++ .owner = THIS_MODULE, ++ .open = vol_cdev_open, ++ .release = vol_cdev_release, ++ .llseek = vol_cdev_llseek, ++ .read = vol_cdev_read, ++ .write = vol_cdev_write, ++ .fsync = vol_cdev_fsync, ++ .unlocked_ioctl = vol_cdev_ioctl, ++ .compat_ioctl = vol_cdev_compat_ioctl, + }; + + /* 
UBI character device operations */ +-struct file_operations ubi_cdev_operations = { +- .owner = THIS_MODULE, +- .ioctl = ubi_cdev_ioctl, +- .llseek = no_llseek, ++const struct file_operations ubi_cdev_operations = { ++ .owner = THIS_MODULE, ++ .llseek = no_llseek, ++ .unlocked_ioctl = ubi_cdev_ioctl, ++ .compat_ioctl = ubi_cdev_compat_ioctl, + }; + +-/* UBI volume character device operations */ +-struct file_operations ubi_vol_cdev_operations = { +- .owner = THIS_MODULE, +- .open = vol_cdev_open, +- .release = vol_cdev_release, +- .llseek = vol_cdev_llseek, +- .read = vol_cdev_read, +- .write = vol_cdev_write, +- .ioctl = vol_cdev_ioctl, ++/* UBI control character device operations */ ++const struct file_operations ubi_ctrl_cdev_operations = { ++ .owner = THIS_MODULE, ++ .unlocked_ioctl = ctrl_cdev_ioctl, ++ .compat_ioctl = ctrl_cdev_compat_ioctl, ++ .llseek = no_llseek, + }; +diff -uprN linux-2.6.28/drivers/mtd/ubi/debug.c ubifs-v2.6.28/drivers/mtd/ubi/debug.c +--- linux-2.6.28/drivers/mtd/ubi/debug.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/debug.c 2011-06-15 14:22:07.000000000 -0400 +@@ -27,6 +27,17 @@ + #ifdef CONFIG_MTD_UBI_DEBUG + + #include "ubi.h" ++#include ++#include ++ ++unsigned int ubi_chk_flags; ++unsigned int ubi_tst_flags; ++ ++module_param_named(debug_chks, ubi_chk_flags, uint, S_IRUGO | S_IWUSR); ++module_param_named(debug_tsts, ubi_chk_flags, uint, S_IRUGO | S_IWUSR); ++ ++MODULE_PARM_DESC(debug_chks, "Debug check flags"); ++MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); + + /** + * ubi_dbg_dump_ec_hdr - dump an erase counter header. 
+@@ -61,15 +72,15 @@ void ubi_dbg_dump_vid_hdr(const struct u + { + printk(KERN_DEBUG "Volume identifier header dump:\n"); + printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic)); +- printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); +- printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); +- printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); +- printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); +- printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); +- printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); +- printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); +- printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); +- printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); ++ printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version); ++ printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type); ++ printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag); ++ printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat); ++ printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id)); ++ printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum)); ++ printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size)); ++ printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs)); ++ printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad)); + printk(KERN_DEBUG "\tsqnum %llu\n", + (unsigned long long)be64_to_cpu(vid_hdr->sqnum)); + printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc)); +@@ -196,4 +207,36 @@ void ubi_dbg_dump_mkvol_req(const struct + printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm); + } + ++/** ++ * ubi_dbg_dump_flash - dump a region of flash. 
++ * @ubi: UBI device description object ++ * @pnum: the physical eraseblock number to dump ++ * @offset: the starting offset within the physical eraseblock to dump ++ * @len: the length of the region to dump ++ */ ++void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len) ++{ ++ int err; ++ size_t read; ++ void *buf; ++ loff_t addr = (loff_t)pnum * ubi->peb_size + offset; ++ ++ buf = vmalloc(len); ++ if (!buf) ++ return; ++ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); ++ if (err && err != -EUCLEAN) { ++ ubi_err("error %d while reading %d bytes from PEB %d:%d, " ++ "read %zd bytes", err, len, pnum, offset, read); ++ goto out; ++ } ++ ++ dbg_msg("dumping %d bytes of data from PEB %d, offset %d", ++ len, pnum, offset); ++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); ++out: ++ vfree(buf); ++ return; ++} ++ + #endif /* CONFIG_MTD_UBI_DEBUG */ +diff -uprN linux-2.6.28/drivers/mtd/ubi/debug.h ubifs-v2.6.28/drivers/mtd/ubi/debug.h +--- linux-2.6.28/drivers/mtd/ubi/debug.h 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/debug.h 2011-06-15 14:22:07.000000000 -0400 +@@ -21,11 +21,17 @@ + #ifndef __UBI_DEBUG_H__ + #define __UBI_DEBUG_H__ + ++struct ubi_ec_hdr; ++struct ubi_vid_hdr; ++struct ubi_volume; ++struct ubi_vtbl_record; ++struct ubi_scan_volume; ++struct ubi_scan_leb; ++struct ubi_mkvol_req; ++ + #ifdef CONFIG_MTD_UBI_DEBUG + #include + +-#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__) +- + #define ubi_assert(expr) do { \ + if (unlikely(!(expr))) { \ + printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ +@@ -34,19 +40,28 @@ + } \ + } while (0) + +-#define dbg_msg(fmt, ...) \ +- printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \ +- current->pid, __func__, ##__VA_ARGS__) ++#define dbg_err(fmt, ...) 
ubi_err(fmt, ##__VA_ARGS__) + + #define ubi_dbg_dump_stack() dump_stack() + +-struct ubi_ec_hdr; +-struct ubi_vid_hdr; +-struct ubi_volume; +-struct ubi_vtbl_record; +-struct ubi_scan_volume; +-struct ubi_scan_leb; +-struct ubi_mkvol_req; ++#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) \ ++ print_hex_dump(l, ps, pt, r, g, b, len, a) ++ ++#define ubi_dbg_msg(type, fmt, ...) \ ++ pr_debug("UBI DBG " type ": " fmt "\n", ##__VA_ARGS__) ++ ++/* Just a debugging messages not related to any specific UBI subsystem */ ++#define dbg_msg(fmt, ...) ubi_dbg_msg("msg", fmt, ##__VA_ARGS__) ++/* General debugging messages */ ++#define dbg_gen(fmt, ...) ubi_dbg_msg("gen", fmt, ##__VA_ARGS__) ++/* Messages from the eraseblock association sub-system */ ++#define dbg_eba(fmt, ...) ubi_dbg_msg("eba", fmt, ##__VA_ARGS__) ++/* Messages from the wear-leveling sub-system */ ++#define dbg_wl(fmt, ...) ubi_dbg_msg("wl", fmt, ##__VA_ARGS__) ++/* Messages from the input/output sub-system */ ++#define dbg_io(fmt, ...) ubi_dbg_msg("io", fmt, ##__VA_ARGS__) ++/* Initialization and build messages */ ++#define dbg_bld(fmt, ...) ubi_dbg_msg("bld", fmt, ##__VA_ARGS__) + + void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr); + void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr); +@@ -55,51 +70,53 @@ void ubi_dbg_dump_vtbl_record(const stru + void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv); + void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type); + void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req); ++void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len); + +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG +-/* General debugging messages */ +-#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#else +-#define dbg_gen(fmt, ...) ({}) +-#endif +- +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA +-/* Messages from the eraseblock association sub-system */ +-#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#else +-#define dbg_eba(fmt, ...) 
({}) +-#endif ++extern unsigned int ubi_chk_flags; + +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL +-/* Messages from the wear-leveling sub-system */ +-#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#else +-#define dbg_wl(fmt, ...) ({}) +-#endif ++/* ++ * Debugging check flags. ++ * ++ * UBI_CHK_GEN: general checks ++ * UBI_CHK_IO: check writes and erases ++ */ ++enum { ++ UBI_CHK_GEN = 0x1, ++ UBI_CHK_IO = 0x2, ++}; ++ ++int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len); ++int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, ++ int offset, int len); + +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO +-/* Messages from the input/output sub-system */ +-#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#else +-#define dbg_io(fmt, ...) ({}) +-#endif ++extern unsigned int ubi_tst_flags; + +-#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD +-/* Initialization and build messages */ +-#define dbg_bld(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define UBI_IO_DEBUG 1 +-#else +-#define dbg_bld(fmt, ...) ({}) +-#define UBI_IO_DEBUG 0 +-#endif ++/* ++ * Special testing flags. ++ * ++ * UBIFS_TST_DISABLE_BGT: disable the background thread ++ * UBI_TST_EMULATE_BITFLIPS: emulate bit-flips ++ * UBI_TST_EMULATE_WRITE_FAILURES: emulate write failures ++ * UBI_TST_EMULATE_ERASE_FAILURES: emulate erase failures ++ */ ++enum { ++ UBI_TST_DISABLE_BGT = 0x1, ++ UBI_TST_EMULATE_BITFLIPS = 0x2, ++ UBI_TST_EMULATE_WRITE_FAILURES = 0x4, ++ UBI_TST_EMULATE_ERASE_FAILURES = 0x8, ++}; + +-#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT +-#define DBG_DISABLE_BGT 1 +-#else +-#define DBG_DISABLE_BGT 0 +-#endif ++/** ++ * ubi_dbg_is_bgt_disabled - if the background thread is disabled. ++ * ++ * Returns non-zero if the UBI background thread is disabled for testing ++ * purposes. 
++ */ ++static inline int ubi_dbg_is_bgt_disabled(void) ++{ ++ return ubi_tst_flags & UBI_TST_DISABLE_BGT; ++} + +-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS + /** + * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip. + * +@@ -107,13 +124,11 @@ void ubi_dbg_dump_mkvol_req(const struct + */ + static inline int ubi_dbg_is_bitflip(void) + { +- return !(random32() % 200); ++ if (ubi_tst_flags & UBI_TST_EMULATE_BITFLIPS) ++ return !(random32() % 200); ++ return 0; + } +-#else +-#define ubi_dbg_is_bitflip() 0 +-#endif + +-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES + /** + * ubi_dbg_is_write_failure - if it is time to emulate a write failure. + * +@@ -122,13 +137,11 @@ static inline int ubi_dbg_is_bitflip(voi + */ + static inline int ubi_dbg_is_write_failure(void) + { +- return !(random32() % 500); ++ if (ubi_tst_flags & UBI_TST_EMULATE_WRITE_FAILURES) ++ return !(random32() % 500); ++ return 0; + } +-#else +-#define ubi_dbg_is_write_failure() 0 +-#endif + +-#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES + /** + * ubi_dbg_is_erase_failure - if its time to emulate an erase failure. + * +@@ -137,36 +150,68 @@ static inline int ubi_dbg_is_write_failu + */ + static inline int ubi_dbg_is_erase_failure(void) + { ++ if (ubi_tst_flags & UBI_TST_EMULATE_ERASE_FAILURES) + return !(random32() % 400); ++ return 0; + } +-#else +-#define ubi_dbg_is_erase_failure() 0 +-#endif + + #else + +-#define ubi_assert(expr) ({}) +-#define dbg_err(fmt, ...) ({}) +-#define dbg_msg(fmt, ...) ({}) +-#define dbg_gen(fmt, ...) ({}) +-#define dbg_eba(fmt, ...) ({}) +-#define dbg_wl(fmt, ...) ({}) +-#define dbg_io(fmt, ...) ({}) +-#define dbg_bld(fmt, ...) 
({}) +-#define ubi_dbg_dump_stack() ({}) +-#define ubi_dbg_dump_ec_hdr(ec_hdr) ({}) +-#define ubi_dbg_dump_vid_hdr(vid_hdr) ({}) +-#define ubi_dbg_dump_vol_info(vol) ({}) +-#define ubi_dbg_dump_vtbl_record(r, idx) ({}) +-#define ubi_dbg_dump_sv(sv) ({}) +-#define ubi_dbg_dump_seb(seb, type) ({}) +-#define ubi_dbg_dump_mkvol_req(req) ({}) +- +-#define UBI_IO_DEBUG 0 +-#define DBG_DISABLE_BGT 0 +-#define ubi_dbg_is_bitflip() 0 +-#define ubi_dbg_is_write_failure() 0 +-#define ubi_dbg_is_erase_failure() 0 ++/* Use "if (0)" to make compiler check arguments even if debugging is off */ ++#define ubi_assert(expr) do { \ ++ if (0) { \ ++ printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \ ++ __func__, __LINE__, current->pid); \ ++ } \ ++} while (0) ++ ++#define dbg_err(fmt, ...) do { \ ++ if (0) \ ++ ubi_err(fmt, ##__VA_ARGS__); \ ++} while (0) ++ ++#define ubi_dbg_msg(fmt, ...) do { \ ++ if (0) \ ++ pr_debug(fmt "\n", ##__VA_ARGS__); \ ++} while (0) ++ ++#define dbg_msg(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gen(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_eba(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_wl(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_io(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_bld(fmt, ...) 
ubi_dbg_msg(fmt, ##__VA_ARGS__) ++ ++static inline void ubi_dbg_dump_stack(void) { return; } ++static inline void ++ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { return; } ++static inline void ++ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { return; } ++static inline void ++ubi_dbg_dump_vol_info(const struct ubi_volume *vol) { return; } ++static inline void ++ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { return; } ++static inline void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) { return; } ++static inline void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, ++ int type) { return; } ++static inline void ++ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) { return; } ++static inline void ubi_dbg_dump_flash(struct ubi_device *ubi, ++ int pnum, int offset, int len) { return; } ++static inline void ++ubi_dbg_print_hex_dump(const char *l, const char *ps, int pt, int r, ++ int g, const void *b, size_t len, bool a) { return; } ++ ++static inline int ubi_dbg_is_bgt_disabled(void) { return 0; } ++static inline int ubi_dbg_is_bitflip(void) { return 0; } ++static inline int ubi_dbg_is_write_failure(void) { return 0; } ++static inline int ubi_dbg_is_erase_failure(void) { return 0; } ++static inline int ubi_dbg_check_all_ff(struct ubi_device *ubi, ++ int pnum, int offset, ++ int len) { return 0; } ++static inline int ubi_dbg_check_write(struct ubi_device *ubi, ++ const void *buf, int pnum, ++ int offset, int len) { return 0; } + + #endif /* !CONFIG_MTD_UBI_DEBUG */ + #endif /* !__UBI_DEBUG_H__ */ +diff -uprN linux-2.6.28/drivers/mtd/ubi/eba.c ubifs-v2.6.28/drivers/mtd/ubi/eba.c +--- linux-2.6.28/drivers/mtd/ubi/eba.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/eba.c 2011-06-15 14:22:07.000000000 -0400 +@@ -418,7 +418,8 @@ retry: + * may try to recover data. FIXME: but this is + * not implemented. 
+ */ +- if (err == UBI_IO_BAD_VID_HDR) { ++ if (err == UBI_IO_BAD_HDR_EBADMSG || ++ err == UBI_IO_BAD_HDR) { + ubi_warn("corrupted VID header at PEB " + "%d, LEB %d:%d", pnum, vol_id, + lnum); +@@ -718,7 +719,7 @@ write_error: + * to the real data size, although the @buf buffer has to contain the + * alignment. In all other cases, @len has to be aligned. + * +- * It is prohibited to write more then once to logical eraseblocks of static ++ * It is prohibited to write more than once to logical eraseblocks of static + * volumes. This function returns zero in case of success and a negative error + * code in case of failure. + */ +@@ -961,8 +962,8 @@ write_error: + */ + static int is_error_sane(int err) + { +- if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_VID_HDR || +- err == -ETIMEDOUT) ++ if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR || ++ err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT) + return 0; + return 1; + } +@@ -1165,6 +1166,47 @@ out_unlock_leb: + } + + /** ++ * print_rsvd_warning - warn about not having enough reserved PEBs. ++ * @ubi: UBI device description object ++ * ++ * This is a helper function for 'ubi_eba_init_scan()' which is called when UBI ++ * cannot reserve enough PEBs for bad block handling. This function makes a ++ * decision whether we have to print a warning or not. The algorithm is as ++ * follows: ++ * o if this is a new UBI image, then just print the warning ++ * o if this is an UBI image which has already been used for some time, print ++ * a warning only if we can reserve less than 10% of the expected amount of ++ * the reserved PEB. ++ * ++ * The idea is that when UBI is used, PEBs become bad, and the reserved pool ++ * of PEBs becomes smaller, which is normal and we do not want to scare users ++ * with a warning every time they attach the MTD device. This was an issue ++ * reported by real users. 
++ */ ++static void print_rsvd_warning(struct ubi_device *ubi, ++ struct ubi_scan_info *si) ++{ ++ /* ++ * The 1 << 18 (256KiB) number is picked randomly, just a reasonably ++ * large number to distinguish between newly flashed and used images. ++ */ ++ if (si->max_sqnum > (1 << 18)) { ++ int min = ubi->beb_rsvd_level / 10; ++ ++ if (!min) ++ min = 1; ++ if (ubi->beb_rsvd_pebs > min) ++ return; ++ } ++ ++ ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d," ++ " need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); ++ if (ubi->corr_peb_count) ++ ubi_warn("%d PEBs are corrupted and not used", ++ ubi->corr_peb_count); ++} ++ ++/** + * ubi_eba_init_scan - initialize the EBA sub-system using scanning information. + * @ubi: UBI device description object + * @si: scanning information +@@ -1224,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device + if (ubi->avail_pebs < EBA_RESERVED_PEBS) { + ubi_err("no enough physical eraseblocks (%d, need %d)", + ubi->avail_pebs, EBA_RESERVED_PEBS); ++ if (ubi->corr_peb_count) ++ ubi_err("%d PEBs are corrupted and not used", ++ ubi->corr_peb_count); + err = -ENOSPC; + goto out_free; + } +@@ -1236,9 +1281,7 @@ int ubi_eba_init_scan(struct ubi_device + if (ubi->avail_pebs < ubi->beb_rsvd_level) { + /* No enough free physical eraseblocks */ + ubi->beb_rsvd_pebs = ubi->avail_pebs; +- ubi_warn("cannot reserve enough PEBs for bad PEB " +- "handling, reserved %d, need %d", +- ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); ++ print_rsvd_warning(ubi, si); + } else + ubi->beb_rsvd_pebs = ubi->beb_rsvd_level; + +@@ -1254,6 +1297,7 @@ out_free: + if (!ubi->volumes[i]) + continue; + kfree(ubi->volumes[i]->eba_tbl); ++ ubi->volumes[i]->eba_tbl = NULL; + } + return err; + } +diff -uprN linux-2.6.28/drivers/mtd/ubi/gluebi.c ubifs-v2.6.28/drivers/mtd/ubi/gluebi.c +--- linux-2.6.28/drivers/mtd/ubi/gluebi.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/gluebi.c 2011-06-15 14:22:07.000000000 -0400 +@@ -19,17 +19,78 @@ 
+ */ + + /* +- * This file includes implementation of fake MTD devices for each UBI volume. +- * This sounds strange, but it is in fact quite useful to make MTD-oriented +- * software (including all the legacy software) to work on top of UBI. ++ * This is a small driver which implements fake MTD devices on top of UBI ++ * volumes. This sounds strange, but it is in fact quite useful to make ++ * MTD-oriented software (including all the legacy software) work on top of ++ * UBI. + * + * Gluebi emulates MTD devices of "MTD_UBIVOLUME" type. Their minimal I/O unit +- * size (mtd->writesize) is equivalent to the UBI minimal I/O unit. The ++ * size (@mtd->writesize) is equivalent to the UBI minimal I/O unit. The + * eraseblock size is equivalent to the logical eraseblock size of the volume. + */ + ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + #include +-#include "ubi.h" ++#include "ubi-media.h" ++ ++#define err_msg(fmt, ...) \ ++ printk(KERN_DEBUG "gluebi (pid %d): %s: " fmt "\n", \ ++ current->pid, __func__, ##__VA_ARGS__) ++ ++static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) ++{ ++ do_div(sz, mtd->erasesize); ++ return sz; ++} ++ ++/** ++ * struct gluebi_device - a gluebi device description data structure. ++ * @mtd: emulated MTD device description object ++ * @refcnt: gluebi device reference count ++ * @desc: UBI volume descriptor ++ * @ubi_num: UBI device number this gluebi device works on ++ * @vol_id: ID of UBI volume this gluebi device works on ++ * @list: link in a list of gluebi devices ++ */ ++struct gluebi_device { ++ struct mtd_info mtd; ++ int refcnt; ++ struct ubi_volume_desc *desc; ++ int ubi_num; ++ int vol_id; ++ struct list_head list; ++}; ++ ++/* List of all gluebi devices */ ++static LIST_HEAD(gluebi_devices); ++static DEFINE_MUTEX(devices_mutex); ++ ++/** ++ * find_gluebi_nolock - find a gluebi device. 
++ * @ubi_num: UBI device number ++ * @vol_id: volume ID ++ * ++ * This function seraches for gluebi device corresponding to UBI device ++ * @ubi_num and UBI volume @vol_id. Returns the gluebi device description ++ * object in case of success and %NULL in case of failure. The caller has to ++ * have the &devices_mutex locked. ++ */ ++static struct gluebi_device *find_gluebi_nolock(int ubi_num, int vol_id) ++{ ++ struct gluebi_device *gluebi; ++ ++ list_for_each_entry(gluebi, &gluebi_devices, list) ++ if (gluebi->ubi_num == ubi_num && gluebi->vol_id == vol_id) ++ return gluebi; ++ return NULL; ++} + + /** + * gluebi_get_device - get MTD device reference. +@@ -41,15 +102,18 @@ + */ + static int gluebi_get_device(struct mtd_info *mtd) + { +- struct ubi_volume *vol; ++ struct gluebi_device *gluebi; ++ int ubi_mode = UBI_READONLY; + +- vol = container_of(mtd, struct ubi_volume, gluebi_mtd); ++ if (!try_module_get(THIS_MODULE)) ++ return -ENODEV; + +- /* +- * We do not introduce locks for gluebi reference count because the +- * get_device()/put_device() calls are already serialized at MTD. +- */ +- if (vol->gluebi_refcount > 0) { ++ if (mtd->flags & MTD_WRITEABLE) ++ ubi_mode = UBI_READWRITE; ++ ++ gluebi = container_of(mtd, struct gluebi_device, mtd); ++ mutex_lock(&devices_mutex); ++ if (gluebi->refcnt > 0) { + /* + * The MTD device is already referenced and this is just one + * more reference. MTD allows many users to open the same +@@ -58,7 +122,8 @@ static int gluebi_get_device(struct mtd_ + * open the UBI volume again - just increase the reference + * counter and return. + */ +- vol->gluebi_refcount += 1; ++ gluebi->refcnt += 1; ++ mutex_unlock(&devices_mutex); + return 0; + } + +@@ -66,11 +131,15 @@ static int gluebi_get_device(struct mtd_ + * This is the first reference to this UBI volume via the MTD device + * interface. Open the corresponding volume in read-write mode. 
+ */ +- vol->gluebi_desc = ubi_open_volume(vol->ubi->ubi_num, vol->vol_id, +- UBI_READWRITE); +- if (IS_ERR(vol->gluebi_desc)) +- return PTR_ERR(vol->gluebi_desc); +- vol->gluebi_refcount += 1; ++ gluebi->desc = ubi_open_volume(gluebi->ubi_num, gluebi->vol_id, ++ ubi_mode); ++ if (IS_ERR(gluebi->desc)) { ++ mutex_unlock(&devices_mutex); ++ module_put(THIS_MODULE); ++ return PTR_ERR(gluebi->desc); ++ } ++ gluebi->refcnt += 1; ++ mutex_unlock(&devices_mutex); + return 0; + } + +@@ -83,13 +152,15 @@ static int gluebi_get_device(struct mtd_ + */ + static void gluebi_put_device(struct mtd_info *mtd) + { +- struct ubi_volume *vol; ++ struct gluebi_device *gluebi; + +- vol = container_of(mtd, struct ubi_volume, gluebi_mtd); +- vol->gluebi_refcount -= 1; +- ubi_assert(vol->gluebi_refcount >= 0); +- if (vol->gluebi_refcount == 0) +- ubi_close_volume(vol->gluebi_desc); ++ gluebi = container_of(mtd, struct gluebi_device, mtd); ++ mutex_lock(&devices_mutex); ++ gluebi->refcnt -= 1; ++ if (gluebi->refcnt == 0) ++ ubi_close_volume(gluebi->desc); ++ module_put(THIS_MODULE); ++ mutex_unlock(&devices_mutex); + } + + /** +@@ -107,21 +178,14 @@ static int gluebi_read(struct mtd_info * + size_t *retlen, unsigned char *buf) + { + int err = 0, lnum, offs, total_read; +- struct ubi_volume *vol; +- struct ubi_device *ubi; +- uint64_t tmp = from; +- +- dbg_gen("read %zd bytes from offset %lld", len, from); ++ struct gluebi_device *gluebi; + + if (len < 0 || from < 0 || from + len > mtd->size) + return -EINVAL; + +- vol = container_of(mtd, struct ubi_volume, gluebi_mtd); +- ubi = vol->ubi; +- +- offs = do_div(tmp, mtd->erasesize); +- lnum = tmp; ++ gluebi = container_of(mtd, struct gluebi_device, mtd); + ++ lnum = div_u64_rem(from, mtd->erasesize, &offs); + total_read = len; + while (total_read) { + size_t to_read = mtd->erasesize - offs; +@@ -129,7 +193,7 @@ static int gluebi_read(struct mtd_info * + if (to_read > total_read) + to_read = total_read; + +- err = ubi_eba_read_leb(ubi, vol, 
lnum, buf, offs, to_read, 0); ++ err = ubi_read(gluebi->desc, lnum, buf, offs, to_read); + if (err) + break; + +@@ -155,26 +219,20 @@ static int gluebi_read(struct mtd_info * + * case of failure. + */ + static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len, +- size_t *retlen, const u_char *buf) ++ size_t *retlen, const u_char *buf) + { + int err = 0, lnum, offs, total_written; +- struct ubi_volume *vol; +- struct ubi_device *ubi; +- uint64_t tmp = to; +- +- dbg_gen("write %zd bytes to offset %lld", len, to); ++ struct gluebi_device *gluebi; + + if (len < 0 || to < 0 || len + to > mtd->size) + return -EINVAL; + +- vol = container_of(mtd, struct ubi_volume, gluebi_mtd); +- ubi = vol->ubi; ++ gluebi = container_of(mtd, struct gluebi_device, mtd); + +- if (ubi->ro_mode) ++ if (!(mtd->flags & MTD_WRITEABLE)) + return -EROFS; + +- offs = do_div(tmp, mtd->erasesize); +- lnum = tmp; ++ lnum = div_u64_rem(to, mtd->erasesize, &offs); + + if (len % mtd->writesize || offs % mtd->writesize) + return -EINVAL; +@@ -186,8 +244,7 @@ static int gluebi_write(struct mtd_info + if (to_write > total_written) + to_write = total_written; + +- err = ubi_eba_write_leb(ubi, vol, lnum, buf, offs, to_write, +- UBI_UNKNOWN); ++ err = ubi_write(gluebi->desc, lnum, buf, offs, to_write); + if (err) + break; + +@@ -212,40 +269,36 @@ static int gluebi_write(struct mtd_info + static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr) + { + int err, i, lnum, count; +- struct ubi_volume *vol; +- struct ubi_device *ubi; +- +- dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr); ++ struct gluebi_device *gluebi; + + if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize) + return -EINVAL; +- + if (instr->len < 0 || instr->addr + instr->len > mtd->size) + return -EINVAL; +- + if (instr->addr % mtd->writesize || instr->len % mtd->writesize) + return -EINVAL; + +- lnum = instr->addr / mtd->erasesize; +- count = instr->len / mtd->erasesize; ++ lnum = 
mtd_div_by_eb(instr->addr, mtd); ++ count = mtd_div_by_eb(instr->len, mtd); + +- vol = container_of(mtd, struct ubi_volume, gluebi_mtd); +- ubi = vol->ubi; ++ gluebi = container_of(mtd, struct gluebi_device, mtd); + +- if (ubi->ro_mode) ++ if (!(mtd->flags & MTD_WRITEABLE)) + return -EROFS; + +- for (i = 0; i < count; i++) { +- err = ubi_eba_unmap_leb(ubi, vol, lnum + i); ++ for (i = 0; i < count - 1; i++) { ++ err = ubi_leb_unmap(gluebi->desc, lnum + i); + if (err) + goto out_err; + } +- + /* + * MTD erase operations are synchronous, so we have to make sure the + * physical eraseblock is wiped out. ++ * ++ * Thus, perform leb_erase instead of leb_unmap operation - leb_erase ++ * will wait for the end of operations + */ +- err = ubi_wl_flush(ubi); ++ err = ubi_leb_erase(gluebi->desc, lnum + i); + if (err) + goto out_err; + +@@ -255,33 +308,44 @@ static int gluebi_erase(struct mtd_info + + out_err: + instr->state = MTD_ERASE_FAILED; +- instr->fail_addr = lnum * mtd->erasesize; ++ instr->fail_addr = (long long)lnum * mtd->erasesize; + return err; + } + + /** +- * ubi_create_gluebi - initialize gluebi for an UBI volume. +- * @ubi: UBI device description object +- * @vol: volume description object ++ * gluebi_create - create a gluebi device for an UBI volume. ++ * @di: UBI device description object ++ * @vi: UBI volume description object + * +- * This function is called when an UBI volume is created in order to create ++ * This function is called when a new UBI volume is created in order to create + * corresponding fake MTD device. Returns zero in case of success and a + * negative error code in case of failure. 
+ */ +-int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol) ++static int gluebi_create(struct ubi_device_info *di, ++ struct ubi_volume_info *vi) + { +- struct mtd_info *mtd = &vol->gluebi_mtd; ++ struct gluebi_device *gluebi, *g; ++ struct mtd_info *mtd; + +- mtd->name = kmemdup(vol->name, vol->name_len + 1, GFP_KERNEL); +- if (!mtd->name) ++ gluebi = kzalloc(sizeof(struct gluebi_device), GFP_KERNEL); ++ if (!gluebi) + return -ENOMEM; + ++ mtd = &gluebi->mtd; ++ mtd->name = kmemdup(vi->name, vi->name_len + 1, GFP_KERNEL); ++ if (!mtd->name) { ++ kfree(gluebi); ++ return -ENOMEM; ++ } ++ ++ gluebi->vol_id = vi->vol_id; ++ gluebi->ubi_num = vi->ubi_num; + mtd->type = MTD_UBIVOLUME; +- if (!ubi->ro_mode) ++ if (!di->ro_mode) + mtd->flags = MTD_WRITEABLE; +- mtd->writesize = ubi->min_io_size; + mtd->owner = THIS_MODULE; +- mtd->erasesize = vol->usable_leb_size; ++ mtd->writesize = di->min_io_size; ++ mtd->erasesize = vi->usable_leb_size; + mtd->read = gluebi_read; + mtd->write = gluebi_write; + mtd->erase = gluebi_erase; +@@ -289,60 +353,196 @@ int ubi_create_gluebi(struct ubi_device + mtd->put_device = gluebi_put_device; + + /* +- * In case of dynamic volume, MTD device size is just volume size. In ++ * In case of dynamic a volume, MTD device size is just volume size. In + * case of a static volume the size is equivalent to the amount of data + * bytes. 
+ */ +- if (vol->vol_type == UBI_DYNAMIC_VOLUME) +- mtd->size = vol->usable_leb_size * vol->reserved_pebs; ++ if (vi->vol_type == UBI_DYNAMIC_VOLUME) ++ mtd->size = (unsigned long long)vi->usable_leb_size * vi->size; + else +- mtd->size = vol->used_bytes; ++ mtd->size = vi->used_bytes; ++ ++ /* Just a sanity check - make sure this gluebi device does not exist */ ++ mutex_lock(&devices_mutex); ++ g = find_gluebi_nolock(vi->ubi_num, vi->vol_id); ++ if (g) ++ err_msg("gluebi MTD device %d form UBI device %d volume %d " ++ "already exists", g->mtd.index, vi->ubi_num, ++ vi->vol_id); ++ mutex_unlock(&devices_mutex); + + if (add_mtd_device(mtd)) { +- ubi_err("cannot not add MTD device"); ++ err_msg("cannot add MTD device"); + kfree(mtd->name); ++ kfree(gluebi); + return -ENFILE; + } + +- dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u", +- mtd->index, mtd->name, mtd->size, mtd->erasesize); ++ mutex_lock(&devices_mutex); ++ list_add_tail(&gluebi->list, &gluebi_devices); ++ mutex_unlock(&devices_mutex); + return 0; + } + + /** +- * ubi_destroy_gluebi - close gluebi for an UBI volume. +- * @vol: volume description object ++ * gluebi_remove - remove a gluebi device. ++ * @vi: UBI volume description object + * +- * This function is called when an UBI volume is removed in order to remove ++ * This function is called when an UBI volume is removed and it removes + * corresponding fake MTD device. Returns zero in case of success and a + * negative error code in case of failure. 
+ */ +-int ubi_destroy_gluebi(struct ubi_volume *vol) ++static int gluebi_remove(struct ubi_volume_info *vi) + { +- int err; +- struct mtd_info *mtd = &vol->gluebi_mtd; ++ int err = 0; ++ struct mtd_info *mtd; ++ struct gluebi_device *gluebi; ++ ++ mutex_lock(&devices_mutex); ++ gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id); ++ if (!gluebi) { ++ err_msg("got remove notification for unknown UBI device %d " ++ "volume %d", vi->ubi_num, vi->vol_id); ++ err = -ENOENT; ++ } else if (gluebi->refcnt) ++ err = -EBUSY; ++ else ++ list_del(&gluebi->list); ++ mutex_unlock(&devices_mutex); ++ if (err) ++ return err; + +- dbg_gen("remove mtd%d", mtd->index); ++ mtd = &gluebi->mtd; + err = del_mtd_device(mtd); +- if (err) ++ if (err) { ++ err_msg("cannot remove fake MTD device %d, UBI device %d, " ++ "volume %d, error %d", mtd->index, gluebi->ubi_num, ++ gluebi->vol_id, err); ++ mutex_lock(&devices_mutex); ++ list_add_tail(&gluebi->list, &gluebi_devices); ++ mutex_unlock(&devices_mutex); + return err; ++ } ++ + kfree(mtd->name); ++ kfree(gluebi); + return 0; + } + + /** +- * ubi_gluebi_updated - UBI volume was updated notifier. +- * @vol: volume description object ++ * gluebi_updated - UBI volume was updated notifier. ++ * @vi: volume info structure + * +- * This function is called every time an UBI volume is updated. This function +- * does nothing if volume @vol is dynamic, and changes MTD device size if the ++ * This function is called every time an UBI volume is updated. It does nothing ++ * if te volume @vol is dynamic, and changes MTD device size if the + * volume is static. This is needed because static volumes cannot be read past +- * data they contain. ++ * data they contain. This function returns zero in case of success and a ++ * negative error code in case of error. 
+ */ +-void ubi_gluebi_updated(struct ubi_volume *vol) ++static int gluebi_updated(struct ubi_volume_info *vi) + { +- struct mtd_info *mtd = &vol->gluebi_mtd; ++ struct gluebi_device *gluebi; ++ ++ mutex_lock(&devices_mutex); ++ gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id); ++ if (!gluebi) { ++ mutex_unlock(&devices_mutex); ++ err_msg("got update notification for unknown UBI device %d " ++ "volume %d", vi->ubi_num, vi->vol_id); ++ return -ENOENT; ++ } + +- if (vol->vol_type == UBI_STATIC_VOLUME) +- mtd->size = vol->used_bytes; ++ if (vi->vol_type == UBI_STATIC_VOLUME) ++ gluebi->mtd.size = vi->used_bytes; ++ mutex_unlock(&devices_mutex); ++ return 0; + } ++ ++/** ++ * gluebi_resized - UBI volume was re-sized notifier. ++ * @vi: volume info structure ++ * ++ * This function is called every time an UBI volume is re-size. It changes the ++ * corresponding fake MTD device size. This function returns zero in case of ++ * success and a negative error code in case of error. ++ */ ++static int gluebi_resized(struct ubi_volume_info *vi) ++{ ++ struct gluebi_device *gluebi; ++ ++ mutex_lock(&devices_mutex); ++ gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id); ++ if (!gluebi) { ++ mutex_unlock(&devices_mutex); ++ err_msg("got update notification for unknown UBI device %d " ++ "volume %d", vi->ubi_num, vi->vol_id); ++ return -ENOENT; ++ } ++ gluebi->mtd.size = vi->used_bytes; ++ mutex_unlock(&devices_mutex); ++ return 0; ++} ++ ++/** ++ * gluebi_notify - UBI notification handler. 
++ * @nb: registered notifier block ++ * @l: notification type ++ * @ptr: pointer to the &struct ubi_notification object ++ */ ++static int gluebi_notify(struct notifier_block *nb, unsigned long l, ++ void *ns_ptr) ++{ ++ struct ubi_notification *nt = ns_ptr; ++ ++ switch (l) { ++ case UBI_VOLUME_ADDED: ++ gluebi_create(&nt->di, &nt->vi); ++ break; ++ case UBI_VOLUME_REMOVED: ++ gluebi_remove(&nt->vi); ++ break; ++ case UBI_VOLUME_RESIZED: ++ gluebi_resized(&nt->vi); ++ break; ++ case UBI_VOLUME_UPDATED: ++ gluebi_updated(&nt->vi); ++ break; ++ default: ++ break; ++ } ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block gluebi_notifier = { ++ .notifier_call = gluebi_notify, ++}; ++ ++static int __init ubi_gluebi_init(void) ++{ ++ return ubi_register_volume_notifier(&gluebi_notifier, 0); ++} ++ ++static void __exit ubi_gluebi_exit(void) ++{ ++ struct gluebi_device *gluebi, *g; ++ ++ list_for_each_entry_safe(gluebi, g, &gluebi_devices, list) { ++ int err; ++ struct mtd_info *mtd = &gluebi->mtd; ++ ++ err = del_mtd_device(mtd); ++ if (err) ++ err_msg("error %d while removing gluebi MTD device %d, " ++ "UBI device %d, volume %d - ignoring", err, ++ mtd->index, gluebi->ubi_num, gluebi->vol_id); ++ kfree(mtd->name); ++ kfree(gluebi); ++ } ++ ubi_unregister_volume_notifier(&gluebi_notifier); ++} ++ ++module_init(ubi_gluebi_init); ++module_exit(ubi_gluebi_exit); ++MODULE_DESCRIPTION("MTD emulation layer over UBI volumes"); ++MODULE_AUTHOR("Artem Bityutskiy, Joern Engel"); ++MODULE_LICENSE("GPL"); +diff -uprN linux-2.6.28/drivers/mtd/ubi/io.c ubifs-v2.6.28/drivers/mtd/ubi/io.c +--- linux-2.6.28/drivers/mtd/ubi/io.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/io.c 2011-06-15 14:22:07.000000000 -0400 +@@ -64,9 +64,9 @@ + * device, e.g., make @ubi->min_io_size = 512 in the example above? + * + * A: because when writing a sub-page, MTD still writes a full 2K page but the +- * bytes which are no relevant to the sub-page are 0xFF. 
So, basically, writing +- * 4x512 sub-pages is 4 times slower then writing one 2KiB NAND page. Thus, we +- * prefer to use sub-pages only for EV and VID headers. ++ * bytes which are not relevant to the sub-page are 0xFF. So, basically, ++ * writing 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page. ++ * Thus, we prefer to use sub-pages only for EC and VID headers. + * + * As it was noted above, the VID header may start at a non-aligned offset. + * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page, +@@ -90,7 +90,7 @@ + #include + #include "ubi.h" + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum); + static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum); + static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, +@@ -98,15 +98,12 @@ static int paranoid_check_ec_hdr(const s + static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum); + static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr); +-static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset, +- int len); + #else + #define paranoid_check_not_bad(ubi, pnum) 0 + #define paranoid_check_peb_ec_hdr(ubi, pnum) 0 + #define paranoid_check_ec_hdr(ubi, pnum, ec_hdr) 0 + #define paranoid_check_peb_vid_hdr(ubi, pnum) 0 + #define paranoid_check_vid_hdr(ubi, pnum, vid_hdr) 0 +-#define paranoid_check_all_ff(ubi, pnum, offset, len) 0 + #endif + + /** +@@ -146,12 +143,36 @@ int ubi_io_read(const struct ubi_device + + err = paranoid_check_not_bad(ubi, pnum); + if (err) +- return err > 0 ? -EINVAL : err; ++ return err; ++ ++ /* ++ * Deliberately corrupt the buffer to improve robustness. Indeed, if we ++ * do not do this, the following may happen: ++ * 1. The buffer contains data from previous operation, e.g., read from ++ * another PEB previously. 
The data looks like expected, e.g., if we ++ * just do not read anything and return - the caller would not ++ * notice this. E.g., if we are reading a VID header, the buffer may ++ * contain a valid VID header from another PEB. ++ * 2. The driver is buggy and returns us success or -EBADMSG or ++ * -EUCLEAN, but it does not actually put any data to the buffer. ++ * ++ * This may confuse UBI or upper layers - they may think the buffer ++ * contains valid data while in fact it is just old data. This is ++ * especially possible because UBI (and UBIFS) relies on CRC, and ++ * treats data as correct even in case of ECC errors if the CRC is ++ * correct. ++ * ++ * Try to prevent this situation by changing the first byte of the ++ * buffer. ++ */ ++ *((uint8_t *)buf) ^= 0xFF; + + addr = (loff_t)pnum * ubi->peb_size + offset; + retry: + err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + if (err) { ++ const char *errstr = (err == -EBADMSG) ? " (ECC error)" : ""; ++ + if (err == -EUCLEAN) { + /* + * -EUCLEAN is reported if there was a bit-flip which +@@ -166,16 +187,16 @@ retry: + return UBI_IO_BITFLIPS; + } + +- if (read != len && retries++ < UBI_IO_RETRIES) { +- dbg_io("error %d while reading %d bytes from PEB %d:%d," +- " read only %zd bytes, retry", +- err, len, pnum, offset, read); ++ if (retries++ < UBI_IO_RETRIES) { ++ dbg_io("error %d%s while reading %d bytes from PEB " ++ "%d:%d, read only %zd bytes, retry", ++ err, errstr, len, pnum, offset, read); + yield(); + goto retry; + } + +- ubi_err("error %d while reading %d bytes from PEB %d:%d, " +- "read %zd bytes", err, len, pnum, offset, read); ++ ubi_err("error %d%s while reading %d bytes from PEB %d:%d, " ++ "read %zd bytes", err, errstr, len, pnum, offset, read); + ubi_dbg_dump_stack(); + + /* +@@ -239,12 +260,12 @@ int ubi_io_write(struct ubi_device *ubi, + + err = paranoid_check_not_bad(ubi, pnum); + if (err) +- return err > 0 ? 
-EINVAL : err; ++ return err; + + /* The area we are writing to has to contain all 0xFF bytes */ +- err = paranoid_check_all_ff(ubi, pnum, offset, len); ++ err = ubi_dbg_check_all_ff(ubi, pnum, offset, len); + if (err) +- return err > 0 ? -EINVAL : err; ++ return err; + + if (offset >= ubi->leb_start) { + /* +@@ -253,10 +274,10 @@ int ubi_io_write(struct ubi_device *ubi, + */ + err = paranoid_check_peb_ec_hdr(ubi, pnum); + if (err) +- return err > 0 ? -EINVAL : err; ++ return err; + err = paranoid_check_peb_vid_hdr(ubi, pnum); + if (err) +- return err > 0 ? -EINVAL : err; ++ return err; + } + + if (ubi_dbg_is_write_failure()) { +@@ -269,12 +290,28 @@ int ubi_io_write(struct ubi_device *ubi, + addr = (loff_t)pnum * ubi->peb_size + offset; + err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf); + if (err) { +- ubi_err("error %d while writing %d bytes to PEB %d:%d, written" +- " %zd bytes", err, len, pnum, offset, written); ++ ubi_err("error %d while writing %d bytes to PEB %d:%d, written " ++ "%zd bytes", err, len, pnum, offset, written); + ubi_dbg_dump_stack(); ++ ubi_dbg_dump_flash(ubi, pnum, offset, len); + } else + ubi_assert(written == len); + ++ if (!err) { ++ err = ubi_dbg_check_write(ubi, buf, pnum, offset, len); ++ if (err) ++ return err; ++ ++ /* ++ * Since we always write sequentially, the rest of the PEB has ++ * to contain only 0xFF bytes. 
++ */ ++ offset += len; ++ len = ubi->peb_size - offset; ++ if (len) ++ err = ubi_dbg_check_all_ff(ubi, pnum, offset, len); ++ } ++ + return err; + } + +@@ -306,6 +343,12 @@ static int do_sync_erase(struct ubi_devi + wait_queue_head_t wq; + + dbg_io("erase PEB %d", pnum); ++ ubi_assert(pnum >= 0 && pnum < ubi->peb_count); ++ ++ if (ubi->ro_mode) { ++ ubi_err("read-only mode"); ++ return -EROFS; ++ } + + retry: + init_waitqueue_head(&wq); +@@ -348,11 +391,11 @@ retry: + return -EIO; + } + +- err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size); ++ err = ubi_dbg_check_all_ff(ubi, pnum, 0, ubi->peb_size); + if (err) +- return err > 0 ? -EINVAL : err; ++ return err; + +- if (ubi_dbg_is_erase_failure() && !err) { ++ if (ubi_dbg_is_erase_failure()) { + dbg_err("cannot erase PEB %d (emulated)", pnum); + return -EIO; + } +@@ -360,25 +403,6 @@ retry: + return 0; + } + +-/** +- * check_pattern - check if buffer contains only a certain byte pattern. +- * @buf: buffer to check +- * @patt: the pattern to check +- * @size: buffer size in bytes +- * +- * This function returns %1 in there are only @patt bytes in @buf, and %0 if +- * something else was also found. 
+- */ +-static int check_pattern(const void *buf, uint8_t patt, int size) +-{ +- int i; +- +- for (i = 0; i < size; i++) +- if (((const uint8_t *)buf)[i] != patt) +- return 0; +- return 1; +-} +- + /* Patterns to write to a physical eraseblock when torturing it */ + static uint8_t patterns[] = {0xa5, 0x5a, 0x0}; + +@@ -410,7 +434,7 @@ static int torture_peb(struct ubi_device + if (err) + goto out; + +- err = check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size); ++ err = ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size); + if (err == 0) { + ubi_err("erased PEB %d, but a non-0xFF byte found", + pnum); +@@ -429,7 +453,8 @@ static int torture_peb(struct ubi_device + if (err) + goto out; + +- err = check_pattern(ubi->peb_buf1, patterns[i], ubi->peb_size); ++ err = ubi_check_pattern(ubi->peb_buf1, patterns[i], ++ ubi->peb_size); + if (err == 0) { + ubi_err("pattern %x checking failed for PEB %d", + patterns[i], pnum); +@@ -439,7 +464,7 @@ static int torture_peb(struct ubi_device + } + + err = patt_count; +- ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum); ++ ubi_msg("PEB %d passed torture test, do not mark it as bad", pnum); + + out: + mutex_unlock(&ubi->buf_mutex); +@@ -457,6 +482,92 @@ out: + } + + /** ++ * nor_erase_prepare - prepare a NOR flash PEB for erasure. ++ * @ubi: UBI device description object ++ * @pnum: physical eraseblock number to prepare ++ * ++ * NOR flash, or at least some of them, have peculiar embedded PEB erasure ++ * algorithm: the PEB is first filled with zeroes, then it is erased. And ++ * filling with zeroes starts from the end of the PEB. This was observed with ++ * Spansion S29GL512N NOR flash. ++ * ++ * This means that in case of a power cut we may end up with intact data at the ++ * beginning of the PEB, and all zeroes at the end of PEB. In other words, the ++ * EC and VID headers are OK, but a large chunk of data at the end of PEB is ++ * zeroed. 
This makes UBI mistakenly treat this PEB as used and associate it ++ * with an LEB, which leads to subsequent failures (e.g., UBIFS fails). ++ * ++ * This function is called before erasing NOR PEBs and it zeroes out EC and VID ++ * magic numbers in order to invalidate them and prevent the failures. Returns ++ * zero in case of success and a negative error code in case of failure. ++ */ ++static int nor_erase_prepare(struct ubi_device *ubi, int pnum) ++{ ++ int err, err1; ++ size_t written; ++ loff_t addr; ++ uint32_t data = 0; ++ /* ++ * Note, we cannot generally define VID header buffers on stack, ++ * because of the way we deal with these buffers (see the header ++ * comment in this file). But we know this is a NOR-specific piece of ++ * code, so we can do this. But yes, this is error-prone and we should ++ * (pre-)allocate VID header buffer instead. ++ */ ++ struct ubi_vid_hdr vid_hdr; ++ ++ /* ++ * It is important to first invalidate the EC header, and then the VID ++ * header. Otherwise a power cut may lead to valid EC header and ++ * invalid VID header, in which case UBI will treat this PEB as ++ * corrupted and will try to preserve it, and print scary warnings (see ++ * the header comment in scan.c for more information). ++ */ ++ addr = (loff_t)pnum * ubi->peb_size; ++ err = ubi->mtd->write(ubi->mtd, addr, 4, &written, (void *)&data); ++ if (!err) { ++ addr += ubi->vid_hdr_aloffset; ++ err = ubi->mtd->write(ubi->mtd, addr, 4, &written, ++ (void *)&data); ++ if (!err) ++ return 0; ++ } ++ ++ /* ++ * We failed to write to the media. This was observed with Spansion ++ * S29GL512N NOR flash. Most probably the previously eraseblock erasure ++ * was interrupted at a very inappropriate moment, so it became ++ * unwritable. In this case we probably anyway have garbage in this ++ * PEB. 
++ */ ++ err1 = ubi_io_read_vid_hdr(ubi, pnum, &vid_hdr, 0); ++ if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR || ++ err1 == UBI_IO_FF) { ++ struct ubi_ec_hdr ec_hdr; ++ ++ err1 = ubi_io_read_ec_hdr(ubi, pnum, &ec_hdr, 0); ++ if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR || ++ err1 == UBI_IO_FF) ++ /* ++ * Both VID and EC headers are corrupted, so we can ++ * safely erase this PEB and not afraid that it will be ++ * treated as a valid PEB in case of an unclean reboot. ++ */ ++ return 0; ++ } ++ ++ /* ++ * The PEB contains a valid VID header, but we cannot invalidate it. ++ * Supposedly the flash media or the driver is screwed up, so return an ++ * error. ++ */ ++ ubi_err("cannot invalidate PEB %d, write returned %d read returned %d", ++ pnum, err, err1); ++ ubi_dbg_dump_flash(ubi, pnum, 0, ubi->peb_size); ++ return -EIO; ++} ++ ++/** + * ubi_io_sync_erase - synchronously erase a physical eraseblock. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to erase +@@ -465,7 +576,7 @@ out: + * This function synchronously erases physical eraseblock @pnum. If @torture + * flag is not zero, the physical eraseblock is checked by means of writing + * different patterns to it and reading them back. If the torturing is enabled, +- * the physical eraseblock is erased more then once. ++ * the physical eraseblock is erased more than once. + * + * This function returns the number of erasures made in case of success, %-EIO + * if the erasure failed or the torturing test failed, and other negative error +@@ -480,13 +591,19 @@ int ubi_io_sync_erase(struct ubi_device + + err = paranoid_check_not_bad(ubi, pnum); + if (err != 0) +- return err > 0 ? 
-EINVAL : err; ++ return err; + + if (ubi->ro_mode) { + ubi_err("read-only mode"); + return -EROFS; + } + ++ if (ubi->nor_flash) { ++ err = nor_erase_prepare(ubi, pnum); ++ if (err) ++ return err; ++ } ++ + if (torture) { + ret = torture_peb(ubi, pnum); + if (ret < 0) +@@ -566,16 +683,15 @@ int ubi_io_mark_bad(const struct ubi_dev + * This function returns zero if the erase counter header is OK, and %1 if + * not. + */ +-static int validate_ec_hdr(struct ubi_device *ubi, ++static int validate_ec_hdr(const struct ubi_device *ubi, + const struct ubi_ec_hdr *ec_hdr) + { + long long ec; +- int vid_hdr_offset, leb_start, image_seq; ++ int vid_hdr_offset, leb_start; + + ec = be64_to_cpu(ec_hdr->ec); + vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset); + leb_start = be32_to_cpu(ec_hdr->data_offset); +- image_seq = be32_to_cpu(ec_hdr->image_seq); + + if (ec_hdr->version != UBI_VERSION) { + ubi_err("node with incompatible UBI version found: " +@@ -601,15 +717,6 @@ static int validate_ec_hdr(struct ubi_de + goto bad; + } + +- if (!ubi->image_seq_set) { +- ubi->image_seq = image_seq; +- ubi->image_seq_set = 1; +- } else if (ubi->image_seq && image_seq && ubi->image_seq != image_seq) { +- ubi_err("bad image sequence number %d, expected %d", +- image_seq, ubi->image_seq); +- goto bad; +- } +- + return 0; + + bad: +@@ -635,68 +742,58 @@ bad: + * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected + * and corrected by the flash driver; this is harmless but may indicate that + * this eraseblock may become bad soon (but may be not); +- * o %UBI_IO_BAD_EC_HDR if the erase counter header is corrupted (a CRC error); +- * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty; ++ * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error); ++ * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was ++ * a data integrity error (uncorrectable ECC error in case of NAND); ++ * o %UBI_IO_FF if only 0xFF bytes were read (the PEB is 
supposedly empty) + * o a negative error code in case of failure. + */ + int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum, + struct ubi_ec_hdr *ec_hdr, int verbose) + { +- int err, read_err = 0; ++ int err, read_err; + uint32_t crc, magic, hdr_crc; + + dbg_io("read EC header from PEB %d", pnum); + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + +- err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); +- if (err) { +- if (err != UBI_IO_BITFLIPS && err != -EBADMSG) +- return err; ++ read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE); ++ if (read_err) { ++ if (read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG) ++ return read_err; + + /* + * We read all the data, but either a correctable bit-flip +- * occurred, or MTD reported about some data integrity error, +- * like an ECC error in case of NAND. The former is harmless, +- * the later may mean that the read data is corrupted. But we +- * have a CRC check-sum and we will detect this. If the EC +- * header is still OK, we just report this as there was a +- * bit-flip. ++ * occurred, or MTD reported a data integrity error ++ * (uncorrectable ECC error in case of NAND). The former is ++ * harmless, the later may mean that the read data is ++ * corrupted. But we have a CRC check-sum and we will detect ++ * this. If the EC header is still OK, we just report this as ++ * there was a bit-flip, to force scrubbing. + */ +- read_err = err; + } + + magic = be32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { ++ if (read_err == -EBADMSG) ++ return UBI_IO_BAD_HDR_EBADMSG; ++ + /* + * The magic field is wrong. Let's check if we have read all + * 0xFF. If yes, this physical eraseblock is assumed to be + * empty. +- * +- * But if there was a read error, we do not test it for all +- * 0xFFs. Even if it does contain all 0xFFs, this error +- * indicates that something is still wrong with this physical +- * eraseblock and we anyway cannot treat it as empty. 
+ */ +- if (read_err != -EBADMSG && +- check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { ++ if (ubi_check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) { + /* The physical eraseblock is supposedly empty */ +- +- /* +- * The below is just a paranoid check, it has to be +- * compiled out if paranoid checks are disabled. +- */ +- err = paranoid_check_all_ff(ubi, pnum, 0, +- ubi->peb_size); +- if (err) +- return err > 0 ? UBI_IO_BAD_EC_HDR : err; +- + if (verbose) + ubi_warn("no EC header found at PEB %d, " + "only 0xFF bytes", pnum); +- else if (UBI_IO_DEBUG) +- dbg_msg("no EC header found at PEB %d, " +- "only 0xFF bytes", pnum); +- return UBI_IO_PEB_EMPTY; ++ dbg_bld("no EC header found at PEB %d, " ++ "only 0xFF bytes", pnum); ++ if (!read_err) ++ return UBI_IO_FF; ++ else ++ return UBI_IO_FF_BITFLIPS; + } + + /* +@@ -707,10 +804,10 @@ int ubi_io_read_ec_hdr(struct ubi_device + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_EC_HDR_MAGIC); + ubi_dbg_dump_ec_hdr(ec_hdr); +- } else if (UBI_IO_DEBUG) +- dbg_msg("bad magic number at PEB %d: %08x instead of " +- "%08x", pnum, magic, UBI_EC_HDR_MAGIC); +- return UBI_IO_BAD_EC_HDR; ++ } ++ dbg_bld("bad magic number at PEB %d: %08x instead of " ++ "%08x", pnum, magic, UBI_EC_HDR_MAGIC); ++ return UBI_IO_BAD_HDR; + } + + crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC); +@@ -721,10 +818,14 @@ int ubi_io_read_ec_hdr(struct ubi_device + ubi_warn("bad EC header CRC at PEB %d, calculated " + "%#08x, read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_ec_hdr(ec_hdr); +- } else if (UBI_IO_DEBUG) +- dbg_msg("bad EC header CRC at PEB %d, calculated " +- "%#08x, read %#08x", pnum, crc, hdr_crc); +- return UBI_IO_BAD_EC_HDR; ++ } ++ dbg_bld("bad EC header CRC at PEB %d, calculated " ++ "%#08x, read %#08x", pnum, crc, hdr_crc); ++ ++ if (!read_err) ++ return UBI_IO_BAD_HDR; ++ else ++ return UBI_IO_BAD_HDR_EBADMSG; + } + + /* And of course validate what has just been read from the media */ +@@ -734,6 
+835,10 @@ int ubi_io_read_ec_hdr(struct ubi_device + return -EINVAL; + } + ++ /* ++ * If there was %-EBADMSG, but the header CRC is still OK, report about ++ * a bit-flip to force scrubbing on this PEB. ++ */ + return read_err ? UBI_IO_BITFLIPS : 0; + } + +@@ -771,7 +876,7 @@ int ubi_io_write_ec_hdr(struct ubi_devic + + err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr); + if (err) +- return -EINVAL; ++ return err; + + err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize); + return err; +@@ -907,22 +1012,16 @@ bad: + * + * This function reads the volume identifier header from physical eraseblock + * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read +- * volume identifier header. The following codes may be returned: ++ * volume identifier header. The error codes are the same as in ++ * 'ubi_io_read_ec_hdr()'. + * +- * o %0 if the CRC checksum is correct and the header was successfully read; +- * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected +- * and corrected by the flash driver; this is harmless but may indicate that +- * this eraseblock may become bad soon; +- * o %UBI_IO_BAD_VID_HDR if the volume identifier header is corrupted (a CRC +- * error detected); +- * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID +- * header there); +- * o a negative error code in case of failure. ++ * Note, the implementation of this function is also very similar to ++ * 'ubi_io_read_ec_hdr()', so refer commentaries in 'ubi_io_read_ec_hdr()'. 
+ */ + int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum, + struct ubi_vid_hdr *vid_hdr, int verbose) + { +- int err, read_err = 0; ++ int err, read_err; + uint32_t crc, magic, hdr_crc; + void *p; + +@@ -930,68 +1029,36 @@ int ubi_io_read_vid_hdr(struct ubi_devic + ubi_assert(pnum >= 0 && pnum < ubi->peb_count); + + p = (char *)vid_hdr - ubi->vid_hdr_shift; +- err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, ++ read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset, + ubi->vid_hdr_alsize); +- if (err) { +- if (err != UBI_IO_BITFLIPS && err != -EBADMSG) +- return err; +- +- /* +- * We read all the data, but either a correctable bit-flip +- * occurred, or MTD reported about some data integrity error, +- * like an ECC error in case of NAND. The former is harmless, +- * the later may mean the read data is corrupted. But we have a +- * CRC check-sum and we will identify this. If the VID header is +- * still OK, we just report this as there was a bit-flip. +- */ +- read_err = err; +- } ++ if (read_err && read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG) ++ return read_err; + + magic = be32_to_cpu(vid_hdr->magic); + if (magic != UBI_VID_HDR_MAGIC) { +- /* +- * If we have read all 0xFF bytes, the VID header probably does +- * not exist and the physical eraseblock is assumed to be free. +- * +- * But if there was a read error, we do not test the data for +- * 0xFFs. Even if it does contain all 0xFFs, this error +- * indicates that something is still wrong with this physical +- * eraseblock and it cannot be regarded as free. +- */ +- if (read_err != -EBADMSG && +- check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { +- /* The physical eraseblock is supposedly free */ +- +- /* +- * The below is just a paranoid check, it has to be +- * compiled out if paranoid checks are disabled. +- */ +- err = paranoid_check_all_ff(ubi, pnum, ubi->leb_start, +- ubi->leb_size); +- if (err) +- return err > 0 ? 
UBI_IO_BAD_VID_HDR : err; ++ if (read_err == -EBADMSG) ++ return UBI_IO_BAD_HDR_EBADMSG; + ++ if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) { + if (verbose) + ubi_warn("no VID header found at PEB %d, " + "only 0xFF bytes", pnum); +- else if (UBI_IO_DEBUG) +- dbg_msg("no VID header found at PEB %d, " +- "only 0xFF bytes", pnum); +- return UBI_IO_PEB_FREE; ++ dbg_bld("no VID header found at PEB %d, " ++ "only 0xFF bytes", pnum); ++ if (!read_err) ++ return UBI_IO_FF; ++ else ++ return UBI_IO_FF_BITFLIPS; + } + +- /* +- * This is not a valid VID header, and these are not 0xFF +- * bytes. Report that the header is corrupted. +- */ + if (verbose) { + ubi_warn("bad magic number at PEB %d: %08x instead of " + "%08x", pnum, magic, UBI_VID_HDR_MAGIC); + ubi_dbg_dump_vid_hdr(vid_hdr); +- } else if (UBI_IO_DEBUG) +- dbg_msg("bad magic number at PEB %d: %08x instead of " +- "%08x", pnum, magic, UBI_VID_HDR_MAGIC); +- return UBI_IO_BAD_VID_HDR; ++ } ++ dbg_bld("bad magic number at PEB %d: %08x instead of " ++ "%08x", pnum, magic, UBI_VID_HDR_MAGIC); ++ return UBI_IO_BAD_HDR; + } + + crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC); +@@ -1002,13 +1069,15 @@ int ubi_io_read_vid_hdr(struct ubi_devic + ubi_warn("bad CRC at PEB %d, calculated %#08x, " + "read %#08x", pnum, crc, hdr_crc); + ubi_dbg_dump_vid_hdr(vid_hdr); +- } else if (UBI_IO_DEBUG) +- dbg_msg("bad CRC at PEB %d, calculated %#08x, " +- "read %#08x", pnum, crc, hdr_crc); +- return UBI_IO_BAD_VID_HDR; ++ } ++ dbg_bld("bad CRC at PEB %d, calculated %#08x, " ++ "read %#08x", pnum, crc, hdr_crc); ++ if (!read_err) ++ return UBI_IO_BAD_HDR; ++ else ++ return UBI_IO_BAD_HDR_EBADMSG; + } + +- /* Validate the VID header that we have just read */ + err = validate_vid_hdr(ubi, vid_hdr); + if (err) { + ubi_err("validation failed for PEB %d", pnum); +@@ -1045,7 +1114,7 @@ int ubi_io_write_vid_hdr(struct ubi_devi + + err = paranoid_check_peb_ec_hdr(ubi, pnum); + if (err) +- return err > 0 ? 
-EINVAL : err; ++ return err; + + vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC); + vid_hdr->version = UBI_VERSION; +@@ -1054,7 +1123,7 @@ int ubi_io_write_vid_hdr(struct ubi_devi + + err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr); + if (err) +- return -EINVAL; ++ return err; + + p = (char *)vid_hdr - ubi->vid_hdr_shift; + err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset, +@@ -1062,27 +1131,30 @@ int ubi_io_write_vid_hdr(struct ubi_devi + return err; + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_check_not_bad - ensure that a physical eraseblock is not bad. + * @ubi: UBI device description object + * @pnum: physical eraseblock number to check + * +- * This function returns zero if the physical eraseblock is good, a positive +- * number if it is bad and a negative error code if an error occurred. ++ * This function returns zero if the physical eraseblock is good, %-EINVAL if ++ * it is bad and a negative error code if an error occurred. + */ + static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum) + { + int err; + ++ if (!(ubi_chk_flags & UBI_CHK_IO)) ++ return 0; ++ + err = ubi_io_is_bad(ubi, pnum); + if (!err) + return err; + + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_stack(); +- return err; ++ return err > 0 ? -EINVAL : err; + } + + /** +@@ -1092,7 +1164,7 @@ static int paranoid_check_not_bad(const + * @ec_hdr: the erase counter header to check + * + * This function returns zero if the erase counter header contains valid +- * values, and %1 if not. ++ * values, and %-EINVAL if not. 
+ */ + static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_ec_hdr *ec_hdr) +@@ -1100,6 +1172,9 @@ static int paranoid_check_ec_hdr(const s + int err; + uint32_t magic; + ++ if (!(ubi_chk_flags & UBI_CHK_IO)) ++ return 0; ++ + magic = be32_to_cpu(ec_hdr->magic); + if (magic != UBI_EC_HDR_MAGIC) { + ubi_err("bad magic %#08x, must be %#08x", +@@ -1118,7 +1193,7 @@ static int paranoid_check_ec_hdr(const s + fail: + ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_dbg_dump_stack(); +- return 1; ++ return -EINVAL; + } + + /** +@@ -1126,8 +1201,8 @@ fail: + * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * +- * This function returns zero if the erase counter header is all right, %1 if +- * not, and a negative error code if an error occurred. ++ * This function returns zero if the erase counter header is all right and and ++ * a negative error code if not or if an error occurred. + */ + static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum) + { +@@ -1135,6 +1210,9 @@ static int paranoid_check_peb_ec_hdr(con + uint32_t crc, hdr_crc; + struct ubi_ec_hdr *ec_hdr; + ++ if (!(ubi_chk_flags & UBI_CHK_IO)) ++ return 0; ++ + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; +@@ -1150,7 +1228,7 @@ static int paranoid_check_peb_ec_hdr(con + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_ec_hdr(ec_hdr); + ubi_dbg_dump_stack(); +- err = 1; ++ err = -EINVAL; + goto exit; + } + +@@ -1168,7 +1246,7 @@ exit: + * @vid_hdr: the volume identifier header to check + * + * This function returns zero if the volume identifier header is all right, and +- * %1 if not. ++ * %-EINVAL if not. 
+ */ + static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum, + const struct ubi_vid_hdr *vid_hdr) +@@ -1176,6 +1254,9 @@ static int paranoid_check_vid_hdr(const + int err; + uint32_t magic; + ++ if (!(ubi_chk_flags & UBI_CHK_IO)) ++ return 0; ++ + magic = be32_to_cpu(vid_hdr->magic); + if (magic != UBI_VID_HDR_MAGIC) { + ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x", +@@ -1195,7 +1276,7 @@ fail: + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_dbg_dump_stack(); +- return 1; ++ return -EINVAL; + + } + +@@ -1205,7 +1286,7 @@ fail: + * @pnum: the physical eraseblock number to check + * + * This function returns zero if the volume identifier header is all right, +- * %1 if not, and a negative error code if an error occurred. ++ * and a negative error code if not or if an error occurred. + */ + static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum) + { +@@ -1214,6 +1295,9 @@ static int paranoid_check_peb_vid_hdr(co + struct ubi_vid_hdr *vid_hdr; + void *p; + ++ if (!(ubi_chk_flags & UBI_CHK_IO)) ++ return 0; ++ + vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS); + if (!vid_hdr) + return -ENOMEM; +@@ -1232,7 +1316,7 @@ static int paranoid_check_peb_vid_hdr(co + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_dbg_dump_vid_hdr(vid_hdr); + ubi_dbg_dump_stack(); +- err = 1; ++ err = -EINVAL; + goto exit; + } + +@@ -1244,51 +1328,124 @@ exit: + } + + /** +- * paranoid_check_all_ff - check that a region of flash is empty. ++ * ubi_dbg_check_write - make sure write succeeded. 
++ * @ubi: UBI device description object ++ * @buf: buffer with data which were written ++ * @pnum: physical eraseblock number the data were written to ++ * @offset: offset within the physical eraseblock the data were written to ++ * @len: how many bytes were written ++ * ++ * This functions reads data which were recently written and compares it with ++ * the original data buffer - the data have to match. Returns zero if the data ++ * match and a negative error code if not or in case of failure. ++ */ ++int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum, ++ int offset, int len) ++{ ++ int err, i; ++ size_t read; ++ void *buf1; ++ loff_t addr = (loff_t)pnum * ubi->peb_size + offset; ++ ++ if (!(ubi_chk_flags & UBI_CHK_IO)) ++ return 0; ++ ++ buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); ++ if (!buf1) { ++ ubi_err("cannot allocate memory to check writes"); ++ return 0; ++ } ++ ++ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf1); ++ if (err && err != -EUCLEAN) ++ goto out_free; ++ ++ for (i = 0; i < len; i++) { ++ uint8_t c = ((uint8_t *)buf)[i]; ++ uint8_t c1 = ((uint8_t *)buf1)[i]; ++ int dump_len; ++ ++ if (c == c1) ++ continue; ++ ++ ubi_err("paranoid check failed for PEB %d:%d, len %d", ++ pnum, offset, len); ++ ubi_msg("data differ at position %d", i); ++ dump_len = max_t(int, 128, len - i); ++ ubi_msg("hex dump of the original buffer from %d to %d", ++ i, i + dump_len); ++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ++ buf + i, dump_len, 1); ++ ubi_msg("hex dump of the read buffer from %d to %d", ++ i, i + dump_len); ++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ++ buf1 + i, dump_len, 1); ++ ubi_dbg_dump_stack(); ++ err = -EINVAL; ++ goto out_free; ++ } ++ ++ vfree(buf1); ++ return 0; ++ ++out_free: ++ vfree(buf1); ++ return err; ++} ++ ++/** ++ * ubi_dbg_check_all_ff - check that a region of flash is empty. 
+ * @ubi: UBI device description object + * @pnum: the physical eraseblock number to check + * @offset: the starting offset within the physical eraseblock to check + * @len: the length of the region to check + * + * This function returns zero if only 0xFF bytes are present at offset +- * @offset of the physical eraseblock @pnum, %1 if not, and a negative error +- * code if an error occurred. ++ * @offset of the physical eraseblock @pnum, and a negative error code if not ++ * or if an error occurred. + */ +-static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset, +- int len) ++int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len) + { + size_t read; + int err; ++ void *buf; + loff_t addr = (loff_t)pnum * ubi->peb_size + offset; + +- mutex_lock(&ubi->dbg_buf_mutex); +- err = ubi->mtd->read(ubi->mtd, addr, len, &read, ubi->dbg_peb_buf); ++ if (!(ubi_chk_flags & UBI_CHK_IO)) ++ return 0; ++ ++ buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubi_err("cannot allocate memory to check for 0xFFs"); ++ return 0; ++ } ++ ++ err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf); + if (err && err != -EUCLEAN) { + ubi_err("error %d while reading %d bytes from PEB %d:%d, " + "read %zd bytes", err, len, pnum, offset, read); + goto error; + } + +- err = check_pattern(ubi->dbg_peb_buf, 0xFF, len); ++ err = ubi_check_pattern(buf, 0xFF, len); + if (err == 0) { + ubi_err("flash region at PEB %d:%d, length %d does not " + "contain all 0xFF bytes", pnum, offset, len); + goto fail; + } +- mutex_unlock(&ubi->dbg_buf_mutex); + ++ vfree(buf); + return 0; + + fail: + ubi_err("paranoid check failed for PEB %d", pnum); + ubi_msg("hex dump of the %d-%d region", offset, offset + len); +- print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, +- ubi->dbg_peb_buf, len, 1); +- err = 1; ++ print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1); ++ err = -EINVAL; + error: + ubi_dbg_dump_stack(); +- 
mutex_unlock(&ubi->dbg_buf_mutex); ++ vfree(buf); + return err; + } + +-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff -uprN linux-2.6.28/drivers/mtd/ubi/kapi.c ubifs-v2.6.28/drivers/mtd/ubi/kapi.c +--- linux-2.6.28/drivers/mtd/ubi/kapi.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/kapi.c 2011-06-15 14:22:07.000000000 -0400 +@@ -22,10 +22,32 @@ + + #include + #include ++#include ++#include + #include + #include "ubi.h" + + /** ++ * ubi_do_get_device_info - get information about UBI device. ++ * @ubi: UBI device description object ++ * @di: the information is stored here ++ * ++ * This function is the same as 'ubi_get_device_info()', but it assumes the UBI ++ * device is locked and cannot disappear. ++ */ ++void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di) ++{ ++ di->ubi_num = ubi->ubi_num; ++ di->leb_size = ubi->leb_size; ++ di->leb_start = ubi->leb_start; ++ di->min_io_size = ubi->min_io_size; ++ di->max_write_size = ubi->max_write_size; ++ di->ro_mode = ubi->ro_mode; ++ di->cdev = ubi->cdev.dev; ++} ++EXPORT_SYMBOL_GPL(ubi_do_get_device_info); ++ ++/** + * ubi_get_device_info - get information about UBI device. + * @ubi_num: UBI device number + * @di: the information is stored here +@@ -39,33 +61,24 @@ int ubi_get_device_info(int ubi_num, str + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) + return -EINVAL; +- + ubi = ubi_get_device(ubi_num); + if (!ubi) + return -ENODEV; +- +- di->ubi_num = ubi->ubi_num; +- di->leb_size = ubi->leb_size; +- di->min_io_size = ubi->min_io_size; +- di->ro_mode = ubi->ro_mode; +- di->cdev = ubi->cdev.dev; +- ++ ubi_do_get_device_info(ubi, di); + ubi_put_device(ubi); + return 0; + } + EXPORT_SYMBOL_GPL(ubi_get_device_info); + + /** +- * ubi_get_volume_info - get information about UBI volume. +- * @desc: volume descriptor ++ * ubi_do_get_volume_info - get information about UBI volume. 
++ * @ubi: UBI device description object ++ * @vol: volume description object + * @vi: the information is stored here + */ +-void ubi_get_volume_info(struct ubi_volume_desc *desc, +- struct ubi_volume_info *vi) ++void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, ++ struct ubi_volume_info *vi) + { +- const struct ubi_volume *vol = desc->vol; +- const struct ubi_device *ubi = vol->ubi; +- + vi->vol_id = vol->vol_id; + vi->ubi_num = ubi->ubi_num; + vi->size = vol->reserved_pebs; +@@ -79,6 +92,17 @@ void ubi_get_volume_info(struct ubi_volu + vi->name = vol->name; + vi->cdev = vol->cdev.dev; + } ++ ++/** ++ * ubi_get_volume_info - get information about UBI volume. ++ * @desc: volume descriptor ++ * @vi: the information is stored here ++ */ ++void ubi_get_volume_info(struct ubi_volume_desc *desc, ++ struct ubi_volume_info *vi) ++{ ++ ubi_do_get_volume_info(desc->vol->ubi, desc->vol, vi); ++} + EXPORT_SYMBOL_GPL(ubi_get_volume_info); + + /** +@@ -106,7 +130,7 @@ struct ubi_volume_desc *ubi_open_volume( + struct ubi_device *ubi; + struct ubi_volume *vol; + +- dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode); ++ dbg_gen("open device %d, volume %d, mode %d", ubi_num, vol_id, mode); + + if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES) + return ERR_PTR(-EINVAL); +@@ -196,6 +220,8 @@ out_free: + kfree(desc); + out_put_ubi: + ubi_put_device(ubi); ++ dbg_err("cannot open device %d, volume %d, error %d", ++ ubi_num, vol_id, err); + return ERR_PTR(err); + } + EXPORT_SYMBOL_GPL(ubi_open_volume); +@@ -215,7 +241,7 @@ struct ubi_volume_desc *ubi_open_volume_ + struct ubi_device *ubi; + struct ubi_volume_desc *ret; + +- dbg_gen("open volume %s, mode %d", name, mode); ++ dbg_gen("open device %d, volume %s, mode %d", ubi_num, name, mode); + + if (!name) + return ERR_PTR(-EINVAL); +@@ -258,6 +284,43 @@ struct ubi_volume_desc *ubi_open_volume_ + EXPORT_SYMBOL_GPL(ubi_open_volume_nm); + + /** ++ * ubi_open_volume_path - open UBI volume by its 
character device node path. ++ * @pathname: volume character device node path ++ * @mode: open mode ++ * ++ * This function is similar to 'ubi_open_volume()', but opens a volume the path ++ * to its character device node. ++ */ ++struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) ++{ ++ int error, ubi_num, vol_id, mod; ++ struct inode *inode; ++ struct path path; ++ ++ dbg_gen("open volume %s, mode %d", pathname, mode); ++ ++ if (!pathname || !*pathname) ++ return ERR_PTR(-EINVAL); ++ ++ error = kern_path(pathname, LOOKUP_FOLLOW, &path); ++ if (error) ++ return ERR_PTR(error); ++ ++ inode = path.dentry->d_inode; ++ mod = inode->i_mode; ++ ubi_num = ubi_major2num(imajor(inode)); ++ vol_id = iminor(inode) - 1; ++ path_put(&path); ++ ++ if (!S_ISCHR(mod)) ++ return ERR_PTR(-EINVAL); ++ if (vol_id >= 0 && ubi_num >= 0) ++ return ubi_open_volume(ubi_num, vol_id, mode); ++ return ERR_PTR(-ENODEV); ++} ++EXPORT_SYMBOL_GPL(ubi_open_volume_path); ++ ++/** + * ubi_close_volume - close UBI volume. + * @desc: volume descriptor + */ +@@ -266,7 +329,8 @@ void ubi_close_volume(struct ubi_volume_ + struct ubi_volume *vol = desc->vol; + struct ubi_device *ubi = vol->ubi; + +- dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode); ++ dbg_gen("close device %d, volume %d, mode %d", ++ ubi->ubi_num, vol->vol_id, desc->mode); + + spin_lock(&ubi->volumes_lock); + switch (desc->mode) { +@@ -425,7 +489,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_write); + * + * This function changes the contents of a logical eraseblock atomically. @buf + * has to contain new logical eraseblock data, and @len - the length of the +- * data, which has to be aligned. The length may be shorter then the logical ++ * data, which has to be aligned. The length may be shorter than the logical + * eraseblock size, ant the logical eraseblock may be appended to more times + * later on. This function guarantees that in case of an unclean reboot the old + * contents is preserved. 
Returns zero in case of success and a negative error +@@ -508,7 +572,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_erase); + * + * This function un-maps logical eraseblock @lnum and schedules the + * corresponding physical eraseblock for erasure, so that it will eventually be +- * physically erased in background. This operation is much faster then the ++ * physically erased in background. This operation is much faster than the + * erase operation. + * + * Unlike erase, the un-map operation does not guarantee that the logical +@@ -527,7 +591,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_erase); + * + * The main and obvious use-case of this function is when the contents of a + * logical eraseblock has to be re-written. Then it is much more efficient to +- * first un-map it, then write new data, rather then first erase it, then write ++ * first un-map it, then write new data, rather than first erase it, then write + * new data. Note, once new data has been written to the logical eraseblock, + * UBI guarantees that the old contents has gone forever. In other words, if an + * unclean reboot happens after the logical eraseblock has been un-mapped and +@@ -558,13 +622,13 @@ int ubi_leb_unmap(struct ubi_volume_desc + EXPORT_SYMBOL_GPL(ubi_leb_unmap); + + /** +- * ubi_leb_map - map logical erasblock to a physical eraseblock. ++ * ubi_leb_map - map logical eraseblock to a physical eraseblock. + * @desc: volume descriptor + * @lnum: logical eraseblock number + * @dtype: expected data type + * + * This function maps an un-mapped logical eraseblock @lnum to a physical +- * eraseblock. This means, that after a successfull invocation of this ++ * eraseblock. This means, that after a successful invocation of this + * function the logical eraseblock @lnum will be empty (contain only %0xFF + * bytes) and be mapped to a physical eraseblock, even if an unclean reboot + * happens. 
+@@ -656,3 +720,59 @@ int ubi_sync(int ubi_num) + return 0; + } + EXPORT_SYMBOL_GPL(ubi_sync); ++ ++BLOCKING_NOTIFIER_HEAD(ubi_notifiers); ++ ++/** ++ * ubi_register_volume_notifier - register a volume notifier. ++ * @nb: the notifier description object ++ * @ignore_existing: if non-zero, do not send "added" notification for all ++ * already existing volumes ++ * ++ * This function registers a volume notifier, which means that ++ * 'nb->notifier_call()' will be invoked when an UBI volume is created, ++ * removed, re-sized, re-named, or updated. The first argument of the function ++ * is the notification type. The second argument is pointer to a ++ * &struct ubi_notification object which describes the notification event. ++ * Using UBI API from the volume notifier is prohibited. ++ * ++ * This function returns zero in case of success and a negative error code ++ * in case of failure. ++ */ ++int ubi_register_volume_notifier(struct notifier_block *nb, ++ int ignore_existing) ++{ ++ int err; ++ ++ err = blocking_notifier_chain_register(&ubi_notifiers, nb); ++ if (err != 0) ++ return err; ++ if (ignore_existing) ++ return 0; ++ ++ /* ++ * We are going to walk all UBI devices and all volumes, and ++ * notify the user about existing volumes by the %UBI_VOLUME_ADDED ++ * event. We have to lock the @ubi_devices_mutex to make sure UBI ++ * devices do not disappear. ++ */ ++ mutex_lock(&ubi_devices_mutex); ++ ubi_enumerate_volumes(nb); ++ mutex_unlock(&ubi_devices_mutex); ++ ++ return err; ++} ++EXPORT_SYMBOL_GPL(ubi_register_volume_notifier); ++ ++/** ++ * ubi_unregister_volume_notifier - unregister the volume notifier. ++ * @nb: the notifier description object ++ * ++ * This function unregisters volume notifier @nm and returns zero in case of ++ * success and a negative error code in case of failure. 
++ */ ++int ubi_unregister_volume_notifier(struct notifier_block *nb) ++{ ++ return blocking_notifier_chain_unregister(&ubi_notifiers, nb); ++} ++EXPORT_SYMBOL_GPL(ubi_unregister_volume_notifier); +diff -uprN linux-2.6.28/drivers/mtd/ubi/Kconfig ubifs-v2.6.28/drivers/mtd/ubi/Kconfig +--- linux-2.6.28/drivers/mtd/ubi/Kconfig 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/Kconfig 2011-06-15 14:22:07.000000000 -0400 +@@ -1,11 +1,7 @@ + # drivers/mtd/ubi/Kconfig + +-menu "UBI - Unsorted block images" +- depends on MTD +- +-config MTD_UBI +- tristate "Enable UBI" +- depends on MTD ++menuconfig MTD_UBI ++ tristate "Enable UBI - Unsorted block images" + select CRC32 + help + UBI is a software layer above MTD layer which admits of LVM-like +@@ -14,11 +10,12 @@ config MTD_UBI + capabilities. Please, consult the MTD web site for more details + (www.linux-mtd.infradead.org). + ++if MTD_UBI ++ + config MTD_UBI_WL_THRESHOLD + int "UBI wear-leveling threshold" + default 4096 + range 2 65536 +- depends on MTD_UBI + help + This parameter defines the maximum difference between the highest + erase counter value and the lowest erase counter value of eraseblocks +@@ -29,14 +26,13 @@ config MTD_UBI_WL_THRESHOLD + The default value should be OK for SLC NAND flashes, NOR flashes and + other flashes which have eraseblock life-cycle 100000 or more. + However, in case of MLC NAND flashes which typically have eraseblock +- life-cycle less then 10000, the threshold should be lessened (e.g., ++ life-cycle less than 10000, the threshold should be lessened (e.g., + to 128 or 256, although it does not have to be power of 2). + + config MTD_UBI_BEB_RESERVE + int "Percentage of reserved eraseblocks for bad eraseblocks handling" + default 1 + range 0 25 +- depends on MTD_UBI + help + If the MTD device admits of bad eraseblocks (e.g. NAND flash), UBI + reserves some amount of physical eraseblocks to handle new bad +@@ -49,15 +45,21 @@ config MTD_UBI_BEB_RESERVE + reserved. 
Leave the default value if unsure. + + config MTD_UBI_GLUEBI +- bool "Emulate MTD devices" +- default n +- depends on MTD_UBI ++ tristate "MTD devices emulation driver (gluebi)" ++ help ++ This option enables gluebi - an additional driver which emulates MTD ++ devices on top of UBI volumes: for each UBI volumes an MTD device is ++ created, and all I/O to this MTD device is redirected to the UBI ++ volume. This is handy to make MTD-oriented software (like JFFS2) ++ work on top of UBI. Do not enable this unless you use legacy ++ software. ++ ++config MTD_UBI_DEBUG ++ bool "UBI debugging" ++ depends on SYSFS ++ select DEBUG_FS ++ select KALLSYMS + help +- This option enables MTD devices emulation on top of UBI volumes: for +- each UBI volumes an MTD device is created, and all I/O to this MTD +- device is redirected to the UBI volume. This is handy to make +- MTD-oriented software (like JFFS2) work on top of UBI. Do not enable +- this if no legacy software will be used. ++ This option enables UBI debugging. + +-source "drivers/mtd/ubi/Kconfig.debug" +-endmenu ++endif # MTD_UBI +diff -uprN linux-2.6.28/drivers/mtd/ubi/Kconfig.debug ubifs-v2.6.28/drivers/mtd/ubi/Kconfig.debug +--- linux-2.6.28/drivers/mtd/ubi/Kconfig.debug 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/Kconfig.debug 1969-12-31 19:00:00.000000000 -0500 +@@ -1,104 +0,0 @@ +-comment "UBI debugging options" +- depends on MTD_UBI +- +-config MTD_UBI_DEBUG +- bool "UBI debugging" +- depends on SYSFS +- depends on MTD_UBI +- select DEBUG_FS +- select KALLSYMS_ALL +- help +- This option enables UBI debugging. +- +-config MTD_UBI_DEBUG_MSG +- bool "UBI debugging messages" +- depends on MTD_UBI_DEBUG +- default n +- help +- This option enables UBI debugging messages. +- +-config MTD_UBI_DEBUG_PARANOID +- bool "Extra self-checks" +- default n +- depends on MTD_UBI_DEBUG +- help +- This option enables extra checks in UBI code. Note this slows UBI down +- significantly. 
+- +-config MTD_UBI_DEBUG_DISABLE_BGT +- bool "Do not enable the UBI background thread" +- depends on MTD_UBI_DEBUG +- default n +- help +- This option switches the background thread off by default. The thread +- may be also be enabled/disabled via UBI sysfs. +- +-config MTD_UBI_DEBUG_USERSPACE_IO +- bool "Direct user-space write/erase support" +- default n +- depends on MTD_UBI_DEBUG +- help +- By default, users cannot directly write and erase individual +- eraseblocks of dynamic volumes, and have to use update operation +- instead. This option enables this capability - it is very useful for +- debugging and testing. +- +-config MTD_UBI_DEBUG_EMULATE_BITFLIPS +- bool "Emulate flash bit-flips" +- depends on MTD_UBI_DEBUG +- default n +- help +- This option emulates bit-flips with probability 1/50, which in turn +- causes scrubbing. Useful for debugging and stressing UBI. +- +-config MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES +- bool "Emulate flash write failures" +- depends on MTD_UBI_DEBUG +- default n +- help +- This option emulates write failures with probability 1/100. Useful for +- debugging and testing how UBI handlines errors. +- +-config MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES +- bool "Emulate flash erase failures" +- depends on MTD_UBI_DEBUG +- default n +- help +- This option emulates erase failures with probability 1/100. Useful for +- debugging and testing how UBI handlines errors. +- +-menu "Additional UBI debugging messages" +- depends on MTD_UBI_DEBUG +- +-config MTD_UBI_DEBUG_MSG_BLD +- bool "Additional UBI initialization and build messages" +- default n +- depends on MTD_UBI_DEBUG +- help +- This option enables detailed UBI initialization and device build +- debugging messages. +- +-config MTD_UBI_DEBUG_MSG_EBA +- bool "Eraseblock association unit messages" +- default n +- depends on MTD_UBI_DEBUG +- help +- This option enables debugging messages from the UBI eraseblock +- association unit. 
+- +-config MTD_UBI_DEBUG_MSG_WL +- bool "Wear-leveling unit messages" +- default n +- depends on MTD_UBI_DEBUG +- help +- This option enables debugging messages from the UBI wear-leveling +- unit. +- +-config MTD_UBI_DEBUG_MSG_IO +- bool "Input/output unit messages" +- default n +- depends on MTD_UBI_DEBUG +- help +- This option enables debugging messages from the UBI input/output unit. +- +-endmenu # UBI debugging messages +diff -uprN linux-2.6.28/drivers/mtd/ubi/Makefile ubifs-v2.6.28/drivers/mtd/ubi/Makefile +--- linux-2.6.28/drivers/mtd/ubi/Makefile 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/Makefile 2011-06-15 14:22:07.000000000 -0400 +@@ -4,4 +4,4 @@ ubi-y += vtbl.o vmt.o upd.o build.o cdev + ubi-y += misc.o + + ubi-$(CONFIG_MTD_UBI_DEBUG) += debug.o +-ubi-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o ++obj-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o +diff -uprN linux-2.6.28/drivers/mtd/ubi/misc.c ubifs-v2.6.28/drivers/mtd/ubi/misc.c +--- linux-2.6.28/drivers/mtd/ubi/misc.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/misc.c 2011-06-15 14:22:07.000000000 -0400 +@@ -103,3 +103,22 @@ void ubi_calculate_reserved(struct ubi_d + if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS) + ubi->beb_rsvd_level = MIN_RESEVED_PEBS; + } ++ ++/** ++ * ubi_check_pattern - check if buffer contains only a certain byte pattern. ++ * @buf: buffer to check ++ * @patt: the pattern to check ++ * @size: buffer size in bytes ++ * ++ * This function returns %1 in there are only @patt bytes in @buf, and %0 if ++ * something else was also found. 
++ */ ++int ubi_check_pattern(const void *buf, uint8_t patt, int size) ++{ ++ int i; ++ ++ for (i = 0; i < size; i++) ++ if (((const uint8_t *)buf)[i] != patt) ++ return 0; ++ return 1; ++} +diff -uprN linux-2.6.28/drivers/mtd/ubi/scan.c ubifs-v2.6.28/drivers/mtd/ubi/scan.c +--- linux-2.6.28/drivers/mtd/ubi/scan.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/scan.c 2011-06-15 14:22:07.000000000 -0400 +@@ -29,7 +29,7 @@ + * objects which are kept in volume RB-tree with root at the @volumes field. + * The RB-tree is indexed by the volume ID. + * +- * Found logical eraseblocks are represented by &struct ubi_scan_leb objects. ++ * Scanned logical eraseblocks are represented by &struct ubi_scan_leb objects. + * These objects are kept in per-volume RB-trees with the root at the + * corresponding &struct ubi_scan_volume object. To put it differently, we keep + * an RB-tree of per-volume objects and each of these objects is the root of +@@ -38,14 +38,56 @@ + * Corrupted physical eraseblocks are put to the @corr list, free physical + * eraseblocks are put to the @free list and the physical eraseblock to be + * erased are put to the @erase list. ++ * ++ * About corruptions ++ * ~~~~~~~~~~~~~~~~~ ++ * ++ * UBI protects EC and VID headers with CRC-32 checksums, so it can detect ++ * whether the headers are corrupted or not. Sometimes UBI also protects the ++ * data with CRC-32, e.g., when it executes the atomic LEB change operation, or ++ * when it moves the contents of a PEB for wear-leveling purposes. ++ * ++ * UBI tries to distinguish between 2 types of corruptions. ++ * ++ * 1. Corruptions caused by power cuts. These are expected corruptions and UBI ++ * tries to handle them gracefully, without printing too many warnings and ++ * error messages. 
The idea is that we do not lose important data in these case ++ * - we may lose only the data which was being written to the media just before ++ * the power cut happened, and the upper layers (e.g., UBIFS) are supposed to ++ * handle such data losses (e.g., by using the FS journal). ++ * ++ * When UBI detects a corruption (CRC-32 mismatch) in a PEB, and it looks like ++ * the reason is a power cut, UBI puts this PEB to the @erase list, and all ++ * PEBs in the @erase list are scheduled for erasure later. ++ * ++ * 2. Unexpected corruptions which are not caused by power cuts. During ++ * scanning, such PEBs are put to the @corr list and UBI preserves them. ++ * Obviously, this lessens the amount of available PEBs, and if at some point ++ * UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly informs ++ * about such PEBs every time the MTD device is attached. ++ * ++ * However, it is difficult to reliably distinguish between these types of ++ * corruptions and UBI's strategy is as follows. UBI assumes corruption type 2 ++ * if the VID header is corrupted and the data area does not contain all 0xFFs, ++ * and there were no bit-flips or integrity errors while reading the data area. ++ * Otherwise UBI assumes corruption type 1. So the decision criteria are as ++ * follows. ++ * o If the data area contains only 0xFFs, there is no data, and it is safe ++ * to just erase this PEB - this is corruption type 1. ++ * o If the data area has bit-flips or data integrity errors (ECC errors on ++ * NAND), it is probably a PEB which was being erased when power cut ++ * happened, so this is corruption type 1. However, this is just a guess, ++ * which might be wrong. ++ * o Otherwise this it corruption type 2. 
+ */ + + #include + #include +-#include ++#include ++#include + #include "ubi.h" + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si); + #else + #define paranoid_check_si(ubi, si) 0 +@@ -60,35 +102,69 @@ static struct ubi_vid_hdr *vidh; + * @si: scanning information + * @pnum: physical eraseblock number to add + * @ec: erase counter of the physical eraseblock ++ * @to_head: if not zero, add to the head of the list + * @list: the list to add to + * +- * This function adds physical eraseblock @pnum to free, erase, corrupted or +- * alien lists. Returns zero in case of success and a negative error code in +- * case of failure. ++ * This function adds physical eraseblock @pnum to free, erase, or alien lists. ++ * If @to_head is not zero, PEB will be added to the head of the list, which ++ * basically means it will be processed first later. E.g., we add corrupted ++ * PEBs (corrupted due to power cuts) to the head of the erase list to make ++ * sure we erase them first and get rid of corruptions ASAP. This function ++ * returns zero in case of success and a negative error code in case of ++ * failure. 
+ */ +-static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, ++static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, int to_head, + struct list_head *list) + { + struct ubi_scan_leb *seb; + +- if (list == &si->free) ++ if (list == &si->free) { + dbg_bld("add to free: PEB %d, EC %d", pnum, ec); +- else if (list == &si->erase) ++ } else if (list == &si->erase) { + dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); +- else if (list == &si->corr) +- dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); +- else if (list == &si->alien) ++ } else if (list == &si->alien) { + dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); +- else ++ si->alien_peb_count += 1; ++ } else + BUG(); + +- seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); ++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL); ++ if (!seb) ++ return -ENOMEM; ++ ++ seb->pnum = pnum; ++ seb->ec = ec; ++ if (to_head) ++ list_add(&seb->u.list, list); ++ else ++ list_add_tail(&seb->u.list, list); ++ return 0; ++} ++ ++/** ++ * add_corrupted - add a corrupted physical eraseblock. ++ * @si: scanning information ++ * @pnum: physical eraseblock number to add ++ * @ec: erase counter of the physical eraseblock ++ * ++ * This function adds corrupted physical eraseblock @pnum to the 'corr' list. ++ * The corruption was presumably not caused by a power cut. Returns zero in ++ * case of success and a negative error code in case of failure. 
++ */ ++static int add_corrupted(struct ubi_scan_info *si, int pnum, int ec) ++{ ++ struct ubi_scan_leb *seb; ++ ++ dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec); ++ ++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL); + if (!seb) + return -ENOMEM; + ++ si->corr_peb_count += 1; + seb->pnum = pnum; + seb->ec = ec; +- list_add_tail(&seb->u.list, list); ++ list_add(&seb->u.list, &si->corr); + return 0; + } + +@@ -229,7 +305,7 @@ static struct ubi_scan_volume *add_volum + * case of success this function returns a positive value, in case of failure, a + * negative error code is returned. The success return codes use the following + * bits: +- * o bit 0 is cleared: the first PEB (described by @seb) is newer then the ++ * o bit 0 is cleared: the first PEB (described by @seb) is newer than the + * second PEB (described by @pnum and @vid_hdr); + * o bit 0 is set: the second PEB is newer; + * o bit 1 is cleared: no bit-flips were detected in the newer LEB; +@@ -252,8 +328,8 @@ static int compare_lebs(struct ubi_devic + * created before sequence numbers support has been added. At + * that times we used 32-bit LEB versions stored in logical + * eraseblocks. That was before UBI got into mainline. We do not +- * support these images anymore. Well, those images will work +- * still work, but only if no unclean reboots happened. ++ * support these images anymore. Well, those images still work, ++ * but only if no unclean reboots happened. 
+ */ + ubi_err("unsupported on-flash UBI format\n"); + return -EINVAL; +@@ -279,19 +355,25 @@ static int compare_lebs(struct ubi_devic + return 1; + } + } else { +- pnum = seb->pnum; ++ if (!seb->copy_flag) { ++ /* It is not a copy, so it is newer */ ++ dbg_bld("first PEB %d is newer, copy_flag is unset", ++ pnum); ++ return bitflips << 1; ++ } + + vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vh) + return -ENOMEM; + ++ pnum = seb->pnum; + err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0); + if (err) { + if (err == UBI_IO_BITFLIPS) + bitflips = 1; + else { + dbg_err("VID of PEB %d header is bad, but it " +- "was OK earlier", pnum); ++ "was OK earlier, err %d", pnum, err); + if (err > 0) + err = -EIO; + +@@ -299,14 +381,6 @@ static int compare_lebs(struct ubi_devic + } + } + +- if (!vh->copy_flag) { +- /* It is not a copy, so it is newer */ +- dbg_bld("first PEB %d is newer, copy_flag is unset", +- pnum); +- err = bitflips << 1; +- goto out_free_vidh; +- } +- + vid_hdr = vh; + } + +@@ -450,25 +524,22 @@ int ubi_scan_add_used(struct ubi_device + + if (cmp_res & 1) { + /* +- * This logical eraseblock is newer then the one ++ * This logical eraseblock is newer than the one + * found earlier. + */ + err = validate_vid_hdr(vid_hdr, sv, pnum); + if (err) + return err; + +- if (cmp_res & 4) +- err = add_to_list(si, seb->pnum, seb->ec, +- &si->corr); +- else +- err = add_to_list(si, seb->pnum, seb->ec, +- &si->erase); ++ err = add_to_list(si, seb->pnum, seb->ec, cmp_res & 4, ++ &si->erase); + if (err) + return err; + + seb->ec = ec; + seb->pnum = pnum; + seb->scrub = ((cmp_res & 2) || bitflips); ++ seb->copy_flag = vid_hdr->copy_flag; + seb->sqnum = sqnum; + + if (sv->highest_lnum == lnum) +@@ -478,13 +549,11 @@ int ubi_scan_add_used(struct ubi_device + return 0; + } else { + /* +- * This logical eraseblock is older then the one found ++ * This logical eraseblock is older than the one found + * previously. 
+ */ +- if (cmp_res & 4) +- return add_to_list(si, pnum, ec, &si->corr); +- else +- return add_to_list(si, pnum, ec, &si->erase); ++ return add_to_list(si, pnum, ec, cmp_res & 4, ++ &si->erase); + } + } + +@@ -497,15 +566,16 @@ int ubi_scan_add_used(struct ubi_device + if (err) + return err; + +- seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL); ++ seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL); + if (!seb) + return -ENOMEM; + + seb->ec = ec; + seb->pnum = pnum; + seb->lnum = lnum; +- seb->sqnum = sqnum; + seb->scrub = bitflips; ++ seb->copy_flag = vid_hdr->copy_flag; ++ seb->sqnum = sqnum; + + if (sv->highest_lnum <= lnum) { + sv->highest_lnum = lnum; +@@ -661,8 +731,8 @@ out_free: + struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi, + struct ubi_scan_info *si) + { +- int err = 0, i; +- struct ubi_scan_leb *seb; ++ int err = 0; ++ struct ubi_scan_leb *seb, *tmp_seb; + + if (!list_empty(&si->free)) { + seb = list_entry(si->free.next, struct ubi_scan_leb, u.list); +@@ -671,38 +741,88 @@ struct ubi_scan_leb *ubi_scan_get_free_p + return seb; + } + +- for (i = 0; i < 2; i++) { +- struct list_head *head; +- struct ubi_scan_leb *tmp_seb; ++ /* ++ * We try to erase the first physical eraseblock from the erase list ++ * and pick it if we succeed, or try to erase the next one if not. And ++ * so forth. We don't want to take care about bad eraseblocks here - ++ * they'll be handled later. ++ */ ++ list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) { ++ if (seb->ec == UBI_SCAN_UNKNOWN_EC) ++ seb->ec = si->mean_ec; + +- if (i == 0) +- head = &si->erase; +- else +- head = &si->corr; ++ err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); ++ if (err) ++ continue; + ++ seb->ec += 1; ++ list_del(&seb->u.list); ++ dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); ++ return seb; ++ } ++ ++ ubi_err("no free eraseblocks"); ++ return ERR_PTR(-ENOSPC); ++} ++ ++/** ++ * check_corruption - check the data area of PEB. 
++ * @ubi: UBI device description object ++ * @vid_hdr: the (corrupted) VID header of this PEB ++ * @pnum: the physical eraseblock number to check ++ * ++ * This is a helper function which is used to distinguish between VID header ++ * corruptions caused by power cuts and other reasons. If the PEB contains only ++ * 0xFF bytes in the data area, the VID header is most probably corrupted ++ * because of a power cut (%0 is returned in this case). Otherwise, it was ++ * probably corrupted for some other reasons (%1 is returned in this case). A ++ * negative error code is returned if a read error occurred. ++ * ++ * If the corruption reason was a power cut, UBI can safely erase this PEB. ++ * Otherwise, it should preserve it to avoid possibly destroying important ++ * information. ++ */ ++static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr, ++ int pnum) ++{ ++ int err; ++ ++ mutex_lock(&ubi->buf_mutex); ++ memset(ubi->peb_buf1, 0x00, ubi->leb_size); ++ ++ err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start, ++ ubi->leb_size); ++ if (err == UBI_IO_BITFLIPS || err == -EBADMSG) { + /* +- * We try to erase the first physical eraseblock from the @head +- * list and pick it if we succeed, or try to erase the +- * next one if not. And so forth. We don't want to take care +- * about bad eraseblocks here - they'll be handled later. ++ * Bit-flips or integrity errors while reading the data area. ++ * It is difficult to say for sure what type of corruption is ++ * this, but presumably a power cut happened while this PEB was ++ * erased, so it became unstable and corrupted, and should be ++ * erased. 
+ */ +- list_for_each_entry_safe(seb, tmp_seb, head, u.list) { +- if (seb->ec == UBI_SCAN_UNKNOWN_EC) +- seb->ec = si->mean_ec; ++ err = 0; ++ goto out_unlock; ++ } + +- err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1); +- if (err) +- continue; ++ if (err) ++ goto out_unlock; + +- seb->ec += 1; +- list_del(&seb->u.list); +- dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec); +- return seb; +- } +- } ++ if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size)) ++ goto out_unlock; + +- ubi_err("no eraseblocks found"); +- return ERR_PTR(-ENOSPC); ++ ubi_err("PEB %d contains corrupted VID header, and the data does not " ++ "contain all 0xFF, this may be a non-UBI PEB or a severe VID " ++ "header corruption which requires manual inspection", pnum); ++ ubi_dbg_dump_vid_hdr(vid_hdr); ++ dbg_msg("hexdump of PEB %d offset %d, length %d", ++ pnum, ubi->leb_start, ubi->leb_size); ++ ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, ++ ubi->peb_buf1, ubi->leb_size, 1); ++ err = 1; ++ ++out_unlock: ++ mutex_unlock(&ubi->buf_mutex); ++ return err; + } + + /** +@@ -718,7 +838,7 @@ static int process_eb(struct ubi_device + int pnum) + { + long long uninitialized_var(ec); +- int err, bitflips = 0, vol_id, ec_corr = 0; ++ int err, bitflips = 0, vol_id, ec_err = 0; + + dbg_bld("scan PEB %d", pnum); + +@@ -739,24 +859,39 @@ static int process_eb(struct ubi_device + err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0); + if (err < 0) + return err; +- else if (err == UBI_IO_BITFLIPS) ++ switch (err) { ++ case 0: ++ break; ++ case UBI_IO_BITFLIPS: + bitflips = 1; +- else if (err == UBI_IO_PEB_EMPTY) +- return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); +- else if (err == UBI_IO_BAD_EC_HDR) { ++ break; ++ case UBI_IO_FF: ++ si->empty_peb_count += 1; ++ return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 0, ++ &si->erase); ++ case UBI_IO_FF_BITFLIPS: ++ si->empty_peb_count += 1; ++ return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 1, ++ &si->erase); ++ case 
UBI_IO_BAD_HDR_EBADMSG: ++ case UBI_IO_BAD_HDR: + /* + * We have to also look at the VID header, possibly it is not + * corrupted. Set %bitflips flag in order to make this PEB be + * moved and EC be re-created. + */ +- ec_corr = 1; ++ ec_err = err; + ec = UBI_SCAN_UNKNOWN_EC; + bitflips = 1; ++ break; ++ default: ++ ubi_err("'ubi_io_read_ec_hdr()' returned unknown code %d", err); ++ return -EINVAL; + } + +- si->is_empty = 0; ++ if (!ec_err) { ++ int image_seq; + +- if (!ec_corr) { + /* Make sure UBI version is OK */ + if (ech->version != UBI_VERSION) { + ubi_err("this UBI version is %d, image version is %d", +@@ -778,6 +913,28 @@ static int process_eb(struct ubi_device + ubi_dbg_dump_ec_hdr(ech); + return -EINVAL; + } ++ ++ /* ++ * Make sure that all PEBs have the same image sequence number. ++ * This allows us to detect situations when users flash UBI ++ * images incorrectly, so that the flash has the new UBI image ++ * and leftovers from the old one. This feature was added ++ * relatively recently, and the sequence number was always ++ * zero, because old UBI implementations always set it to zero. ++ * For this reasons, we do not panic if some PEBs have zero ++ * sequence number, while other PEBs have non-zero sequence ++ * number. 
++ */ ++ image_seq = be32_to_cpu(ech->image_seq); ++ if (!ubi->image_seq && image_seq) ++ ubi->image_seq = image_seq; ++ if (ubi->image_seq && image_seq && ++ ubi->image_seq != image_seq) { ++ ubi_err("bad image sequence number %d in PEB %d, " ++ "expected %d", image_seq, pnum, ubi->image_seq); ++ ubi_dbg_dump_ec_hdr(ech); ++ return -EINVAL; ++ } + } + + /* OK, we've done with the EC header, let's look at the VID header */ +@@ -785,21 +942,71 @@ static int process_eb(struct ubi_device + err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0); + if (err < 0) + return err; +- else if (err == UBI_IO_BITFLIPS) ++ switch (err) { ++ case 0: ++ break; ++ case UBI_IO_BITFLIPS: + bitflips = 1; +- else if (err == UBI_IO_BAD_VID_HDR || +- (err == UBI_IO_PEB_FREE && ec_corr)) { +- /* VID header is corrupted */ +- err = add_to_list(si, pnum, ec, &si->corr); ++ break; ++ case UBI_IO_BAD_HDR_EBADMSG: ++ if (ec_err == UBI_IO_BAD_HDR_EBADMSG) ++ /* ++ * Both EC and VID headers are corrupted and were read ++ * with data integrity error, probably this is a bad ++ * PEB, but it is not marked as bad yet. This may also ++ * be a result of power cut during erasure. ++ */ ++ si->maybe_bad_peb_count += 1; ++ case UBI_IO_BAD_HDR: ++ if (ec_err) ++ /* ++ * Both headers are corrupted. There is a possibility ++ * that this is a valid UBI PEB which has corresponding ++ * LEB, but the headers are corrupted. However, it is ++ * impossible to distinguish it from a PEB which just ++ * contains garbage because of a power cut during erase ++ * operation. So we just schedule this PEB for erasure. ++ * ++ * Besides, in case of NOR flash, we deliberately ++ * corrupt both headers because NOR flash erasure is ++ * slow and can start from the end. ++ */ ++ err = 0; ++ else ++ /* ++ * The EC was OK, but the VID header is corrupted. We ++ * have to check what is in the data area. 
++ */ ++ err = check_corruption(ubi, vidh, pnum); ++ ++ if (err < 0) ++ return err; ++ else if (!err) ++ /* This corruption is caused by a power cut */ ++ err = add_to_list(si, pnum, ec, 1, &si->erase); ++ else ++ /* This is an unexpected corruption */ ++ err = add_corrupted(si, pnum, ec); + if (err) + return err; + goto adjust_mean_ec; +- } else if (err == UBI_IO_PEB_FREE) { +- /* No VID header - the physical eraseblock is free */ +- err = add_to_list(si, pnum, ec, &si->free); ++ case UBI_IO_FF_BITFLIPS: ++ err = add_to_list(si, pnum, ec, 1, &si->erase); + if (err) + return err; + goto adjust_mean_ec; ++ case UBI_IO_FF: ++ if (ec_err) ++ err = add_to_list(si, pnum, ec, 1, &si->erase); ++ else ++ err = add_to_list(si, pnum, ec, 0, &si->free); ++ if (err) ++ return err; ++ goto adjust_mean_ec; ++ default: ++ ubi_err("'ubi_io_read_vid_hdr()' returned unknown code %d", ++ err); ++ return -EINVAL; + } + + vol_id = be32_to_cpu(vidh->vol_id); +@@ -810,11 +1017,11 @@ static int process_eb(struct ubi_device + switch (vidh->compat) { + case UBI_COMPAT_DELETE: + ubi_msg("\"delete\" compatible internal volume %d:%d" +- " found, remove it", vol_id, lnum); +- err = add_to_list(si, pnum, ec, &si->corr); ++ " found, will remove it", vol_id, lnum); ++ err = add_to_list(si, pnum, ec, 1, &si->erase); + if (err) + return err; +- break; ++ return 0; + + case UBI_COMPAT_RO: + ubi_msg("read-only compatible internal volume %d:%d" +@@ -826,10 +1033,9 @@ static int process_eb(struct ubi_device + case UBI_COMPAT_PRESERVE: + ubi_msg("\"preserve\" compatible internal volume %d:%d" + " found", vol_id, lnum); +- err = add_to_list(si, pnum, ec, &si->alien); ++ err = add_to_list(si, pnum, ec, 0, &si->alien); + if (err) + return err; +- si->alien_peb_count += 1; + return 0; + + case UBI_COMPAT_REJECT: +@@ -839,13 +1045,15 @@ static int process_eb(struct ubi_device + } + } + +- /* Both UBI headers seem to be fine */ ++ if (ec_err) ++ ubi_warn("valid VID header but corrupted EC header at PEB %d", ++ 
pnum); + err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips); + if (err) + return err; + + adjust_mean_ec: +- if (!ec_corr) { ++ if (!ec_err) { + si->ec_sum += ec; + si->ec_count += 1; + if (ec > si->max_ec) +@@ -858,6 +1066,80 @@ adjust_mean_ec: + } + + /** ++ * check_what_we_have - check what PEB were found by scanning. ++ * @ubi: UBI device description object ++ * @si: scanning information ++ * ++ * This is a helper function which takes a look what PEBs were found by ++ * scanning, and decides whether the flash is empty and should be formatted and ++ * whether there are too many corrupted PEBs and we should not attach this ++ * MTD device. Returns zero if we should proceed with attaching the MTD device, ++ * and %-EINVAL if we should not. ++ */ ++static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) ++{ ++ struct ubi_scan_leb *seb; ++ int max_corr, peb_count; ++ ++ peb_count = ubi->peb_count - si->bad_peb_count - si->alien_peb_count; ++ max_corr = peb_count / 20 ?: 8; ++ ++ /* ++ * Few corrupted PEBs is not a problem and may be just a result of ++ * unclean reboots. However, many of them may indicate some problems ++ * with the flash HW or driver. ++ */ ++ if (si->corr_peb_count) { ++ ubi_err("%d PEBs are corrupted and preserved", ++ si->corr_peb_count); ++ printk(KERN_ERR "Corrupted PEBs are:"); ++ list_for_each_entry(seb, &si->corr, u.list) ++ printk(KERN_CONT " %d", seb->pnum); ++ printk(KERN_CONT "\n"); ++ ++ /* ++ * If too many PEBs are corrupted, we refuse attaching, ++ * otherwise, only print a warning. ++ */ ++ if (si->corr_peb_count >= max_corr) { ++ ubi_err("too many corrupted PEBs, refusing"); ++ return -EINVAL; ++ } ++ } ++ ++ if (si->empty_peb_count + si->maybe_bad_peb_count == peb_count) { ++ /* ++ * All PEBs are empty, or almost all - a couple PEBs look like ++ * they may be bad PEBs which were not marked as bad yet. 
++ * ++ * This piece of code basically tries to distinguish between ++ * the following situations: ++ * ++ * 1. Flash is empty, but there are few bad PEBs, which are not ++ * marked as bad so far, and which were read with error. We ++ * want to go ahead and format this flash. While formatting, ++ * the faulty PEBs will probably be marked as bad. ++ * ++ * 2. Flash contains non-UBI data and we do not want to format ++ * it and destroy possibly important information. ++ */ ++ if (si->maybe_bad_peb_count <= 2) { ++ si->is_empty = 1; ++ ubi_msg("empty MTD device detected"); ++ get_random_bytes(&ubi->image_seq, ++ sizeof(ubi->image_seq)); ++ } else { ++ ubi_err("MTD device is not UBI-formatted and possibly " ++ "contains non-UBI data - refusing it"); ++ return -EINVAL; ++ } ++ ++ } ++ ++ return 0; ++} ++ ++/** + * ubi_scan - scan an MTD device. + * @ubi: UBI device description object + * +@@ -881,12 +1163,17 @@ struct ubi_scan_info *ubi_scan(struct ub + INIT_LIST_HEAD(&si->erase); + INIT_LIST_HEAD(&si->alien); + si->volumes = RB_ROOT; +- si->is_empty = 1; + + err = -ENOMEM; ++ si->scan_leb_slab = kmem_cache_create("ubi_scan_leb_slab", ++ sizeof(struct ubi_scan_leb), ++ 0, 0, NULL); ++ if (!si->scan_leb_slab) ++ goto out_si; ++ + ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL); + if (!ech) +- goto out_si; ++ goto out_slab; + + vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL); + if (!vidh) +@@ -904,15 +1191,12 @@ struct ubi_scan_info *ubi_scan(struct ub + dbg_msg("scanning is finished"); + + /* Calculate mean erase counter */ +- if (si->ec_count) { +- do_div(si->ec_sum, si->ec_count); +- si->mean_ec = si->ec_sum; +- } ++ if (si->ec_count) ++ si->mean_ec = div_u64(si->ec_sum, si->ec_count); + +- if (si->is_empty) +- ubi_msg("empty MTD device detected"); +- +- ubi->image_seq_set = 1; ++ err = check_what_we_have(ubi, si); ++ if (err) ++ goto out_vidh; + + /* + * In case of unknown erase counter we use the mean erase counter +@@ -938,11 +1222,8 @@ struct ubi_scan_info 
*ubi_scan(struct ub + seb->ec = si->mean_ec; + + err = paranoid_check_si(ubi, si); +- if (err) { +- if (err > 0) +- err = -EINVAL; ++ if (err) + goto out_vidh; +- } + + ubi_free_vid_hdr(ubi, vidh); + kfree(ech); +@@ -953,6 +1234,8 @@ out_vidh: + ubi_free_vid_hdr(ubi, vidh); + out_ech: + kfree(ech); ++out_slab: ++ kmem_cache_destroy(si->scan_leb_slab); + out_si: + ubi_scan_destroy_si(si); + return ERR_PTR(err); +@@ -961,11 +1244,12 @@ out_si: + /** + * destroy_sv - free the scanning volume information + * @sv: scanning volume information ++ * @si: scanning information + * + * This function destroys the volume RB-tree (@sv->root) and the scanning + * volume information. + */ +-static void destroy_sv(struct ubi_scan_volume *sv) ++static void destroy_sv(struct ubi_scan_info *si, struct ubi_scan_volume *sv) + { + struct ubi_scan_leb *seb; + struct rb_node *this = sv->root.rb_node; +@@ -985,7 +1269,7 @@ static void destroy_sv(struct ubi_scan_v + this->rb_right = NULL; + } + +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + } + kfree(sv); +@@ -1003,19 +1287,19 @@ void ubi_scan_destroy_si(struct ubi_scan + + list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) { + list_del(&seb->u.list); +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) { + list_del(&seb->u.list); +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) { + list_del(&seb->u.list); +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) { + list_del(&seb->u.list); +- kfree(seb); ++ kmem_cache_free(si->scan_leb_slab, seb); + } + + /* Destroy the volume RB-tree */ +@@ -1036,22 +1320,23 @@ void ubi_scan_destroy_si(struct ubi_scan + rb->rb_right = NULL; + } + +- destroy_sv(sv); ++ destroy_sv(si, sv); + } + } + ++ kmem_cache_destroy(si->scan_leb_slab); + kfree(si); + } + +-#ifdef 
CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_check_si - check the scanning information. + * @ubi: UBI device description object + * @si: scanning information + * +- * This function returns zero if the scanning information is all right, %1 if +- * not and a negative error code if an error occurred. ++ * This function returns zero if the scanning information is all right, and a ++ * negative error code if not or if an error occurred. + */ + static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si) + { +@@ -1061,6 +1346,9 @@ static int paranoid_check_si(struct ubi_ + struct ubi_scan_leb *seb, *last_seb; + uint8_t *buf; + ++ if (!(ubi_chk_flags & UBI_CHK_GEN)) ++ return 0; ++ + /* + * At first, check that scanning information is OK. + */ +@@ -1310,7 +1598,7 @@ bad_vid_hdr: + + out: + ubi_dbg_dump_stack(); +- return 1; ++ return -EINVAL; + } + +-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff -uprN linux-2.6.28/drivers/mtd/ubi/scan.h ubifs-v2.6.28/drivers/mtd/ubi/scan.h +--- linux-2.6.28/drivers/mtd/ubi/scan.h 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/scan.h 2011-06-15 14:22:07.000000000 -0400 +@@ -30,6 +30,7 @@ + * @pnum: physical eraseblock number + * @lnum: logical eraseblock number + * @scrub: if this physical eraseblock needs scrubbing ++ * @copy_flag: this LEB is a copy (@copy_flag is set in VID header of this LEB) + * @sqnum: sequence number + * @u: unions RB-tree or @list links + * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects +@@ -42,7 +43,8 @@ struct ubi_scan_leb { + int ec; + int pnum; + int lnum; +- int scrub; ++ unsigned int scrub:1; ++ unsigned int copy_flag:1; + unsigned long long sqnum; + union { + struct rb_node rb; +@@ -91,10 +93,15 @@ struct ubi_scan_volume { + * @erase: list of physical eraseblocks which have to be erased + * @alien: list of physical eraseblocks which should not be used by UBI (e.g., + * 
those belonging to "preserve"-compatible internal volumes) ++ * @corr_peb_count: count of PEBs in the @corr list ++ * @empty_peb_count: count of PEBs which are presumably empty (contain only ++ * 0xFF bytes) ++ * @alien_peb_count: count of PEBs in the @alien list + * @bad_peb_count: count of bad physical eraseblocks ++ * @maybe_bad_peb_count: count of bad physical eraseblocks which are not marked ++ * as bad yet, but which look like bad + * @vols_found: number of volumes found during scanning + * @highest_vol_id: highest volume ID +- * @alien_peb_count: count of physical eraseblocks in the @alien list + * @is_empty: flag indicating whether the MTD device is empty or not + * @min_ec: lowest erase counter value + * @max_ec: highest erase counter value +@@ -102,6 +109,7 @@ struct ubi_scan_volume { + * @mean_ec: mean erase counter value + * @ec_sum: a temporary variable used when calculating @mean_ec + * @ec_count: a temporary variable used when calculating @mean_ec ++ * @scan_leb_slab: slab cache for &struct ubi_scan_leb objects + * + * This data structure contains the result of scanning and may be used by other + * UBI sub-systems to build final UBI data structures, further error-recovery +@@ -113,10 +121,13 @@ struct ubi_scan_info { + struct list_head free; + struct list_head erase; + struct list_head alien; ++ int corr_peb_count; ++ int empty_peb_count; ++ int alien_peb_count; + int bad_peb_count; ++ int maybe_bad_peb_count; + int vols_found; + int highest_vol_id; +- int alien_peb_count; + int is_empty; + int min_ec; + int max_ec; +@@ -124,6 +135,7 @@ struct ubi_scan_info { + int mean_ec; + uint64_t ec_sum; + int ec_count; ++ struct kmem_cache *scan_leb_slab; + }; + + struct ubi_device; +@@ -133,7 +145,7 @@ struct ubi_vid_hdr; + * ubi_scan_move_to_list - move a PEB from the volume tree to a list. 
+ * + * @sv: volume scanning information +- * @seb: scanning eraseblock infprmation ++ * @seb: scanning eraseblock information + * @list: the list to move to + */ + static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv, +diff -uprN linux-2.6.28/drivers/mtd/ubi/ubi.h ubifs-v2.6.28/drivers/mtd/ubi/ubi.h +--- linux-2.6.28/drivers/mtd/ubi/ubi.h 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/ubi.h 2011-06-15 14:22:07.000000000 -0400 +@@ -36,8 +36,10 @@ + #include + #include + #include ++#include + #include + #include ++#include + + #include "ubi-media.h" + #include "scan.h" +@@ -83,21 +85,26 @@ + /* + * Error codes returned by the I/O sub-system. + * +- * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only +- * %0xFF bytes +- * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a +- * valid erase counter header, and the rest are %0xFF bytes +- * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC) +- * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or +- * CRC) ++ * UBI_IO_FF: the read region of flash contains only 0xFFs ++ * UBI_IO_FF_BITFLIPS: the same as %UBI_IO_FF, but also also there was a data ++ * integrity error reported by the MTD driver ++ * (uncorrectable ECC error in case of NAND) ++ * UBI_IO_BAD_HDR: the EC or VID header is corrupted (bad magic or CRC) ++ * UBI_IO_BAD_HDR_EBADMSG: the same as %UBI_IO_BAD_HDR, but also there was a ++ * data integrity error reported by the MTD driver ++ * (uncorrectable ECC error in case of NAND) + * UBI_IO_BITFLIPS: bit-flips were detected and corrected ++ * ++ * Note, it is probably better to have bit-flip and ebadmsg as flags which can ++ * be or'ed with other error code. 
But this is a big change because there are ++ * many callers, so it is not worth the risk of introducing a bug + */ + enum { +- UBI_IO_PEB_EMPTY = 1, +- UBI_IO_PEB_FREE, +- UBI_IO_BAD_EC_HDR, +- UBI_IO_BAD_VID_HDR, +- UBI_IO_BITFLIPS ++ UBI_IO_FF = 1, ++ UBI_IO_FF_BITFLIPS, ++ UBI_IO_BAD_HDR, ++ UBI_IO_BAD_HDR_EBADMSG, ++ UBI_IO_BITFLIPS, + }; + + /* +@@ -228,10 +235,7 @@ struct ubi_volume_desc; + * @upd_marker: %1 if the update marker is set for this volume + * @updating: %1 if the volume is being updated + * @changing_leb: %1 if the atomic LEB change ioctl command is in progress +- * +- * @gluebi_desc: gluebi UBI volume descriptor +- * @gluebi_refcount: reference count of the gluebi MTD device +- * @gluebi_mtd: MTD device description object of the gluebi MTD device ++ * @direct_writes: %1 if direct writes are enabled for this volume + * + * The @corrupted field indicates that the volume's contents is corrupted. + * Since UBI protects only static volumes, this field is not relevant to +@@ -275,17 +279,7 @@ struct ubi_volume { + unsigned int upd_marker:1; + unsigned int updating:1; + unsigned int changing_leb:1; +- +-#ifdef CONFIG_MTD_UBI_GLUEBI +- /* +- * Gluebi-related stuff may be compiled out. +- * Note: this should not be built into UBI but should be a separate +- * ubimtd driver which works on top of UBI and emulates MTD devices. +- */ +- struct ubi_volume_desc *gluebi_desc; +- int gluebi_refcount; +- struct mtd_info gluebi_mtd; +-#endif ++ unsigned int direct_writes:1; + }; + + /** +@@ -314,7 +308,6 @@ struct ubi_wl_entry; + * @vol->ref_count, @vol->mapping and @vol->eba_tbl. 
+ * @ref_count: count of references on the UBI device + * @image_seq: image sequence number recorded on EC headers +- * @image_seq_set: indicates @image_seq is known + * + * @rsvd_pebs: count of reserved physical eraseblocks + * @avail_pebs: count of available physical eraseblocks +@@ -327,8 +320,9 @@ struct ubi_wl_entry; + * @vtbl_slots: how many slots are available in the volume table + * @vtbl_size: size of the volume table in bytes + * @vtbl: in-RAM volume table copy +- * @volumes_mutex: protects on-flash volume table and serializes volume +- * changes, like creation, deletion, update, re-size and re-name ++ * @device_mutex: protects on-flash volume table and serializes volume ++ * creation, deletion, update, re-size, re-name and set ++ * property + * + * @max_ec: current highest erase counter value + * @mean_ec: current mean erase counter value +@@ -346,8 +340,8 @@ struct ubi_wl_entry; + * protected from the wear-leveling worker) + * @pq_head: protection queue head + * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from, +- * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, +- * @erroneous, and @erroneous_peb_count fields ++ * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works, ++ * @erroneous, and @erroneous_peb_count fields + * @move_mutex: serializes eraseblock moves + * @work_sem: synchronizes the WL worker with use tasks + * @wl_scheduled: non-zero if the wear-leveling was scheduled +@@ -367,6 +361,8 @@ struct ubi_wl_entry; + * @peb_size: physical eraseblock size + * @bad_peb_count: count of bad physical eraseblocks + * @good_peb_count: count of good physical eraseblocks ++ * @corr_peb_count: count of corrupted physical eraseblocks (preserved and not ++ * used by UBI) + * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous + * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks + * @min_io_size: minimal input/output unit size of the underlying MTD device +@@ -384,15 
+380,15 @@ struct ubi_wl_entry; + * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset + * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or + * not ++ * @nor_flash: non-zero if working on top of NOR flash ++ * @max_write_size: maximum amount of bytes the underlying flash can write at a ++ * time (MTD write buffer size) + * @mtd: MTD device descriptor + * + * @peb_buf1: a buffer of PEB size used for different purposes + * @peb_buf2: another buffer of PEB size used for different purposes + * @buf_mutex: protects @peb_buf1 and @peb_buf2 + * @ckvol_mutex: serializes static volume checking when opening +- * @mult_mutex: serializes operations on multiple volumes, like re-naming +- * @dbg_peb_buf: buffer of PEB size used for debugging +- * @dbg_buf_mutex: protects @dbg_peb_buf + */ + struct ubi_device { + struct cdev cdev; +@@ -404,7 +400,6 @@ struct ubi_device { + spinlock_t volumes_lock; + int ref_count; + int image_seq; +- int image_seq_set; + + int rsvd_pebs; + int avail_pebs; +@@ -415,7 +410,7 @@ struct ubi_device { + int vtbl_slots; + int vtbl_size; + struct ubi_vtbl_record *vtbl; +- struct mutex volumes_mutex; ++ struct mutex device_mutex; + + int max_ec; + /* Note, mean_ec is not updated run-time - should be fixed */ +@@ -454,6 +449,7 @@ struct ubi_device { + int peb_size; + int bad_peb_count; + int good_peb_count; ++ int corr_peb_count; + int erroneous_peb_count; + int max_erroneous; + int min_io_size; +@@ -466,26 +462,24 @@ struct ubi_device { + int vid_hdr_offset; + int vid_hdr_aloffset; + int vid_hdr_shift; +- int bad_allowed; ++ unsigned int bad_allowed:1; ++ unsigned int nor_flash:1; ++ int max_write_size; + struct mtd_info *mtd; + + void *peb_buf1; + void *peb_buf2; + struct mutex buf_mutex; + struct mutex ckvol_mutex; +- struct mutex mult_mutex; +-#ifdef CONFIG_MTD_UBI_DEBUG +- void *dbg_peb_buf; +- struct mutex dbg_buf_mutex; +-#endif + }; + + extern struct kmem_cache *ubi_wl_entry_slab; +-extern struct file_operations 
ubi_ctrl_cdev_operations; +-extern struct file_operations ubi_cdev_operations; +-extern struct file_operations ubi_vol_cdev_operations; ++extern const struct file_operations ubi_ctrl_cdev_operations; ++extern const struct file_operations ubi_cdev_operations; ++extern const struct file_operations ubi_vol_cdev_operations; + extern struct class *ubi_class; + extern struct mutex ubi_devices_mutex; ++extern struct blocking_notifier_head ubi_notifiers; + + /* vtbl.c */ + int ubi_change_vtbl_record(struct ubi_device *ubi, int idx, +@@ -517,17 +511,7 @@ int ubi_calc_data_len(const struct ubi_d + int length); + int ubi_check_volume(struct ubi_device *ubi, int vol_id); + void ubi_calculate_reserved(struct ubi_device *ubi); +- +-/* gluebi.c */ +-#ifdef CONFIG_MTD_UBI_GLUEBI +-int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol); +-int ubi_destroy_gluebi(struct ubi_volume *vol); +-void ubi_gluebi_updated(struct ubi_volume *vol); +-#else +-#define ubi_create_gluebi(ubi, vol) 0 +-#define ubi_destroy_gluebi(vol) 0 +-#define ubi_gluebi_updated(vol) +-#endif ++int ubi_check_pattern(const void *buf, uint8_t patt, int size); + + /* eba.c */ + int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, +@@ -578,6 +562,16 @@ struct ubi_device *ubi_get_device(int ub + void ubi_put_device(struct ubi_device *ubi); + struct ubi_device *ubi_get_by_major(int major); + int ubi_major2num(int major); ++int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, ++ int ntype); ++int ubi_notify_all(struct ubi_device *ubi, int ntype, ++ struct notifier_block *nb); ++int ubi_enumerate_volumes(struct notifier_block *nb); ++ ++/* kapi.c */ ++void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di); ++void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol, ++ struct ubi_volume_info *vi); + + /* + * ubi_rb_for_each_entry - walk an RB-tree. 
+@@ -590,7 +584,8 @@ int ubi_major2num(int major); + for (rb = rb_first(root), \ + pos = (rb ? container_of(rb, typeof(*pos), member) : NULL); \ + rb; \ +- rb = rb_next(rb), pos = container_of(rb, typeof(*pos), member)) ++ rb = rb_next(rb), \ ++ pos = (rb ? container_of(rb, typeof(*pos), member) : NULL)) + + /** + * ubi_zalloc_vid_hdr - allocate a volume identifier header object. +diff -uprN linux-2.6.28/drivers/mtd/ubi/ubi-media.h ubifs-v2.6.28/drivers/mtd/ubi/ubi-media.h +--- linux-2.6.28/drivers/mtd/ubi/ubi-media.h 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/ubi-media.h 2011-06-15 14:22:07.000000000 -0400 +@@ -136,7 +136,7 @@ enum { + * The erase counter header takes 64 bytes and has a plenty of unused space for + * future usage. The unused fields are zeroed. The @version field is used to + * indicate the version of UBI implementation which is supposed to be able to +- * work with this UBI image. If @version is greater then the current UBI ++ * work with this UBI image. If @version is greater than the current UBI + * version, the image is rejected. This may be useful in future if something + * is changed radically. This field is duplicated in the volume identifier + * header. +@@ -164,7 +164,7 @@ struct ubi_ec_hdr { + __be32 image_seq; + __u8 padding2[32]; + __be32 hdr_crc; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubi_vid_hdr - on-flash UBI volume identifier header. +@@ -197,7 +197,7 @@ struct ubi_ec_hdr { + * (sequence number) is used to distinguish between older and newer versions of + * logical eraseblocks. + * +- * There are 2 situations when there may be more then one physical eraseblock ++ * There are 2 situations when there may be more than one physical eraseblock + * corresponding to the same logical eraseblock, i.e., having the same @vol_id + * and @lnum values in the volume identifier header. Suppose we have a logical + * eraseblock L and it is mapped to the physical eraseblock P. 
+@@ -292,7 +292,7 @@ struct ubi_vid_hdr { + __be64 sqnum; + __u8 padding3[12]; + __be32 hdr_crc; +-} __attribute__ ((packed)); ++} __packed; + + /* Internal UBI volumes count */ + #define UBI_INT_VOL_COUNT 1 +@@ -373,6 +373,6 @@ struct ubi_vtbl_record { + __u8 flags; + __u8 padding[23]; + __be32 crc; +-} __attribute__ ((packed)); ++} __packed; + + #endif /* !__UBI_MEDIA_H__ */ +diff -uprN linux-2.6.28/drivers/mtd/ubi/upd.c ubifs-v2.6.28/drivers/mtd/ubi/upd.c +--- linux-2.6.28/drivers/mtd/ubi/upd.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/upd.c 2011-06-15 14:22:07.000000000 -0400 +@@ -40,7 +40,7 @@ + + #include + #include +-#include ++#include + #include "ubi.h" + + /** +@@ -68,10 +68,10 @@ static int set_update_marker(struct ubi_ + sizeof(struct ubi_vtbl_record)); + vtbl_rec.upd_marker = 1; + +- mutex_lock(&ubi->volumes_mutex); ++ mutex_lock(&ubi->device_mutex); + err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); +- mutex_unlock(&ubi->volumes_mutex); + vol->upd_marker = 1; ++ mutex_unlock(&ubi->device_mutex); + return err; + } + +@@ -89,7 +89,6 @@ static int clear_update_marker(struct ub + long long bytes) + { + int err; +- uint64_t tmp; + struct ubi_vtbl_record vtbl_rec; + + dbg_gen("clear update marker for volume %d", vol->vol_id); +@@ -101,19 +100,19 @@ static int clear_update_marker(struct ub + + if (vol->vol_type == UBI_STATIC_VOLUME) { + vol->corrupted = 0; +- vol->used_bytes = tmp = bytes; +- vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size); +- vol->used_ebs = tmp; ++ vol->used_bytes = bytes; ++ vol->used_ebs = div_u64_rem(bytes, vol->usable_leb_size, ++ &vol->last_eb_bytes); + if (vol->last_eb_bytes) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; + } + +- mutex_lock(&ubi->volumes_mutex); ++ mutex_lock(&ubi->device_mutex); + err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec); +- mutex_unlock(&ubi->volumes_mutex); + vol->upd_marker = 0; ++ mutex_unlock(&ubi->device_mutex); + return 
err; + } + +@@ -131,7 +130,6 @@ int ubi_start_update(struct ubi_device * + long long bytes) + { + int i, err; +- uint64_t tmp; + + dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes); + ubi_assert(!vol->updating && !vol->changing_leb); +@@ -149,21 +147,23 @@ int ubi_start_update(struct ubi_device * + } + + if (bytes == 0) { ++ err = ubi_wl_flush(ubi); ++ if (err) ++ return err; ++ + err = clear_update_marker(ubi, vol, 0); + if (err) + return err; +- err = ubi_wl_flush(ubi); +- if (!err) +- vol->updating = 0; ++ vol->updating = 0; ++ return 0; + } + + vol->upd_buf = vmalloc(ubi->leb_size); + if (!vol->upd_buf) + return -ENOMEM; + +- tmp = bytes; +- vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size); +- vol->upd_ebs += tmp; ++ vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1, ++ vol->usable_leb_size); + vol->upd_bytes = bytes; + vol->upd_received = 0; + return 0; +@@ -282,7 +282,6 @@ static int write_leb(struct ubi_device * + int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol, + const void __user *buf, int count) + { +- uint64_t tmp; + int lnum, offs, err = 0, len, to_write = count; + + dbg_gen("write %d of %lld bytes, %lld already passed", +@@ -291,10 +290,7 @@ int ubi_more_update_data(struct ubi_devi + if (ubi->ro_mode) + return -EROFS; + +- tmp = vol->upd_received; +- offs = do_div(tmp, vol->usable_leb_size); +- lnum = tmp; +- ++ lnum = div_u64_rem(vol->upd_received, vol->usable_leb_size, &offs); + if (vol->upd_received + count > vol->upd_bytes) + to_write = count = vol->upd_bytes - vol->upd_received; + +@@ -369,16 +365,16 @@ int ubi_more_update_data(struct ubi_devi + + ubi_assert(vol->upd_received <= vol->upd_bytes); + if (vol->upd_received == vol->upd_bytes) { ++ err = ubi_wl_flush(ubi); ++ if (err) ++ return err; + /* The update is finished, clear the update marker */ + err = clear_update_marker(ubi, vol, vol->upd_bytes); + if (err) + return err; +- err = ubi_wl_flush(ubi); +- if (err == 0) { +- vol->updating = 0; +- 
err = to_write; +- vfree(vol->upd_buf); +- } ++ vol->updating = 0; ++ err = to_write; ++ vfree(vol->upd_buf); + } + + return err; +diff -uprN linux-2.6.28/drivers/mtd/ubi/vmt.c ubifs-v2.6.28/drivers/mtd/ubi/vmt.c +--- linux-2.6.28/drivers/mtd/ubi/vmt.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/vmt.c 2011-06-15 14:22:07.000000000 -0400 +@@ -24,10 +24,10 @@ + */ + + #include +-#include ++#include + #include "ubi.h" + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static int paranoid_check_volumes(struct ubi_device *ubi); + #else + #define paranoid_check_volumes(ubi) 0 +@@ -198,14 +198,13 @@ static void volume_sysfs_close(struct ub + * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume + * and saves it in @req->vol_id. Returns zero in case of success and a negative + * error code in case of failure. Note, the caller has to have the +- * @ubi->volumes_mutex locked. ++ * @ubi->device_mutex locked. + */ + int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req) + { + int i, err, vol_id = req->vol_id, do_free = 1; + struct ubi_volume *vol; + struct ubi_vtbl_record vtbl_rec; +- uint64_t bytes; + dev_t dev; + + if (ubi->ro_mode) +@@ -233,8 +232,8 @@ int ubi_create_volume(struct ubi_device + req->vol_id = vol_id; + } + +- dbg_gen("volume ID %d, %llu bytes, type %d, name %s", +- vol_id, (unsigned long long)req->bytes, ++ dbg_gen("create device %d, volume %d, %llu bytes, type %d, name %s", ++ ubi->ubi_num, vol_id, (unsigned long long)req->bytes, + (int)req->vol_type, req->name); + + /* Ensure that this volume does not exist */ +@@ -255,14 +254,15 @@ int ubi_create_volume(struct ubi_device + + /* Calculate how many eraseblocks are requested */ + vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment; +- bytes = req->bytes; +- if (do_div(bytes, vol->usable_leb_size)) +- vol->reserved_pebs = 1; +- vol->reserved_pebs += bytes; ++ vol->reserved_pebs += div_u64(req->bytes + 
vol->usable_leb_size - 1, ++ vol->usable_leb_size); + + /* Reserve physical eraseblocks */ + if (vol->reserved_pebs > ubi->avail_pebs) { + dbg_err("not enough PEBs, only %d available", ubi->avail_pebs); ++ if (ubi->corr_peb_count) ++ dbg_err("%d PEBs are corrupted and not used", ++ ubi->corr_peb_count); + err = -ENOSPC; + goto out_unlock; + } +@@ -301,10 +301,10 @@ int ubi_create_volume(struct ubi_device + vol->used_bytes = + (long long)vol->used_ebs * vol->usable_leb_size; + } else { +- bytes = vol->used_bytes; +- vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size); +- vol->used_ebs = bytes; +- if (vol->last_eb_bytes) ++ vol->used_ebs = div_u64_rem(vol->used_bytes, ++ vol->usable_leb_size, ++ &vol->last_eb_bytes); ++ if (vol->last_eb_bytes != 0) + vol->used_ebs += 1; + else + vol->last_eb_bytes = vol->usable_leb_size; +@@ -320,10 +320,6 @@ int ubi_create_volume(struct ubi_device + goto out_mapping; + } + +- err = ubi_create_gluebi(ubi, vol); +- if (err) +- goto out_cdev; +- + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = dev; +@@ -333,7 +329,7 @@ int ubi_create_volume(struct ubi_device + err = device_register(&vol->dev); + if (err) { + ubi_err("cannot register device"); +- goto out_gluebi; ++ goto out_cdev; + } + + err = volume_sysfs_init(ubi, vol); +@@ -361,7 +357,9 @@ int ubi_create_volume(struct ubi_device + ubi->vol_count += 1; + spin_unlock(&ubi->volumes_lock); + +- err = paranoid_check_volumes(ubi); ++ ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED); ++ if (paranoid_check_volumes(ubi)) ++ dbg_err("check failed while creating volume %d", vol_id); + return err; + + out_sysfs: +@@ -376,10 +374,6 @@ out_sysfs: + do_free = 0; + get_device(&vol->dev); + volume_sysfs_close(vol); +-out_gluebi: +- if (ubi_destroy_gluebi(vol)) +- dbg_err("cannot destroy gluebi for volume %d:%d", +- ubi->ubi_num, vol_id); + out_cdev: + cdev_del(&vol->cdev); + out_mapping: +@@ -406,7 +400,7 @@ out_unlock: + * + * This function removes volume 
described by @desc. The volume has to be opened + * in "exclusive" mode. Returns zero in case of success and a negative error +- * code in case of failure. The caller has to have the @ubi->volumes_mutex ++ * code in case of failure. The caller has to have the @ubi->device_mutex + * locked. + */ + int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl) +@@ -415,7 +409,7 @@ int ubi_remove_volume(struct ubi_volume_ + struct ubi_device *ubi = vol->ubi; + int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs; + +- dbg_gen("remove UBI volume %d", vol_id); ++ dbg_gen("remove device %d, volume %d", ubi->ubi_num, vol_id); + ubi_assert(desc->mode == UBI_EXCLUSIVE); + ubi_assert(vol == ubi->volumes[vol_id]); + +@@ -434,10 +428,6 @@ int ubi_remove_volume(struct ubi_volume_ + ubi->volumes[vol_id] = NULL; + spin_unlock(&ubi->volumes_lock); + +- err = ubi_destroy_gluebi(vol); +- if (err) +- goto out_err; +- + if (!no_vtbl) { + err = ubi_change_vtbl_record(ubi, vol_id, NULL); + if (err) +@@ -468,8 +458,10 @@ int ubi_remove_volume(struct ubi_volume_ + ubi->vol_count -= 1; + spin_unlock(&ubi->volumes_lock); + +- if (!no_vtbl) +- err = paranoid_check_volumes(ubi); ++ ubi_volume_notify(ubi, vol, UBI_VOLUME_REMOVED); ++ if (!no_vtbl && paranoid_check_volumes(ubi)) ++ dbg_err("check failed while removing volume %d", vol_id); ++ + return err; + + out_err: +@@ -488,7 +480,7 @@ out_unlock: + * + * This function re-sizes the volume and returns zero in case of success, and a + * negative error code in case of failure. The caller has to have the +- * @ubi->volumes_mutex locked. ++ * @ubi->device_mutex locked. 
+ */ + int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs) + { +@@ -501,8 +493,8 @@ int ubi_resize_volume(struct ubi_volume_ + if (ubi->ro_mode) + return -EROFS; + +- dbg_gen("re-size volume %d to from %d to %d PEBs", +- vol_id, vol->reserved_pebs, reserved_pebs); ++ dbg_gen("re-size device %d, volume %d to from %d to %d PEBs", ++ ubi->ubi_num, vol_id, vol->reserved_pebs, reserved_pebs); + + if (vol->vol_type == UBI_STATIC_VOLUME && + reserved_pebs < vol->used_ebs) { +@@ -537,6 +529,9 @@ int ubi_resize_volume(struct ubi_volume_ + if (pebs > ubi->avail_pebs) { + dbg_err("not enough PEBs: requested %d, available %d", + pebs, ubi->avail_pebs); ++ if (ubi->corr_peb_count) ++ dbg_err("%d PEBs are corrupted and not used", ++ ubi->corr_peb_count); + spin_unlock(&ubi->volumes_lock); + err = -ENOSPC; + goto out_free; +@@ -590,7 +585,9 @@ int ubi_resize_volume(struct ubi_volume_ + (long long)vol->used_ebs * vol->usable_leb_size; + } + +- err = paranoid_check_volumes(ubi); ++ ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED); ++ if (paranoid_check_volumes(ubi)) ++ dbg_err("check failed while re-sizing volume %d", vol_id); + return err; + + out_acc: +@@ -635,11 +632,12 @@ int ubi_rename_volumes(struct ubi_device + vol->name_len = re->new_name_len; + memcpy(vol->name, re->new_name, re->new_name_len + 1); + spin_unlock(&ubi->volumes_lock); ++ ubi_volume_notify(ubi, vol, UBI_VOLUME_RENAMED); + } + } + +- if (!err) +- err = paranoid_check_volumes(ubi); ++ if (!err && paranoid_check_volumes(ubi)) ++ ; + return err; + } + +@@ -670,10 +668,6 @@ int ubi_add_volume(struct ubi_device *ub + return err; + } + +- err = ubi_create_gluebi(ubi, vol); +- if (err) +- goto out_cdev; +- + vol->dev.release = vol_release; + vol->dev.parent = &ubi->dev; + vol->dev.devt = dev; +@@ -681,21 +675,19 @@ int ubi_add_volume(struct ubi_device *ub + sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id); + err = device_register(&vol->dev); + if (err) +- goto out_gluebi; ++ goto 
out_cdev; + + err = volume_sysfs_init(ubi, vol); + if (err) { + cdev_del(&vol->cdev); +- err = ubi_destroy_gluebi(vol); + volume_sysfs_close(vol); + return err; + } + +- err = paranoid_check_volumes(ubi); ++ if (paranoid_check_volumes(ubi)) ++ dbg_err("check failed while adding volume %d", vol_id); + return err; + +-out_gluebi: +- err = ubi_destroy_gluebi(vol); + out_cdev: + cdev_del(&vol->cdev); + return err; +@@ -711,17 +703,14 @@ out_cdev: + */ + void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol) + { +- int err; +- + dbg_gen("free volume %d", vol->vol_id); + + ubi->volumes[vol->vol_id] = NULL; +- err = ubi_destroy_gluebi(vol); + cdev_del(&vol->cdev); + volume_sysfs_close(vol); + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_check_volume - check volume information. +@@ -800,11 +789,6 @@ static int paranoid_check_volume(struct + goto fail; + } + +- if (!vol->name) { +- ubi_err("NULL volume name"); +- goto fail; +- } +- + n = strnlen(vol->name, vol->name_len + 1); + if (n != vol->name_len) { + ubi_err("bad name_len %lld", n); +@@ -871,6 +855,7 @@ fail: + if (vol) + ubi_dbg_dump_vol_info(vol); + ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id); ++ dump_stack(); + spin_unlock(&ubi->volumes_lock); + return -EINVAL; + } +@@ -885,6 +870,9 @@ static int paranoid_check_volumes(struct + { + int i, err = 0; + ++ if (!(ubi_chk_flags & UBI_CHK_GEN)) ++ return 0; ++ + for (i = 0; i < ubi->vtbl_slots; i++) { + err = paranoid_check_volume(ubi, i); + if (err) +diff -uprN linux-2.6.28/drivers/mtd/ubi/vtbl.c ubifs-v2.6.28/drivers/mtd/ubi/vtbl.c +--- linux-2.6.28/drivers/mtd/ubi/vtbl.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/drivers/mtd/ubi/vtbl.c 2011-06-15 14:22:07.000000000 -0400 +@@ -61,7 +61,7 @@ + #include + #include "ubi.h" + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static void paranoid_vtbl_check(const struct ubi_device *ubi); + #else + #define 
paranoid_vtbl_check(ubi) +@@ -365,7 +365,7 @@ write_error: + * Probably this physical eraseblock went bad, try to pick + * another one. + */ +- list_add_tail(&new_seb->u.list, &si->corr); ++ list_add(&new_seb->u.list, &si->erase); + goto retry; + } + kfree(new_seb); +@@ -413,7 +413,7 @@ static struct ubi_vtbl_record *process_l + * 0 contains more recent information. + * + * So the plan is to first check LEB 0. Then +- * a. if LEB 0 is OK, it must be containing the most resent data; then ++ * a. if LEB 0 is OK, it must be containing the most recent data; then + * we compare it with LEB 1, and if they are different, we copy LEB + * 0 to LEB 1; + * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1 +@@ -566,6 +566,7 @@ static int init_volumes(struct ubi_devic + vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs); + vol->alignment = be32_to_cpu(vtbl[i].alignment); + vol->data_pad = be32_to_cpu(vtbl[i].data_pad); ++ vol->upd_marker = vtbl[i].upd_marker; + vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ? 
+ UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME; + vol->name_len = be16_to_cpu(vtbl[i].name_len); +@@ -577,7 +578,7 @@ static int init_volumes(struct ubi_devic + if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) { + /* Auto re-size flag may be set only for one volume */ + if (ubi->autoresize_vol_id != -1) { +- ubi_err("more then one auto-resize volume (%d " ++ ubi_err("more than one auto-resize volume (%d " + "and %d)", ubi->autoresize_vol_id, i); + kfree(vol); + return -EINVAL; +@@ -660,9 +661,13 @@ static int init_volumes(struct ubi_devic + ubi->vol_count += 1; + vol->ubi = ubi; + +- if (reserved_pebs > ubi->avail_pebs) ++ if (reserved_pebs > ubi->avail_pebs) { + ubi_err("not enough PEBs, required %d, available %d", + reserved_pebs, ubi->avail_pebs); ++ if (ubi->corr_peb_count) ++ ubi_err("%d PEBs are corrupted and not used", ++ ubi->corr_peb_count); ++ } + ubi->rsvd_pebs += reserved_pebs; + ubi->avail_pebs -= reserved_pebs; + +@@ -835,7 +840,7 @@ int ubi_read_volume_table(struct ubi_dev + return PTR_ERR(ubi->vtbl); + } + +- ubi->avail_pebs = ubi->good_peb_count; ++ ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count; + + /* + * The layout volume is OK, initialize the corresponding in-RAM data +@@ -846,7 +851,7 @@ int ubi_read_volume_table(struct ubi_dev + goto out_free; + + /* +- * Get sure that the scanning information is consistent to the ++ * Make sure that the scanning information is consistent to the + * information stored in the volume table. + */ + err = check_scanning_info(ubi, si); +@@ -864,7 +869,7 @@ out_free: + return err; + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_vtbl_check - check volume table. 
+@@ -872,10 +877,13 @@ out_free: + */ + static void paranoid_vtbl_check(const struct ubi_device *ubi) + { ++ if (!(ubi_chk_flags & UBI_CHK_GEN)) ++ return; ++ + if (vtbl_check(ubi, ubi->vtbl)) { + ubi_err("paranoid check failed"); + BUG(); + } + } + +-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff -uprN linux-2.6.28/drivers/mtd/ubi/wl.c ubifs-v2.6.28/drivers/mtd/ubi/wl.c +--- linux-2.6.28/drivers/mtd/ubi/wl.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/drivers/mtd/ubi/wl.c 2011-06-15 14:22:07.000000000 -0400 +@@ -130,7 +130,7 @@ + * situation when the picked physical eraseblock is constantly erased after the + * data is written to it. So, we have a constant which limits the highest erase + * counter of the free physical eraseblock to pick. Namely, the WL sub-system +- * does not pick eraseblocks with erase counter greater then the lowest erase ++ * does not pick eraseblocks with erase counter greater than the lowest erase + * counter plus %WL_FREE_MAX_DIFF. + */ + #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) +@@ -161,7 +161,7 @@ struct ubi_work { + int torture; + }; + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); + static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root); +@@ -350,7 +350,7 @@ static void prot_queue_add(struct ubi_de + * @max: highest possible erase counter + * + * This function looks for a wear leveling entry with erase counter closest to +- * @max and less then @max. ++ * @max and less than @max. 
+ */ + static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max) + { +@@ -459,6 +459,14 @@ retry: + dbg_wl("PEB %d EC %d", e->pnum, e->ec); + prot_queue_add(ubi, e); + spin_unlock(&ubi->wl_lock); ++ ++ err = ubi_dbg_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset, ++ ubi->peb_size - ubi->vid_hdr_aloffset); ++ if (err) { ++ ubi_err("new PEB %d does not contain all 0xFF bytes", e->pnum); ++ return err; ++ } ++ + return e->pnum; + } + +@@ -505,7 +513,7 @@ static int sync_erase(struct ubi_device + dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec); + + err = paranoid_check_ec(ubi, e->pnum, e->ec); +- if (err > 0) ++ if (err) + return -EINVAL; + + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); +@@ -605,7 +613,7 @@ static void schedule_ubi_work(struct ubi + list_add_tail(&wrk->list, &ubi->works); + ubi_assert(ubi->works_count >= 0); + ubi->works_count += 1; +- if (ubi->thread_enabled) ++ if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled()) + wake_up_process(ubi->bgt_thread); + spin_unlock(&ubi->wl_lock); + } +@@ -656,6 +664,7 @@ static int wear_leveling_worker(struct u + int cancel) + { + int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0; ++ int vol_id = -1, uninitialized_var(lnum); + struct ubi_wl_entry *e1, *e2; + struct ubi_vid_hdr *vid_hdr; + +@@ -736,7 +745,7 @@ static int wear_leveling_worker(struct u + + err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); + if (err && err != UBI_IO_BITFLIPS) { +- if (err == UBI_IO_PEB_FREE) { ++ if (err == UBI_IO_FF) { + /* + * We are trying to move PEB without a VID header. UBI + * always write VID headers shortly after the PEB was +@@ -750,6 +759,16 @@ static int wear_leveling_worker(struct u + dbg_wl("PEB %d has no VID header", e1->pnum); + protect = 1; + goto out_not_moved; ++ } else if (err == UBI_IO_FF_BITFLIPS) { ++ /* ++ * The same situation as %UBI_IO_FF, but bit-flips were ++ * detected. It is better to schedule this PEB for ++ * scrubbing. 
++ */ ++ dbg_wl("PEB %d has no VID header but has bit-flips", ++ e1->pnum); ++ scrubbing = 1; ++ goto out_not_moved; + } + + ubi_err("error %d while reading VID header from PEB %d", +@@ -757,6 +776,9 @@ static int wear_leveling_worker(struct u + goto out_error; + } + ++ vol_id = be32_to_cpu(vid_hdr->vol_id); ++ lnum = be32_to_cpu(vid_hdr->lnum); ++ + err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); + if (err) { + if (err == MOVE_CANCEL_RACE) { +@@ -773,7 +795,9 @@ static int wear_leveling_worker(struct u + + if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR || + err == MOVE_TARGET_RD_ERR) { +- /* Target PEB bit-flips or write error, torture it */ ++ /* ++ * Target PEB had bit-flips or write error - torture it. ++ */ + torture = 1; + goto out_not_moved; + } +@@ -803,10 +827,10 @@ static int wear_leveling_worker(struct u + } + + /* The PEB has been successfully moved */ +- ubi_free_vid_hdr(ubi, vid_hdr); + if (scrubbing) +- ubi_msg("scrubbed PEB %d, data moved to PEB %d", +- e1->pnum, e2->pnum); ++ ubi_msg("scrubbed PEB %d (LEB %d:%d), data moved to PEB %d", ++ e1->pnum, vol_id, lnum, e2->pnum); ++ ubi_free_vid_hdr(ubi, vid_hdr); + + spin_lock(&ubi->wl_lock); + if (!ubi->move_to_put) { +@@ -830,7 +854,8 @@ static int wear_leveling_worker(struct u + * Well, the target PEB was put meanwhile, schedule it for + * erasure. + */ +- dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); ++ dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase", ++ e2->pnum, vol_id, lnum); + err = schedule_erase(ubi, e2, 0); + if (err) { + kmem_cache_free(ubi_wl_entry_slab, e2); +@@ -848,8 +873,12 @@ static int wear_leveling_worker(struct u + * have been changed, schedule it for erasure. 
+ */ + out_not_moved: +- dbg_wl("cancel moving PEB %d to PEB %d (%d)", +- e1->pnum, e2->pnum, err); ++ if (vol_id != -1) ++ dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)", ++ e1->pnum, vol_id, lnum, e2->pnum, err); ++ else ++ dbg_wl("cancel moving PEB %d to PEB %d (%d)", ++ e1->pnum, e2->pnum, err); + spin_lock(&ubi->wl_lock); + if (protect) + prot_queue_add(ubi, e1); +@@ -875,8 +904,12 @@ out_not_moved: + return 0; + + out_error: +- ubi_err("error %d while moving PEB %d to PEB %d", +- err, e1->pnum, e2->pnum); ++ if (vol_id != -1) ++ ubi_err("error %d while moving PEB %d to PEB %d", ++ err, e1->pnum, e2->pnum); ++ else ++ ubi_err("error %d while moving PEB %d (LEB %d:%d) to PEB %d", ++ err, e1->pnum, vol_id, lnum, e2->pnum); + spin_lock(&ubi->wl_lock); + ubi->move_from = ubi->move_to = NULL; + ubi->move_to_put = ubi->wl_scheduled = 0; +@@ -932,7 +965,7 @@ static int ensure_wear_leveling(struct u + /* + * We schedule wear-leveling only if the difference between the + * lowest erase counter of used physical eraseblocks and a high +- * erase counter of free physical eraseblocks is greater then ++ * erase counter of free physical eraseblocks is greater than + * %UBI_WL_THRESHOLD. 
+ */ + e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb); +@@ -1058,10 +1091,9 @@ static int erase_worker(struct ubi_devic + ubi_err("no reserved physical eraseblocks"); + goto out_ro; + } +- + spin_unlock(&ubi->volumes_lock); +- ubi_msg("mark PEB %d as bad", pnum); + ++ ubi_msg("mark PEB %d as bad", pnum); + err = ubi_io_mark_bad(ubi, pnum); + if (err) + goto out_ro; +@@ -1071,7 +1103,9 @@ static int erase_worker(struct ubi_devic + ubi->bad_peb_count += 1; + ubi->good_peb_count -= 1; + ubi_calculate_reserved(ubi); +- if (ubi->beb_rsvd_pebs == 0) ++ if (ubi->beb_rsvd_pebs) ++ ubi_msg("%d PEBs left in the reserve", ubi->beb_rsvd_pebs); ++ else + ubi_warn("last PEB from the reserved pool was used"); + spin_unlock(&ubi->volumes_lock); + +@@ -1188,7 +1222,8 @@ int ubi_wl_scrub_peb(struct ubi_device * + retry: + spin_lock(&ubi->wl_lock); + e = ubi->lookuptbl[pnum]; +- if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) { ++ if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) || ++ in_wl_tree(e, &ubi->erroneous)) { + spin_unlock(&ubi->wl_lock); + return 0; + } +@@ -1329,7 +1364,7 @@ int ubi_thread(void *u) + + spin_lock(&ubi->wl_lock); + if (list_empty(&ubi->works) || ubi->ro_mode || +- !ubi->thread_enabled) { ++ !ubi->thread_enabled || ubi_dbg_is_bgt_disabled()) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock(&ubi->wl_lock); + schedule(); +@@ -1443,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device * + ubi->lookuptbl[e->pnum] = e; + } + +- list_for_each_entry(seb, &si->corr, u.list) { +- cond_resched(); +- +- e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); +- if (!e) +- goto out_free; +- +- e->pnum = seb->pnum; +- e->ec = seb->ec; +- ubi->lookuptbl[e->pnum] = e; +- if (schedule_erase(ubi, e, 0)) { +- kmem_cache_free(ubi_wl_entry_slab, e); +- goto out_free; +- } +- } +- + ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) { + ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { + cond_resched(); +@@ -1485,6 +1504,9 @@ int 
ubi_wl_init_scan(struct ubi_device * + if (ubi->avail_pebs < WL_RESERVED_PEBS) { + ubi_err("no enough physical eraseblocks (%d, need %d)", + ubi->avail_pebs, WL_RESERVED_PEBS); ++ if (ubi->corr_peb_count) ++ ubi_err("%d PEBs are corrupted and not used", ++ ubi->corr_peb_count); + goto out_free; + } + ubi->avail_pebs -= WL_RESERVED_PEBS; +@@ -1539,7 +1561,7 @@ void ubi_wl_close(struct ubi_device *ubi + kfree(ubi->lookuptbl); + } + +-#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID ++#ifdef CONFIG_MTD_UBI_DEBUG + + /** + * paranoid_check_ec - make sure that the erase counter of a PEB is correct. +@@ -1548,7 +1570,7 @@ void ubi_wl_close(struct ubi_device *ubi + * @ec: the erase counter to check + * + * This function returns zero if the erase counter of physical eraseblock @pnum +- * is equivalent to @ec, %1 if not, and a negative error code if an error ++ * is equivalent to @ec, and a negative error code if not or if an error + * occurred. + */ + static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec) +@@ -1557,6 +1579,9 @@ static int paranoid_check_ec(struct ubi_ + long long read_ec; + struct ubi_ec_hdr *ec_hdr; + ++ if (!(ubi_chk_flags & UBI_CHK_GEN)) ++ return 0; ++ + ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS); + if (!ec_hdr) + return -ENOMEM; +@@ -1587,19 +1612,22 @@ out_free: + * @e: the wear-leveling entry to check + * @root: the root of the tree + * +- * This function returns zero if @e is in the @root RB-tree and %1 if it is +- * not. ++ * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it ++ * is not. 
+ */ + static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, + struct rb_root *root) + { ++ if (!(ubi_chk_flags & UBI_CHK_GEN)) ++ return 0; ++ + if (in_wl_tree(e, root)) + return 0; + + ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ", + e->pnum, e->ec, root); + ubi_dbg_dump_stack(); +- return 1; ++ return -EINVAL; + } + + /** +@@ -1608,13 +1636,16 @@ static int paranoid_check_in_wl_tree(str + * @ubi: UBI device description object + * @e: the wear-leveling entry to check + * +- * This function returns zero if @e is in @ubi->pq and %1 if it is not. ++ * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not. + */ + static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e) + { + struct ubi_wl_entry *p; + int i; + ++ if (!(ubi_chk_flags & UBI_CHK_GEN)) ++ return 0; ++ + for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) + list_for_each_entry(p, &ubi->pq[i], u.list) + if (p == e) +@@ -1623,6 +1654,7 @@ static int paranoid_check_in_pq(struct u + ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue", + e->pnum, e->ec); + ubi_dbg_dump_stack(); +- return 1; ++ return -EINVAL; + } +-#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ ++ ++#endif /* CONFIG_MTD_UBI_DEBUG */ +diff -uprN linux-2.6.28/fs/ubifs/budget.c ubifs-v2.6.28/fs/ubifs/budget.c +--- linux-2.6.28/fs/ubifs/budget.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/budget.c 2011-06-15 14:22:09.000000000 -0400 +@@ -91,7 +91,6 @@ static int shrink_liability(struct ubifs + return nr_written; + } + +- + /** + * run_gc - run garbage collector. 
+ * @c: UBIFS file-system description object +@@ -131,7 +130,7 @@ static long long get_liability(struct ub + long long liab; + + spin_lock(&c->space_lock); +- liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; ++ liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; + spin_unlock(&c->space_lock); + return liab; + } +@@ -142,7 +141,7 @@ static long long get_liability(struct ub + * + * This function is called when an operation cannot be budgeted because there + * is supposedly no free space. But in most cases there is some free space: +- * o budgeting is pessimistic, so it always budgets more then it is actually ++ * o budgeting is pessimistic, so it always budgets more than it is actually + * needed, so shrinking the liability is one way to make free space - the + * cached data will take less space then it was budgeted for; + * o GC may turn some dark space into free space (budgeting treats dark space +@@ -194,29 +193,26 @@ static int make_free_space(struct ubifs_ + } + + /** +- * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index. ++ * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index. + * @c: UBIFS file-system description object + * +- * This function calculates and returns the number of eraseblocks which should +- * be kept for index usage. ++ * This function calculates and returns the number of LEBs which should be kept ++ * for index usage. 
+ */ + int ubifs_calc_min_idx_lebs(struct ubifs_info *c) + { +- int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz; ++ int idx_lebs; + long long idx_size; + +- idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; +- ++ idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; + /* And make sure we have thrice the index size of space reserved */ +- idx_size = idx_size + (idx_size << 1); +- ++ idx_size += idx_size << 1; + /* + * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' + * pair, nor similarly the two variables for the new index size, so we + * have to do this costly 64-bit division on fast-path. + */ +- idx_size += eff_leb_size - 1; +- idx_lebs = div_u64(idx_size, eff_leb_size); ++ idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size); + /* + * The index head is not available for the in-the-gaps method, so add an + * extra LEB to compensate. +@@ -300,7 +296,7 @@ long long ubifs_calc_available(const str + */ + static int can_use_rp(struct ubifs_info *c) + { +- if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) || ++ if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) || + (c->rp_gid != 0 && in_group_p(c->rp_gid))) + return 1; + return 0; +@@ -310,23 +306,23 @@ static int can_use_rp(struct ubifs_info + * do_budget_space - reserve flash space for index and data growth. + * @c: UBIFS file-system description object + * +- * This function makes sure UBIFS has enough free eraseblocks for index growth +- * and data. ++ * This function makes sure UBIFS has enough free LEBs for index growth and ++ * data. + * + * When budgeting index space, UBIFS reserves thrice as many LEBs as the index + * would take if it was consolidated and written to the flash. This guarantees + * that the "in-the-gaps" commit method always succeeds and UBIFS will always + * be able to commit dirty index. 
So this function basically adds amount of + * budgeted index space to the size of the current index, multiplies this by 3, +- * and makes sure this does not exceed the amount of free eraseblocks. ++ * and makes sure this does not exceed the amount of free LEBs. + * +- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: ++ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: + * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might + * be large, because UBIFS does not do any index consolidation as long as + * there is free space. IOW, the index may take a lot of LEBs, but the LEBs + * will contain a lot of dirt. +- * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be +- * consolidated to take up to @c->min_idx_lebs LEBs. ++ * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, ++ * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. + * + * This function returns zero in case of success, and %-ENOSPC in case of + * failure. 
+@@ -371,13 +367,13 @@ static int do_budget_space(struct ubifs_ + c->lst.taken_empty_lebs; + if (unlikely(rsvd_idx_lebs > lebs)) { + dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " +- "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, ++ "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, + rsvd_idx_lebs); + return -ENOSPC; + } + + available = ubifs_calc_available(c, min_idx_lebs); +- outstanding = c->budg_data_growth + c->budg_dd_growth; ++ outstanding = c->bi.data_growth + c->bi.dd_growth; + + if (unlikely(available < outstanding)) { + dbg_budg("out of data space: available %lld, outstanding %lld", +@@ -388,7 +384,7 @@ static int do_budget_space(struct ubifs_ + if (available - outstanding <= c->rp_size && !can_use_rp(c)) + return -ENOSPC; + +- c->min_idx_lebs = min_idx_lebs; ++ c->bi.min_idx_lebs = min_idx_lebs; + return 0; + } + +@@ -421,11 +417,11 @@ static int calc_data_growth(const struct + { + int data_growth; + +- data_growth = req->new_ino ? c->inode_budget : 0; ++ data_growth = req->new_ino ? c->bi.inode_budget : 0; + if (req->new_page) +- data_growth += c->page_budget; ++ data_growth += c->bi.page_budget; + if (req->new_dent) +- data_growth += c->dent_budget; ++ data_growth += c->bi.dent_budget; + data_growth += req->new_ino_d; + return data_growth; + } +@@ -441,12 +437,12 @@ static int calc_dd_growth(const struct u + { + int dd_growth; + +- dd_growth = req->dirtied_page ? c->page_budget : 0; ++ dd_growth = req->dirtied_page ? 
c->bi.page_budget : 0; + + if (req->dirtied_ino) +- dd_growth += c->inode_budget << (req->dirtied_ino - 1); ++ dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); + if (req->mod_dent) +- dd_growth += c->dent_budget; ++ dd_growth += c->bi.dent_budget; + dd_growth += req->dirtied_ino_d; + return dd_growth; + } +@@ -488,19 +484,19 @@ int ubifs_budget_space(struct ubifs_info + + again: + spin_lock(&c->space_lock); +- ubifs_assert(c->budg_idx_growth >= 0); +- ubifs_assert(c->budg_data_growth >= 0); +- ubifs_assert(c->budg_dd_growth >= 0); ++ ubifs_assert(c->bi.idx_growth >= 0); ++ ubifs_assert(c->bi.data_growth >= 0); ++ ubifs_assert(c->bi.dd_growth >= 0); + +- if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { ++ if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { + dbg_budg("no space"); + spin_unlock(&c->space_lock); + return -ENOSPC; + } + +- c->budg_idx_growth += idx_growth; +- c->budg_data_growth += data_growth; +- c->budg_dd_growth += dd_growth; ++ c->bi.idx_growth += idx_growth; ++ c->bi.data_growth += data_growth; ++ c->bi.dd_growth += dd_growth; + + err = do_budget_space(c); + if (likely(!err)) { +@@ -512,9 +508,9 @@ again: + } + + /* Restore the old values */ +- c->budg_idx_growth -= idx_growth; +- c->budg_data_growth -= data_growth; +- c->budg_dd_growth -= dd_growth; ++ c->bi.idx_growth -= idx_growth; ++ c->bi.data_growth -= data_growth; ++ c->bi.dd_growth -= dd_growth; + spin_unlock(&c->space_lock); + + if (req->fast) { +@@ -534,9 +530,9 @@ again: + goto again; + } + dbg_budg("FS is full, -ENOSPC"); +- c->nospace = 1; ++ c->bi.nospace = 1; + if (can_use_rp(c) || c->rp_size == 0) +- c->nospace_rp = 1; ++ c->bi.nospace_rp = 1; + smp_wmb(); + } else + ubifs_err("cannot budget space, error %d", err); +@@ -551,8 +547,8 @@ again: + * This function releases the space budgeted by 'ubifs_budget_space()'. 
Note, + * since the index changes (which were budgeted for in @req->idx_growth) will + * only be written to the media on commit, this function moves the index budget +- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be +- * zeroed by the commit operation. ++ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed ++ * by the commit operation. + */ + void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) + { +@@ -581,23 +577,23 @@ void ubifs_release_budget(struct ubifs_i + if (!req->data_growth && !req->dd_growth) + return; + +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + + spin_lock(&c->space_lock); +- c->budg_idx_growth -= req->idx_growth; +- c->budg_uncommitted_idx += req->idx_growth; +- c->budg_data_growth -= req->data_growth; +- c->budg_dd_growth -= req->dd_growth; +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); +- +- ubifs_assert(c->budg_idx_growth >= 0); +- ubifs_assert(c->budg_data_growth >= 0); +- ubifs_assert(c->budg_dd_growth >= 0); +- ubifs_assert(c->min_idx_lebs < c->main_lebs); +- ubifs_assert(!(c->budg_idx_growth & 7)); +- ubifs_assert(!(c->budg_data_growth & 7)); +- ubifs_assert(!(c->budg_dd_growth & 7)); ++ c->bi.idx_growth -= req->idx_growth; ++ c->bi.uncommitted_idx += req->idx_growth; ++ c->bi.data_growth -= req->data_growth; ++ c->bi.dd_growth -= req->dd_growth; ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ++ ubifs_assert(c->bi.idx_growth >= 0); ++ ubifs_assert(c->bi.data_growth >= 0); ++ ubifs_assert(c->bi.dd_growth >= 0); ++ ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); ++ ubifs_assert(!(c->bi.idx_growth & 7)); ++ ubifs_assert(!(c->bi.data_growth & 7)); ++ ubifs_assert(!(c->bi.dd_growth & 7)); + spin_unlock(&c->space_lock); + } + +@@ -606,7 +602,7 @@ void ubifs_release_budget(struct ubifs_i + * @c: UBIFS file-system description object + * + * This function converts budget which was allocated for a new page of data to 
+- * the budget of changing an existing page of data. The latter is smaller then ++ * the budget of changing an existing page of data. The latter is smaller than + * the former, so this function only does simple re-calculation and does not + * involve any write-back. + */ +@@ -614,13 +610,13 @@ void ubifs_convert_page_budget(struct ub + { + spin_lock(&c->space_lock); + /* Release the index growth reservation */ +- c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; ++ c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; + /* Release the data growth reservation */ +- c->budg_data_growth -= c->page_budget; ++ c->bi.data_growth -= c->bi.page_budget; + /* Increase the dirty data growth reservation instead */ +- c->budg_dd_growth += c->page_budget; ++ c->bi.dd_growth += c->bi.page_budget; + /* And re-calculate the indexing space reservation */ +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); + } + +@@ -640,7 +636,7 @@ void ubifs_release_dirty_inode_budget(st + + memset(&req, 0, sizeof(struct ubifs_budget_req)); + /* The "no space" flags will be cleared because dd_growth is > 0 */ +- req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); ++ req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); + ubifs_release_budget(c, &req); + } + +@@ -696,12 +692,12 @@ long long ubifs_reported_space(const str + * This function calculates amount of free space to report to user-space. + * + * Because UBIFS may introduce substantial overhead (the index, node headers, +- * alignment, wastage at the end of eraseblocks, etc), it cannot report real +- * amount of free flash space it has (well, because not all dirty space is +- * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, +- * it would bread user expectations about what free space is. 
Users seem to +- * accustomed to assume that if the file-system reports N bytes of free space, +- * they would be able to fit a file of N bytes to the FS. This almost works for ++ * alignment, wastage at the end of LEBs, etc), it cannot report real amount of ++ * free flash space it has (well, because not all dirty space is reclaimable, ++ * UBIFS does not actually know the real amount). If UBIFS did so, it would ++ * bread user expectations about what free space is. Users seem to accustomed ++ * to assume that if the file-system reports N bytes of free space, they would ++ * be able to fit a file of N bytes to the FS. This almost works for + * traditional file-systems, because they have way less overhead than UBIFS. + * So, to keep users happy, UBIFS tries to take the overhead into account. + */ +@@ -710,9 +706,9 @@ long long ubifs_get_free_space_nolock(st + int rsvd_idx_lebs, lebs; + long long available, outstanding, free; + +- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); +- outstanding = c->budg_data_growth + c->budg_dd_growth; +- available = ubifs_calc_available(c, c->min_idx_lebs); ++ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); ++ outstanding = c->bi.data_growth + c->bi.dd_growth; ++ available = ubifs_calc_available(c, c->bi.min_idx_lebs); + + /* + * When reporting free space to user-space, UBIFS guarantees that it is +@@ -725,8 +721,8 @@ long long ubifs_get_free_space_nolock(st + * Note, the calculations below are similar to what we have in + * 'do_budget_space()', so refer there for comments. + */ +- if (c->min_idx_lebs > c->lst.idx_lebs) +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - +@@ -745,7 +741,7 @@ long long ubifs_get_free_space_nolock(st + * ubifs_get_free_space - return amount of free space. 
+ * @c: UBIFS file-system description object + * +- * This function calculates and retuns amount of free space to report to ++ * This function calculates and returns amount of free space to report to + * user-space. + */ + long long ubifs_get_free_space(struct ubifs_info *c) +diff -uprN linux-2.6.28/fs/ubifs/commit.c ubifs-v2.6.28/fs/ubifs/commit.c +--- linux-2.6.28/fs/ubifs/commit.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/commit.c 2011-06-15 14:22:09.000000000 -0400 +@@ -47,6 +47,56 @@ + #include + #include "ubifs.h" + ++/* ++ * nothing_to_commit - check if there is nothing to commit. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function which checks if there is anything to commit. It is ++ * used as an optimization to avoid starting the commit if it is not really ++ * necessary. Indeed, the commit operation always assumes flash I/O (e.g., ++ * writing the commit start node to the log), and it is better to avoid doing ++ * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is ++ * nothing to commit, it is more optimal to avoid any flash I/O. ++ * ++ * This function has to be called with @c->commit_sem locked for writing - ++ * this function does not take LPT/TNC locks because the @c->commit_sem ++ * guarantees that we have exclusive access to the TNC and LPT data structures. ++ * ++ * This function returns %1 if there is nothing to commit and %0 otherwise. ++ */ ++static int nothing_to_commit(struct ubifs_info *c) ++{ ++ /* ++ * During mounting or remounting from R/O mode to R/W mode we may ++ * commit for various recovery-related reasons. ++ */ ++ if (c->mounting || c->remounting_rw) ++ return 0; ++ ++ /* ++ * If the root TNC node is dirty, we definitely have something to ++ * commit. ++ */ ++ if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags)) ++ return 0; ++ ++ /* ++ * Even though the TNC is clean, the LPT tree may have dirty nodes. 
For ++ * example, this may happen if the budgeting subsystem invoked GC to ++ * make some free space, and the GC found an LEB with only dirty and ++ * free space. In this case GC would just change the lprops of this ++ * LEB (by turning all space into free space) and unmap it. ++ */ ++ if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags)) ++ return 0; ++ ++ ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0); ++ ubifs_assert(c->dirty_pn_cnt == 0); ++ ubifs_assert(c->dirty_nn_cnt == 0); ++ ++ return 1; ++} ++ + /** + * do_commit - commit the journal. + * @c: UBIFS file-system description object +@@ -62,11 +112,19 @@ static int do_commit(struct ubifs_info * + struct ubifs_lp_stats lst; + + dbg_cmt("start"); +- if (c->ro_media) { ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ ++ if (c->ro_error) { + err = -EROFS; + goto out_up; + } + ++ if (nothing_to_commit(c)) { ++ up_write(&c->commit_sem); ++ err = 0; ++ goto out_cancel; ++ } ++ + /* Sync all write buffers (necessary for recovery) */ + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); +@@ -123,7 +181,7 @@ static int do_commit(struct ubifs_info * + c->mst_node->root_len = cpu_to_le32(zroot.len); + c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); + c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); +- c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); ++ c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); + c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); + c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); + c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); +@@ -159,12 +217,12 @@ static int do_commit(struct ubifs_info * + if (err) + goto out; + ++out_cancel: + spin_lock(&c->cs_lock); + c->cmt_state = COMMIT_RESTING; + wake_up(&c->cmt_wq); + dbg_cmt("commit end"); + spin_unlock(&c->cs_lock); +- + return 0; + + out_up: +@@ -510,7 +568,7 @@ int dbg_check_old_index(struct ubifs_inf + int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; + int 
first = 1, iip; + struct ubifs_debug_info *d = c->dbg; +- union ubifs_key lower_key, upper_key, l_key, u_key; ++ union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key; + unsigned long long uninitialized_var(last_sqnum); + struct ubifs_idx_node *idx; + struct list_head list; +@@ -518,7 +576,7 @@ int dbg_check_old_index(struct ubifs_inf + size_t sz; + + if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) +- goto out; ++ return 0; + + INIT_LIST_HEAD(&list); + +diff -uprN linux-2.6.28/fs/ubifs/compress.c ubifs-v2.6.28/fs/ubifs/compress.c +--- linux-2.6.28/fs/ubifs/compress.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/compress.c 2011-06-15 14:22:09.000000000 -0400 +@@ -46,24 +46,11 @@ static struct ubifs_compressor lzo_compr + .name = "lzo", + .capi_name = "lzo", + }; +- +-static DEFINE_MUTEX(lzo999_mutex); +- +-static struct ubifs_compressor lzo999_compr = { +- .compr_type = UBIFS_COMPR_LZO999, +- .comp_mutex = &lzo999_mutex, +- .name = "lzo999", +- .capi_name = "lzo999", +-}; + #else + static struct ubifs_compressor lzo_compr = { + .compr_type = UBIFS_COMPR_LZO, + .name = "lzo", + }; +-static struct ubifs_compressor lzo_compr = { +- .compr_type = UBIFS_COMPR_LZO999, +- .name = "lzo999", +-}; + #endif + + #ifdef CONFIG_UBIFS_FS_ZLIB +@@ -138,9 +125,6 @@ void ubifs_compress(const void *in_buf, + if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF) + goto no_compr; + +- if (*compr_type == UBIFS_COMPR_LZO999) +- *compr_type = UBIFS_COMPR_LZO; +- + return; + + no_compr: +@@ -245,19 +229,13 @@ int __init ubifs_compressors_init(void) + if (err) + return err; + +- err = compr_init(&lzo999_compr); +- if (err) +- goto out_lzo; +- + err = compr_init(&zlib_compr); + if (err) +- goto out_lzo999; ++ goto out_lzo; + + ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr; + return 0; + +-out_lzo999: +- compr_exit(&lzo999_compr); + out_lzo: + compr_exit(&lzo_compr); + return err; +@@ -268,7 +246,6 @@ out_lzo: + */ + void ubifs_compressors_exit(void) + { +- 
compr_exit(&lzo999_compr); + compr_exit(&lzo_compr); + compr_exit(&zlib_compr); + } +diff -uprN linux-2.6.28/fs/ubifs/debug.c ubifs-v2.6.28/fs/ubifs/debug.c +--- linux-2.6.28/fs/ubifs/debug.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/debug.c 2011-06-15 14:22:09.000000000 -0400 +@@ -42,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock); + static char dbg_key_buf0[128]; + static char dbg_key_buf1[128]; + +-unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; +-unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; ++unsigned int ubifs_chk_flags; + unsigned int ubifs_tst_flags; + +-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); + module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); + module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); + +-MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); + MODULE_PARM_DESC(debug_chks, "Debug check flags"); + MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); + +@@ -210,6 +207,20 @@ const char *dbg_cstate(int cmt_state) + } + } + ++const char *dbg_jhead(int jhead) ++{ ++ switch (jhead) { ++ case GCHD: ++ return "0 (GC)"; ++ case BASEHD: ++ return "1 (base)"; ++ case DATAHD: ++ return "2 (data)"; ++ default: ++ return "unknown journal head"; ++ } ++} ++ + static void dump_ch(const struct ubifs_ch *ch) + { + printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); +@@ -302,6 +313,8 @@ void dbg_dump_node(const struct ubifs_in + printk(KERN_DEBUG "\tflags %#x\n", sup_flags); + printk(KERN_DEBUG "\t big_lpt %u\n", + !!(sup_flags & UBIFS_FLG_BIGLPT)); ++ printk(KERN_DEBUG "\t space_fixup %u\n", ++ !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); + printk(KERN_DEBUG "\tmin_io_size %u\n", + le32_to_cpu(sup->min_io_size)); + printk(KERN_DEBUG "\tleb_size %u\n", +@@ -479,9 +492,9 @@ void dbg_dump_node(const struct ubifs_in + "bad or corrupted node)"); + else { + for (i = 0; i < nlen && dent->name[i]; i++) +- printk("%c", dent->name[i]); ++ 
printk(KERN_CONT "%c", dent->name[i]); + } +- printk("\n"); ++ printk(KERN_CONT "\n"); + + break; + } +@@ -592,7 +605,7 @@ void dbg_dump_lstats(const struct ubifs_ + spin_unlock(&dbg_lock); + } + +-void dbg_dump_budg(struct ubifs_info *c) ++void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) + { + int i; + struct rb_node *rb; +@@ -600,31 +613,48 @@ void dbg_dump_budg(struct ubifs_info *c) + struct ubifs_gced_idx_leb *idx_gc; + long long available, outstanding, free; + +- ubifs_assert(spin_is_locked(&c->space_lock)); ++ spin_lock(&c->space_lock); + spin_lock(&dbg_lock); +- printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " +- "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, +- c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); +- printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " +- "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, +- c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, +- c->freeable_cnt); +- printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " +- "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, +- c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); ++ printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " ++ "total budget sum %lld\n", current->pid, ++ bi->data_growth + bi->dd_growth, ++ bi->data_growth + bi->dd_growth + bi->idx_growth); ++ printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " ++ "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, ++ bi->idx_growth); ++ printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " ++ "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, ++ bi->uncommitted_idx); ++ printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", ++ bi->page_budget, bi->inode_budget, bi->dent_budget); ++ printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", ++ bi->nospace, bi->nospace_rp); ++ printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", ++ c->dark_wm, 
c->dead_wm, c->max_idx_node_sz); ++ ++ if (bi != &c->bi) ++ /* ++ * If we are dumping saved budgeting data, do not print ++ * additional information which is about the current state, not ++ * the old one which corresponded to the saved budgeting data. ++ */ ++ goto out_unlock; ++ ++ printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", ++ c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); + printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " + "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), + atomic_long_read(&c->dirty_zn_cnt), + atomic_long_read(&c->clean_zn_cnt)); +- printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", +- c->dark_wm, c->dead_wm, c->max_idx_node_sz); + printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", + c->gc_lnum, c->ihead_lnum); ++ + /* If we are in R/O mode, journal heads do not exist */ + if (c->jheads) + for (i = 0; i < c->jhead_cnt; i++) +- printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", +- c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); ++ printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", ++ dbg_jhead(c->jheads[i].wbuf.jhead), ++ c->jheads[i].wbuf.lnum); + for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); + printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); +@@ -637,20 +667,109 @@ void dbg_dump_budg(struct ubifs_info *c) + printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); + + /* Print budgeting predictions */ +- available = ubifs_calc_available(c, c->min_idx_lebs); +- outstanding = c->budg_data_growth + c->budg_dd_growth; ++ available = ubifs_calc_available(c, c->bi.min_idx_lebs); ++ outstanding = c->bi.data_growth + c->bi.dd_growth; + free = ubifs_get_free_space_nolock(c); + printk(KERN_DEBUG "Budgeting predictions:\n"); + printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", + available, outstanding, free); ++out_unlock: + spin_unlock(&dbg_lock); ++ spin_unlock(&c->space_lock); + } + + void dbg_dump_lprop(const struct ubifs_info *c, 
const struct ubifs_lprops *lp) + { +- printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), " +- "flags %#x\n", lp->lnum, lp->free, lp->dirty, +- c->leb_size - lp->free - lp->dirty, lp->flags); ++ int i, spc, dark = 0, dead = 0; ++ struct rb_node *rb; ++ struct ubifs_bud *bud; ++ ++ spc = lp->free + lp->dirty; ++ if (spc < c->dead_wm) ++ dead = spc; ++ else ++ dark = ubifs_calc_dark(c, spc); ++ ++ if (lp->flags & LPROPS_INDEX) ++ printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " ++ "free + dirty %-8d flags %#x (", lp->lnum, lp->free, ++ lp->dirty, c->leb_size - spc, spc, lp->flags); ++ else ++ printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " ++ "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " ++ "flags %#-4x (", lp->lnum, lp->free, lp->dirty, ++ c->leb_size - spc, spc, dark, dead, ++ (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); ++ ++ if (lp->flags & LPROPS_TAKEN) { ++ if (lp->flags & LPROPS_INDEX) ++ printk(KERN_CONT "index, taken"); ++ else ++ printk(KERN_CONT "taken"); ++ } else { ++ const char *s; ++ ++ if (lp->flags & LPROPS_INDEX) { ++ switch (lp->flags & LPROPS_CAT_MASK) { ++ case LPROPS_DIRTY_IDX: ++ s = "dirty index"; ++ break; ++ case LPROPS_FRDI_IDX: ++ s = "freeable index"; ++ break; ++ default: ++ s = "index"; ++ } ++ } else { ++ switch (lp->flags & LPROPS_CAT_MASK) { ++ case LPROPS_UNCAT: ++ s = "not categorized"; ++ break; ++ case LPROPS_DIRTY: ++ s = "dirty"; ++ break; ++ case LPROPS_FREE: ++ s = "free"; ++ break; ++ case LPROPS_EMPTY: ++ s = "empty"; ++ break; ++ case LPROPS_FREEABLE: ++ s = "freeable"; ++ break; ++ default: ++ s = NULL; ++ break; ++ } ++ } ++ printk(KERN_CONT "%s", s); ++ } ++ ++ for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { ++ bud = rb_entry(rb, struct ubifs_bud, rb); ++ if (bud->lnum == lp->lnum) { ++ int head = 0; ++ for (i = 0; i < c->jhead_cnt; i++) { ++ /* ++ * Note, if we are in R/O mode or in the middle ++ * of mounting/re-mounting, the write-buffers do ++ * 
not exist. ++ */ ++ if (c->jheads && ++ lp->lnum == c->jheads[i].wbuf.lnum) { ++ printk(KERN_CONT ", jhead %s", ++ dbg_jhead(i)); ++ head = 1; ++ } ++ } ++ if (!head) ++ printk(KERN_CONT ", bud of jhead %s", ++ dbg_jhead(bud->jhead)); ++ } ++ } ++ if (lp->lnum == c->gc_lnum) ++ printk(KERN_CONT ", GC LEB"); ++ printk(KERN_CONT ")\n"); + } + + void dbg_dump_lprops(struct ubifs_info *c) +@@ -718,16 +837,24 @@ void dbg_dump_leb(const struct ubifs_inf + { + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; ++ void *buf; + + if (dbg_failure_mode) + return; + + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", + current->pid, lnum); +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); ++ ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory for dumping LEB %d", lnum); ++ return; ++ } ++ ++ sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + ubifs_err("scan error %d", (int)PTR_ERR(sleb)); +- return; ++ goto out; + } + + printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, +@@ -743,6 +870,9 @@ void dbg_dump_leb(const struct ubifs_inf + printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); + ubifs_scan_destroy(sleb); ++ ++out: ++ vfree(buf); + return; + } + +@@ -869,11 +999,41 @@ void dbg_dump_index(struct ubifs_info *c + void dbg_save_space_info(struct ubifs_info *c) + { + struct ubifs_debug_info *d = c->dbg; +- +- ubifs_get_lp_stats(c, &d->saved_lst); ++ int freeable_cnt; + + spin_lock(&c->space_lock); ++ memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); ++ memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); ++ d->saved_idx_gc_cnt = c->idx_gc_cnt; ++ ++ /* ++ * We use a dirty hack here and zero out @c->freeable_cnt, because it ++ * affects the free space calculations, and UBIFS might not know about ++ * all freeable eraseblocks. 
Indeed, we know about freeable eraseblocks ++ * only when we read their lprops, and we do this only lazily, upon the ++ * need. So at any given point of time @c->freeable_cnt might be not ++ * exactly accurate. ++ * ++ * Just one example about the issue we hit when we did not zero ++ * @c->freeable_cnt. ++ * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the ++ * amount of free space in @d->saved_free ++ * 2. We re-mount R/W, which makes UBIFS to read the "lsave" ++ * information from flash, where we cache LEBs from various ++ * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()' ++ * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()' ++ * -> 'ubifs_get_pnode()' -> 'update_cats()' ++ * -> 'ubifs_add_to_cat()'). ++ * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt ++ * becomes %1. ++ * 4. We calculate the amount of free space when the re-mount is ++ * finished in 'dbg_check_space_info()' and it does not match ++ * @d->saved_free. ++ */ ++ freeable_cnt = c->freeable_cnt; ++ c->freeable_cnt = 0; + d->saved_free = ubifs_get_free_space_nolock(c); ++ c->freeable_cnt = freeable_cnt; + spin_unlock(&c->space_lock); + } + +@@ -890,12 +1050,15 @@ int dbg_check_space_info(struct ubifs_in + { + struct ubifs_debug_info *d = c->dbg; + struct ubifs_lp_stats lst; +- long long avail, free; ++ long long free; ++ int freeable_cnt; + + spin_lock(&c->space_lock); +- avail = ubifs_calc_available(c, c->min_idx_lebs); ++ freeable_cnt = c->freeable_cnt; ++ c->freeable_cnt = 0; ++ free = ubifs_get_free_space_nolock(c); ++ c->freeable_cnt = freeable_cnt; + spin_unlock(&c->space_lock); +- free = ubifs_get_free_space(c); + + if (free != d->saved_free) { + ubifs_err("free space changed from %lld to %lld", +@@ -908,12 +1071,14 @@ int dbg_check_space_info(struct ubifs_in + out: + ubifs_msg("saved lprops statistics dump"); + dbg_dump_lstats(&d->saved_lst); +- ubifs_get_lp_stats(c, &lst); ++ ubifs_msg("saved budgeting info dump"); ++ dbg_dump_budg(c, 
&d->saved_bi);
++	ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
+ 	ubifs_msg("current lprops statistics dump");
+-	dbg_dump_lstats(&d->saved_lst);
+-	spin_lock(&c->space_lock);
+-	dbg_dump_budg(c);
+-	spin_unlock(&c->space_lock);
++	ubifs_get_lp_stats(c, &lst);
++	dbg_dump_lstats(&lst);
++	ubifs_msg("current budgeting info dump");
++	dbg_dump_budg(c, &c->bi);
+ 	dump_stack();
+ 	return -EINVAL;
+ }
+@@ -1214,7 +1379,7 @@ static int dbg_check_znode(struct ubifs_
+ 
+ 	/*
+ 	 * Make sure the last key in our znode is less or
+-	 * equivalent than the the key in zbranch which goes
++	 * equivalent than the key in the zbranch which goes
+ 	 * after our pointing zbranch.
+ 	 */
+ 	cmp = keys_cmp(c, max,
+@@ -1657,6 +1822,8 @@ static struct fsck_inode *add_inode(stru
+ 	struct rb_node **p, *parent = NULL;
+ 	struct fsck_inode *fscki;
+ 	ino_t inum = key_inum_flash(c, &ino->key);
++	struct inode *inode;
++	struct ubifs_inode *ui;
+ 
+ 	p = &fsckd->inodes.rb_node;
+ 	while (*p) {
+@@ -1680,19 +1847,46 @@ static struct fsck_inode *add_inode(stru
+ 	if (!fscki)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	inode = ilookup(c->vfs_sb, inum);
++
+ 	fscki->inum = inum;
+-	fscki->nlink = le32_to_cpu(ino->nlink);
+-	fscki->size = le64_to_cpu(ino->size);
+-	fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
+-	fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
+-	fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
+-	fscki->mode = le32_to_cpu(ino->mode);
++	/*
++	 * If the inode is present in the VFS inode cache, use it instead of
++	 * the on-flash inode which might be out-of-date. E.g., the size might
++	 * be out-of-date. If we do not do this, the following may happen, for
++	 * example:
++	 * 1. A power cut happens
++	 * 2. We mount the file-system R/O, the replay process fixes up the
++	 *    inode size in the VFS cache, but not on-flash.
++	 * 3. 'check_leaf()' fails because it hits a data node beyond inode
++	 *    size.
++ */ ++ if (!inode) { ++ fscki->nlink = le32_to_cpu(ino->nlink); ++ fscki->size = le64_to_cpu(ino->size); ++ fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); ++ fscki->xattr_sz = le32_to_cpu(ino->xattr_size); ++ fscki->xattr_nms = le32_to_cpu(ino->xattr_names); ++ fscki->mode = le32_to_cpu(ino->mode); ++ } else { ++ ui = ubifs_inode(inode); ++ fscki->nlink = inode->i_nlink; ++ fscki->size = inode->i_size; ++ fscki->xattr_cnt = ui->xattr_cnt; ++ fscki->xattr_sz = ui->xattr_size; ++ fscki->xattr_nms = ui->xattr_names; ++ fscki->mode = inode->i_mode; ++ iput(inode); ++ } ++ + if (S_ISDIR(fscki->mode)) { + fscki->calc_sz = UBIFS_INO_NODE_SZ; + fscki->calc_cnt = 2; + } ++ + rb_link_node(&fscki->rb, parent, p); + rb_insert_color(&fscki->rb, &fsckd->inodes); ++ + return fscki; + } + +@@ -1916,7 +2110,7 @@ static int check_leaf(struct ubifs_info + inum = key_inum_flash(c, &dent->key); + fscki1 = read_add_inode(c, priv, inum); + if (IS_ERR(fscki1)) { +- err = PTR_ERR(fscki); ++ err = PTR_ERR(fscki1); + ubifs_err("error %d while processing entry node and " + "trying to find parent inode node %lu", + err, (unsigned long)inum); +@@ -2145,14 +2339,169 @@ out_free: + return err; + } + +-static int invocation_cnt; ++/** ++ * dbg_check_data_nodes_order - check that list of data nodes is sorted. ++ * @c: UBIFS file-system description object ++ * @head: the list of nodes ('struct ubifs_scan_node' objects) ++ * ++ * This function returns zero if the list of data nodes is sorted correctly, ++ * and %-EINVAL if not. 
++ */ ++int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) ++{ ++ struct list_head *cur; ++ struct ubifs_scan_node *sa, *sb; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ return 0; ++ ++ for (cur = head->next; cur->next != head; cur = cur->next) { ++ ino_t inuma, inumb; ++ uint32_t blka, blkb; ++ ++ cond_resched(); ++ sa = container_of(cur, struct ubifs_scan_node, list); ++ sb = container_of(cur->next, struct ubifs_scan_node, list); ++ ++ if (sa->type != UBIFS_DATA_NODE) { ++ ubifs_err("bad node type %d", sa->type); ++ dbg_dump_node(c, sa->node); ++ return -EINVAL; ++ } ++ if (sb->type != UBIFS_DATA_NODE) { ++ ubifs_err("bad node type %d", sb->type); ++ dbg_dump_node(c, sb->node); ++ return -EINVAL; ++ } ++ ++ inuma = key_inum(c, &sa->key); ++ inumb = key_inum(c, &sb->key); ++ ++ if (inuma < inumb) ++ continue; ++ if (inuma > inumb) { ++ ubifs_err("larger inum %lu goes before inum %lu", ++ (unsigned long)inuma, (unsigned long)inumb); ++ goto error_dump; ++ } ++ ++ blka = key_block(c, &sa->key); ++ blkb = key_block(c, &sb->key); ++ ++ if (blka > blkb) { ++ ubifs_err("larger block %u goes before %u", blka, blkb); ++ goto error_dump; ++ } ++ if (blka == blkb) { ++ ubifs_err("two data nodes for the same block"); ++ goto error_dump; ++ } ++ } ++ ++ return 0; ++ ++error_dump: ++ dbg_dump_node(c, sa->node); ++ dbg_dump_node(c, sb->node); ++ return -EINVAL; ++} ++ ++/** ++ * dbg_check_nondata_nodes_order - check that list of data nodes is sorted. ++ * @c: UBIFS file-system description object ++ * @head: the list of nodes ('struct ubifs_scan_node' objects) ++ * ++ * This function returns zero if the list of non-data nodes is sorted correctly, ++ * and %-EINVAL if not. 
++ */
++int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
++{
++ struct list_head *cur;
++ struct ubifs_scan_node *sa, *sb;
++
++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
++ return 0;
++
++ for (cur = head->next; cur->next != head; cur = cur->next) {
++ ino_t inuma, inumb;
++ uint32_t hasha, hashb;
++
++ cond_resched();
++ sa = container_of(cur, struct ubifs_scan_node, list);
++ sb = container_of(cur->next, struct ubifs_scan_node, list);
++
++ if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
++ sa->type != UBIFS_XENT_NODE) {
++ ubifs_err("bad node type %d", sa->type);
++ dbg_dump_node(c, sa->node);
++ return -EINVAL;
++ }
++ if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
++ sb->type != UBIFS_XENT_NODE) {
++ ubifs_err("bad node type %d", sb->type);
++ dbg_dump_node(c, sb->node);
++ return -EINVAL;
++ }
++
++ if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
++ ubifs_err("non-inode node goes before inode node");
++ goto error_dump;
++ }
++
++ if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE)
++ continue;
++
++ if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
++ /* Inode nodes are sorted in descending size order */
++ if (sa->len < sb->len) {
++ ubifs_err("smaller inode node goes first");
++ goto error_dump;
++ }
++ continue;
++ }
++
++ /*
++ * This is either a dentry or xentry, which should be sorted in
++ * ascending (parent ino, hash) order. 
++ */ ++ inuma = key_inum(c, &sa->key); ++ inumb = key_inum(c, &sb->key); ++ ++ if (inuma < inumb) ++ continue; ++ if (inuma > inumb) { ++ ubifs_err("larger inum %lu goes before inum %lu", ++ (unsigned long)inuma, (unsigned long)inumb); ++ goto error_dump; ++ } ++ ++ hasha = key_block(c, &sa->key); ++ hashb = key_block(c, &sb->key); ++ ++ if (hasha > hashb) { ++ ubifs_err("larger hash %u goes before %u", ++ hasha, hashb); ++ goto error_dump; ++ } ++ } ++ ++ return 0; ++ ++error_dump: ++ ubifs_msg("dumping first node"); ++ dbg_dump_node(c, sa->node); ++ ubifs_msg("dumping second node"); ++ dbg_dump_node(c, sb->node); ++ return -EINVAL; ++ return 0; ++} + + int dbg_force_in_the_gaps(void) + { +- if (!dbg_force_in_the_gaps_enabled) ++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; +- /* Force in-the-gaps every 8th commit */ +- return !((invocation_cnt++) & 0x7); ++ ++ return !(random32() & 7); + } + + /* Failure mode for recovery testing */ +@@ -2340,7 +2689,7 @@ int dbg_leb_read(struct ubi_volume_desc + int len, int check) + { + if (in_failure_mode(desc)) +- return -EIO; ++ return -EROFS; + return ubi_leb_read(desc, lnum, buf, offset, len, check); + } + +@@ -2350,7 +2699,7 @@ int dbg_leb_write(struct ubi_volume_desc + int err, failing; + + if (in_failure_mode(desc)) +- return -EIO; ++ return -EROFS; + failing = do_fail(desc, lnum, 1); + if (failing) + cut_data(buf, len); +@@ -2358,7 +2707,7 @@ int dbg_leb_write(struct ubi_volume_desc + if (err) + return err; + if (failing) +- return -EIO; ++ return -EROFS; + return 0; + } + +@@ -2368,12 +2717,12 @@ int dbg_leb_change(struct ubi_volume_des + int err; + + if (do_fail(desc, lnum, 1)) +- return -EIO; ++ return -EROFS; + err = ubi_leb_change(desc, lnum, buf, len, dtype); + if (err) + return err; + if (do_fail(desc, lnum, 1)) +- return -EIO; ++ return -EROFS; + return 0; + } + +@@ -2382,12 +2731,12 @@ int dbg_leb_erase(struct ubi_volume_desc + int err; + + if (do_fail(desc, lnum, 0)) +- return -EIO; ++ return -EROFS; 
+ err = ubi_leb_erase(desc, lnum); + if (err) + return err; + if (do_fail(desc, lnum, 0)) +- return -EIO; ++ return -EROFS; + return 0; + } + +@@ -2396,19 +2745,19 @@ int dbg_leb_unmap(struct ubi_volume_desc + int err; + + if (do_fail(desc, lnum, 0)) +- return -EIO; ++ return -EROFS; + err = ubi_leb_unmap(desc, lnum); + if (err) + return err; + if (do_fail(desc, lnum, 0)) +- return -EIO; ++ return -EROFS; + return 0; + } + + int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) + { + if (in_failure_mode(desc)) +- return -EIO; ++ return -EROFS; + return ubi_is_mapped(desc, lnum); + } + +@@ -2417,12 +2766,12 @@ int dbg_leb_map(struct ubi_volume_desc * + int err; + + if (do_fail(desc, lnum, 0)) +- return -EIO; ++ return -EROFS; + err = ubi_leb_map(desc, lnum, dtype); + if (err) + return err; + if (do_fail(desc, lnum, 0)) +- return -EIO; ++ return -EROFS; + return 0; + } + +@@ -2440,16 +2789,8 @@ int ubifs_debugging_init(struct ubifs_in + if (!c->dbg) + return -ENOMEM; + +- c->dbg->buf = vmalloc(c->leb_size); +- if (!c->dbg->buf) +- goto out; +- + failure_mode_init(c); + return 0; +- +-out: +- kfree(c->dbg); +- return -ENOMEM; + } + + /** +@@ -2459,7 +2800,6 @@ out: + void ubifs_debugging_exit(struct ubifs_info *c) + { + failure_mode_exit(c); +- vfree(c->dbg->buf); + kfree(c->dbg); + } + +@@ -2501,7 +2841,7 @@ void dbg_debugfs_exit(void) + static int open_debugfs_file(struct inode *inode, struct file *file) + { + file->private_data = inode->i_private; +- return 0; ++ return nonseekable_open(inode, file); + } + + static ssize_t write_debugfs_file(struct file *file, const char __user *buf, +@@ -2512,18 +2852,15 @@ static ssize_t write_debugfs_file(struct + + if (file->f_path.dentry == d->dfs_dump_lprops) + dbg_dump_lprops(c); +- else if (file->f_path.dentry == d->dfs_dump_budg) { +- spin_lock(&c->space_lock); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); +- } else if (file->f_path.dentry == d->dfs_dump_tnc) { ++ else if (file->f_path.dentry == 
d->dfs_dump_budg) ++ dbg_dump_budg(c, &c->bi); ++ else if (file->f_path.dentry == d->dfs_dump_tnc) { + mutex_lock(&c->tnc_mutex); + dbg_dump_tnc(c); + mutex_unlock(&c->tnc_mutex); + } else + return -EINVAL; + +- *ppos += count; + return count; + } + +@@ -2531,6 +2868,7 @@ static const struct file_operations dfs_ + .open = open_debugfs_file, + .write = write_debugfs_file, + .owner = THIS_MODULE, ++ .llseek = no_llseek, + }; + + /** +@@ -2553,40 +2891,38 @@ int dbg_debugfs_init_fs(struct ubifs_inf + struct ubifs_debug_info *d = c->dbg; + + sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); +- d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir); +- if (IS_ERR(d->dfs_dir)) { +- err = PTR_ERR(d->dfs_dir); +- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", +- d->dfs_dir_name, err); ++ fname = d->dfs_dir_name; ++ dent = debugfs_create_dir(fname, dfs_rootdir); ++ if (!dent || IS_ERR(dent)) + goto out; +- } ++ d->dfs_dir = dent; + + fname = "dump_lprops"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (!dent || IS_ERR(dent)) + goto out_remove; + d->dfs_dump_lprops = dent; + + fname = "dump_budg"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (!dent || IS_ERR(dent)) + goto out_remove; + d->dfs_dump_budg = dent; + + fname = "dump_tnc"; +- dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); +- if (IS_ERR(dent)) ++ dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops); ++ if (!dent || IS_ERR(dent)) + goto out_remove; + d->dfs_dump_tnc = dent; + + return 0; + + out_remove: +- err = PTR_ERR(dent); +- ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", +- fname, err); + debugfs_remove_recursive(d->dfs_dir); + out: ++ err = dent ? 
PTR_ERR(dent) : -ENODEV; ++ ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", ++ fname, err); + return err; + } + +diff -uprN linux-2.6.28/fs/ubifs/debug.h ubifs-v2.6.28/fs/ubifs/debug.h +--- linux-2.6.28/fs/ubifs/debug.h 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/debug.h 2011-06-15 14:22:09.000000000 -0400 +@@ -23,11 +23,18 @@ + #ifndef __UBIFS_DEBUG_H__ + #define __UBIFS_DEBUG_H__ + ++/* Checking helper functions */ ++typedef int (*dbg_leaf_callback)(struct ubifs_info *c, ++ struct ubifs_zbranch *zbr, void *priv); ++typedef int (*dbg_znode_callback)(struct ubifs_info *c, ++ struct ubifs_znode *znode, void *priv); ++ + #ifdef CONFIG_UBIFS_FS_DEBUG + ++#include ++ + /** + * ubifs_debug_info - per-FS debugging information. +- * @buf: a buffer of LEB size, used for various purposes + * @old_zroot: old index root - used by 'dbg_check_old_index()' + * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' + * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' +@@ -45,16 +52,17 @@ + * @new_ihead_offs: used by debugging to check @c->ihead_offs + * + * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') +- * @saved_free: saved free space (used by 'dbg_save_space_info()') ++ * @saved_bi: saved budgeting information ++ * @saved_free: saved amount of free space ++ * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt + * +- * dfs_dir_name: name of debugfs directory containing this file-system's files +- * dfs_dir: direntry object of the file-system debugfs directory +- * dfs_dump_lprops: "dump lprops" debugfs knob +- * dfs_dump_budg: "dump budgeting information" debugfs knob +- * dfs_dump_tnc: "dump TNC" debugfs knob ++ * @dfs_dir_name: name of debugfs directory containing this file-system's files ++ * @dfs_dir: direntry object of the file-system debugfs directory ++ * @dfs_dump_lprops: "dump lprops" debugfs knob ++ * @dfs_dump_budg: "dump budgeting information" debugfs knob ++ * 
@dfs_dump_tnc: "dump TNC" debugfs knob + */ + struct ubifs_debug_info { +- void *buf; + struct ubifs_zbranch old_zroot; + int old_zroot_level; + unsigned long long old_zroot_sqnum; +@@ -72,7 +80,9 @@ struct ubifs_debug_info { + int new_ihead_offs; + + struct ubifs_lp_stats saved_lst; ++ struct ubifs_budg_info saved_bi; + long long saved_free; ++ int saved_idx_gc_cnt; + + char dfs_dir_name[100]; + struct dentry *dfs_dir; +@@ -97,23 +107,7 @@ struct ubifs_debug_info { + } \ + } while (0) + +-#define dbg_dump_stack() do { \ +- if (!dbg_failure_mode) \ +- dump_stack(); \ +-} while (0) +- +-/* Generic debugging messages */ +-#define dbg_msg(fmt, ...) do { \ +- spin_lock(&dbg_lock); \ +- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ +- __func__, ##__VA_ARGS__); \ +- spin_unlock(&dbg_lock); \ +-} while (0) +- +-#define dbg_do_msg(typ, fmt, ...) do { \ +- if (ubifs_msg_flags & typ) \ +- dbg_msg(fmt, ##__VA_ARGS__); \ +-} while (0) ++#define dbg_dump_stack() dump_stack() + + #define dbg_err(fmt, ...) do { \ + spin_lock(&dbg_lock); \ +@@ -133,86 +127,43 @@ const char *dbg_key_str1(const struct ub + #define DBGKEY(key) dbg_key_str0(c, (key)) + #define DBGKEY1(key) dbg_key_str1(c, (key)) + +-/* General messages */ +-#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) ++#define ubifs_dbg_msg(type, fmt, ...) do { \ ++ spin_lock(&dbg_lock); \ ++ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ ++ spin_unlock(&dbg_lock); \ ++} while (0) + ++/* Just a debugging messages not related to any specific UBIFS subsystem */ ++#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) ++/* General messages */ ++#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) + /* Additional journal messages */ +-#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) +- ++#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) + /* Additional TNC messages */ +-#define dbg_tnc(fmt, ...) 
dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) +- ++#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) + /* Additional lprops messages */ +-#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) +- ++#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) + /* Additional LEB find messages */ +-#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) +- ++#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) + /* Additional mount messages */ +-#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) +- ++#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) + /* Additional I/O messages */ +-#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) +- ++#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) + /* Additional commit messages */ +-#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) +- ++#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) + /* Additional budgeting messages */ +-#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) +- ++#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) + /* Additional log messages */ +-#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) +- ++#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) + /* Additional gc messages */ +-#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) +- ++#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) + /* Additional scan messages */ +-#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) +- ++#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) + /* Additional recovery messages */ +-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) ++#define dbg_rcvry(fmt, ...) 
ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) + + /* +- * Debugging message type flags (must match msg_type_names in debug.c). +- * +- * UBIFS_MSG_GEN: general messages +- * UBIFS_MSG_JNL: journal messages +- * UBIFS_MSG_MNT: mount messages +- * UBIFS_MSG_CMT: commit messages +- * UBIFS_MSG_FIND: LEB find messages +- * UBIFS_MSG_BUDG: budgeting messages +- * UBIFS_MSG_GC: garbage collection messages +- * UBIFS_MSG_TNC: TNC messages +- * UBIFS_MSG_LP: lprops messages +- * UBIFS_MSG_IO: I/O messages +- * UBIFS_MSG_LOG: log messages +- * UBIFS_MSG_SCAN: scan messages +- * UBIFS_MSG_RCVRY: recovery messages +- */ +-enum { +- UBIFS_MSG_GEN = 0x1, +- UBIFS_MSG_JNL = 0x2, +- UBIFS_MSG_MNT = 0x4, +- UBIFS_MSG_CMT = 0x8, +- UBIFS_MSG_FIND = 0x10, +- UBIFS_MSG_BUDG = 0x20, +- UBIFS_MSG_GC = 0x40, +- UBIFS_MSG_TNC = 0x80, +- UBIFS_MSG_LP = 0x100, +- UBIFS_MSG_IO = 0x200, +- UBIFS_MSG_LOG = 0x400, +- UBIFS_MSG_SCAN = 0x800, +- UBIFS_MSG_RCVRY = 0x1000, +-}; +- +-/* Debugging message type flags for each default debug message level */ +-#define UBIFS_MSG_LVL_0 0 +-#define UBIFS_MSG_LVL_1 0x1 +-#define UBIFS_MSG_LVL_2 0x7f +-#define UBIFS_MSG_LVL_3 0xffff +- +-/* +- * Debugging check flags (must match chk_names in debug.c). ++ * Debugging check flags. + * + * UBIFS_CHK_GEN: general checks + * UBIFS_CHK_TNC: check TNC +@@ -233,32 +184,14 @@ enum { + }; + + /* +- * Special testing flags (must match tst_names in debug.c). ++ * Special testing flags. 
+ * +- * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method + * UBIFS_TST_RCVRY: failure mode for recovery testing + */ + enum { +- UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, + UBIFS_TST_RCVRY = 0x4, + }; + +-#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1 +-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2 +-#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3 +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3 +-#else +-#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0 +-#endif +- +-#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS +-#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff +-#else +-#define UBIFS_CHK_FLAGS_DEFAULT 0 +-#endif +- + extern spinlock_t dbg_lock; + + extern unsigned int ubifs_msg_flags; +@@ -271,6 +204,7 @@ void ubifs_debugging_exit(struct ubifs_i + /* Dump functions */ + const char *dbg_ntype(int type); + const char *dbg_cstate(int cmt_state); ++const char *dbg_jhead(int jhead); + const char *dbg_get_key_dump(const struct ubifs_info *c, + const union ubifs_key *key); + void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); +@@ -279,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubif + int offs); + void dbg_dump_budget_req(const struct ubifs_budget_req *req); + void dbg_dump_lstats(const struct ubifs_lp_stats *lst); +-void dbg_dump_budg(struct ubifs_info *c); ++void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); + void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); + void dbg_dump_lprops(struct ubifs_info *c); + void dbg_dump_lpt_info(struct ubifs_info *c); +@@ -293,11 +227,6 @@ void dbg_dump_tnc(struct ubifs_info *c); + void dbg_dump_index(struct ubifs_info *c); + void dbg_dump_lpt_lebs(const struct ubifs_info *c); + +-/* Checking helper functions */ +-typedef int (*dbg_leaf_callback)(struct ubifs_info *c, +- struct ubifs_zbranch *zbr, void *priv); +-typedef int (*dbg_znode_callback)(struct ubifs_info *c, +- struct 
ubifs_znode *znode, void *priv); + int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, + dbg_znode_callback znode_cb, void *priv); + +@@ -318,23 +247,24 @@ int dbg_check_idx_size(struct ubifs_info + int dbg_check_filesystem(struct ubifs_info *c); + void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, + int add_pos); +-int dbg_check_lprops(struct ubifs_info *c); + int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, + int row, int col); ++int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, ++ loff_t size); ++int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); ++int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); + + /* Force the use of in-the-gaps method for testing */ +- +-#define dbg_force_in_the_gaps_enabled \ +- (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) +- ++static inline int dbg_force_in_the_gaps_enabled(void) ++{ ++ return ubifs_chk_flags & UBIFS_CHK_GEN; ++} + int dbg_force_in_the_gaps(void); + + /* Failure mode for recovery testing */ +- + #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) + + #ifndef UBIFS_DBG_PRESERVE_UBI +- + #define ubi_leb_read dbg_leb_read + #define ubi_leb_write dbg_leb_write + #define ubi_leb_change dbg_leb_change +@@ -342,7 +272,6 @@ int dbg_force_in_the_gaps(void); + #define ubi_leb_unmap dbg_leb_unmap + #define ubi_is_mapped dbg_is_mapped + #define ubi_leb_map dbg_leb_map +- + #endif + + int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, +@@ -389,85 +318,127 @@ void dbg_debugfs_exit_fs(struct ubifs_in + __func__, __LINE__, current->pid); \ + } while (0) + +-#define dbg_err(fmt, ...) do { \ +- if (0) \ +- ubifs_err(fmt, ##__VA_ARGS__); \ ++#define dbg_err(fmt, ...) do { \ ++ if (0) \ ++ ubifs_err(fmt, ##__VA_ARGS__); \ + } while (0) + +-#define dbg_msg(fmt, ...) 
do { \ +- if (0) \ +- printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ +- current->pid, __func__, ##__VA_ARGS__); \ ++#define ubifs_dbg_msg(fmt, ...) do { \ ++ if (0) \ ++ pr_debug(fmt "\n", ##__VA_ARGS__); \ + } while (0) + + #define dbg_dump_stack() + #define ubifs_assert_cmt_locked(c) + +-#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +-#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) ++#define dbg_rcvry(fmt, ...) 
ubifs_dbg_msg(fmt, ##__VA_ARGS__) + + #define DBGKEY(key) ((char *)(key)) + #define DBGKEY1(key) ((char *)(key)) + +-#define ubifs_debugging_init(c) 0 +-#define ubifs_debugging_exit(c) ({}) +- +-#define dbg_ntype(type) "" +-#define dbg_cstate(cmt_state) "" +-#define dbg_get_key_dump(c, key) ({}) +-#define dbg_dump_inode(c, inode) ({}) +-#define dbg_dump_node(c, node) ({}) +-#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) +-#define dbg_dump_budget_req(req) ({}) +-#define dbg_dump_lstats(lst) ({}) +-#define dbg_dump_budg(c) ({}) +-#define dbg_dump_lprop(c, lp) ({}) +-#define dbg_dump_lprops(c) ({}) +-#define dbg_dump_lpt_info(c) ({}) +-#define dbg_dump_leb(c, lnum) ({}) +-#define dbg_dump_znode(c, znode) ({}) +-#define dbg_dump_heap(c, heap, cat) ({}) +-#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +-#define dbg_dump_tnc(c) ({}) +-#define dbg_dump_index(c) ({}) +-#define dbg_dump_lpt_lebs(c) ({}) +- +-#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 +-#define dbg_old_index_check_init(c, zroot) 0 +-#define dbg_save_space_info(c) ({}) +-#define dbg_check_space_info(c) 0 +-#define dbg_check_old_index(c, zroot) 0 +-#define dbg_check_cats(c) 0 +-#define dbg_check_ltab(c) 0 +-#define dbg_chk_lpt_free_spc(c) 0 +-#define dbg_chk_lpt_sz(c, action, len) 0 +-#define dbg_check_synced_i_size(inode) 0 +-#define dbg_check_dir_size(c, dir) 0 +-#define dbg_check_tnc(c, x) 0 +-#define dbg_check_idx_size(c, idx_size) 0 +-#define dbg_check_filesystem(c) 0 +-#define dbg_check_heap(c, heap, cat, add_pos) ({}) +-#define dbg_check_lprops(c) 0 +-#define dbg_check_lpt_nodes(c, cnode, row, col) 0 +-#define dbg_force_in_the_gaps_enabled 0 +-#define dbg_force_in_the_gaps() 0 +-#define dbg_failure_mode 0 +- +-#define dbg_debugfs_init() 0 +-#define dbg_debugfs_exit() +-#define dbg_debugfs_init_fs(c) 0 +-#define dbg_debugfs_exit_fs(c) 0 ++static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } ++static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; 
} ++static inline const char *dbg_ntype(int type) { return ""; } ++static inline const char *dbg_cstate(int cmt_state) { return ""; } ++static inline const char *dbg_jhead(int jhead) { return ""; } ++static inline const char * ++dbg_get_key_dump(const struct ubifs_info *c, ++ const union ubifs_key *key) { return ""; } ++static inline void dbg_dump_inode(const struct ubifs_info *c, ++ const struct inode *inode) { return; } ++static inline void dbg_dump_node(const struct ubifs_info *c, ++ const void *node) { return; } ++static inline void dbg_dump_lpt_node(const struct ubifs_info *c, ++ void *node, int lnum, ++ int offs) { return; } ++static inline void ++dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } ++static inline void ++dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } ++static inline void ++dbg_dump_budg(struct ubifs_info *c, ++ const struct ubifs_budg_info *bi) { return; } ++static inline void dbg_dump_lprop(const struct ubifs_info *c, ++ const struct ubifs_lprops *lp) { return; } ++static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } ++static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; } ++static inline void dbg_dump_leb(const struct ubifs_info *c, ++ int lnum) { return; } ++static inline void ++dbg_dump_znode(const struct ubifs_info *c, ++ const struct ubifs_znode *znode) { return; } ++static inline void dbg_dump_heap(struct ubifs_info *c, ++ struct ubifs_lpt_heap *heap, ++ int cat) { return; } ++static inline void dbg_dump_pnode(struct ubifs_info *c, ++ struct ubifs_pnode *pnode, ++ struct ubifs_nnode *parent, ++ int iip) { return; } ++static inline void dbg_dump_tnc(struct ubifs_info *c) { return; } ++static inline void dbg_dump_index(struct ubifs_info *c) { return; } ++static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; } ++ ++static inline int dbg_walk_index(struct ubifs_info *c, ++ dbg_leaf_callback leaf_cb, ++ dbg_znode_callback znode_cb, ++ void *priv) { return 0; 
} ++static inline void dbg_save_space_info(struct ubifs_info *c) { return; } ++static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; } ++static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; } ++static inline int ++dbg_old_index_check_init(struct ubifs_info *c, ++ struct ubifs_zbranch *zroot) { return 0; } ++static inline int ++dbg_check_old_index(struct ubifs_info *c, ++ struct ubifs_zbranch *zroot) { return 0; } ++static inline int dbg_check_cats(struct ubifs_info *c) { return 0; } ++static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; } ++static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } ++static inline int dbg_chk_lpt_sz(struct ubifs_info *c, ++ int action, int len) { return 0; } ++static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } ++static inline int dbg_check_dir_size(struct ubifs_info *c, ++ const struct inode *dir) { return 0; } ++static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } ++static inline int dbg_check_idx_size(struct ubifs_info *c, ++ long long idx_size) { return 0; } ++static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; } ++static inline void dbg_check_heap(struct ubifs_info *c, ++ struct ubifs_lpt_heap *heap, ++ int cat, int add_pos) { return; } ++static inline int dbg_check_lpt_nodes(struct ubifs_info *c, ++ struct ubifs_cnode *cnode, int row, int col) { return 0; } ++static inline int dbg_check_inode_size(struct ubifs_info *c, ++ const struct inode *inode, ++ loff_t size) { return 0; } ++static inline int ++dbg_check_data_nodes_order(struct ubifs_info *c, ++ struct list_head *head) { return 0; } ++static inline int ++dbg_check_nondata_nodes_order(struct ubifs_info *c, ++ struct list_head *head) { return 0; } ++ ++static inline int dbg_force_in_the_gaps(void) { return 0; } ++#define dbg_force_in_the_gaps_enabled() 0 ++#define dbg_failure_mode 0 ++ ++static inline int dbg_debugfs_init(void) { return 
0; } ++static inline void dbg_debugfs_exit(void) { return; } ++static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; } ++static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; } + + #endif /* !CONFIG_UBIFS_FS_DEBUG */ + #endif /* !__UBIFS_DEBUG_H__ */ +diff -uprN linux-2.6.28/fs/ubifs/dir.c ubifs-v2.6.28/fs/ubifs/dir.c +--- linux-2.6.28/fs/ubifs/dir.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/dir.c 2011-06-15 14:22:09.000000000 -0400 +@@ -104,13 +104,13 @@ struct inode *ubifs_new_inode(struct ubi + */ + inode->i_flags |= (S_NOCMTIME); + +- inode->i_uid = current->fsuid; ++ inode->i_uid = current_fsuid(); + if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else +- inode->i_gid = current->fsgid; ++ inode->i_gid = current_fsgid(); + inode->i_mode = mode; + inode->i_mtime = inode->i_atime = inode->i_ctime = + ubifs_current_time(inode); +@@ -628,7 +628,7 @@ static int ubifs_unlink(struct inode *di + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return 0; +@@ -718,7 +718,7 @@ static int ubifs_rmdir(struct inode *dir + ubifs_release_budget(c, &req); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return 0; +diff -uprN linux-2.6.28/fs/ubifs/file.c ubifs-v2.6.28/fs/ubifs/file.c +--- linux-2.6.28/fs/ubifs/file.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/file.c 2011-06-15 14:22:09.000000000 -0400 +@@ -21,34 +21,32 @@ + */ + + /* +- * This file implements VFS file and inode operations of regular files, device ++ * This file implements VFS file and inode operations for regular files, device + * nodes and symlinks as well as address space operations. 
+ * +- * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the +- * page is dirty and is used for budgeting purposes - dirty pages should not be +- * budgeted. The PG_checked flag is set if full budgeting is required for the +- * page e.g., when it corresponds to a file hole or it is just beyond the file +- * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to +- * fail in this function, and the budget is released in 'ubifs_write_end()'. So +- * the PG_private and PG_checked flags carry the information about how the page +- * was budgeted, to make it possible to release the budget properly. +- * +- * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations +- * we implement. However, this is not true for '->writepage()', which might be +- * called with 'i_mutex' unlocked. For example, when pdflush is performing +- * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the +- * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is +- * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim +- * path'. So, in '->writepage()' we are only guaranteed that the page is +- * locked. +- * +- * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., +- * readahead path does not have it locked ("sys_read -> generic_file_aio_read +- * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is +- * not set as well. However, UBIFS disables readahead. +- * +- * This, for example means that there might be 2 concurrent '->writepage()' +- * calls for the same inode, but different inode dirty pages. ++ * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if ++ * the page is dirty and is used for optimization purposes - dirty pages are ++ * not budgeted so the flag shows that 'ubifs_write_end()' should not release ++ * the budget for this page. 
The @PG_checked flag is set if full budgeting is ++ * required for the page e.g., when it corresponds to a file hole or it is ++ * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because ++ * it is OK to fail in this function, and the budget is released in ++ * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry ++ * information about how the page was budgeted, to make it possible to release ++ * the budget properly. ++ * ++ * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we ++ * implement. However, this is not true for 'ubifs_writepage()', which may be ++ * called with @i_mutex unlocked. For example, when pdflush is doing background ++ * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal" ++ * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the ++ * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()' ++ * we are only guaranteed that the page is locked. ++ * ++ * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the ++ * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> ++ * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not ++ * set as well. However, UBIFS disables readahead. 
+ */ + + #include "ubifs.h" +@@ -213,7 +211,7 @@ static void release_new_page_budget(stru + */ + static void release_existing_page_budget(struct ubifs_info *c) + { +- struct ubifs_budget_req req = { .dd_growth = c->page_budget}; ++ struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; + + ubifs_release_budget(c, &req); + } +@@ -433,8 +431,9 @@ static int ubifs_write_begin(struct file + struct page *page; + + ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); ++ ubifs_assert(!c->ro_media && !c->ro_mount); + +- if (unlikely(c->ro_media)) ++ if (unlikely(c->ro_error)) + return -EROFS; + + /* Try out the fast-path part first */ +@@ -447,10 +446,12 @@ static int ubifs_write_begin(struct file + if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { + /* + * We change whole page so no need to load it. But we +- * have to set the @PG_checked flag to make the further +- * code the page is new. This might be not true, but it +- * is better to budget more that to read the page from +- * the media. ++ * do not know whether this page exists on the media or ++ * not, so we assume the latter because it requires ++ * larger budget. The assumption is that it is better ++ * to budget a bit more than to read the page from the ++ * media. Thus, we are setting the @PG_checked flag ++ * here. + */ + SetPageChecked(page); + skipped_read = 1; +@@ -496,8 +497,8 @@ static int ubifs_write_begin(struct file + } + + /* +- * Whee, we aquired budgeting quickly - without involving +- * garbage-collection, committing or forceing write-back. We return ++ * Whee, we acquired budgeting quickly - without involving ++ * garbage-collection, committing or forcing write-back. We return + * with @ui->ui_mutex locked if we are appending pages, and unlocked + * otherwise. This is an optimization (slightly hacky though). 
+ */ +@@ -558,10 +559,11 @@ static int ubifs_write_end(struct file * + dbg_gen("copied %d instead of %d, read page and repeat", + copied, len); + cancel_budget(c, page, ui, appending); ++ ClearPageChecked(page); + + /* + * Return 0 to force VFS to repeat the whole operation, or the +- * error code if 'do_readpage()' failes. ++ * error code if 'do_readpage()' fails. + */ + copied = do_readpage(page); + goto out; +@@ -958,7 +960,7 @@ static int do_writepage(struct page *pag + * whole index and correct all inode sizes, which is long an unacceptable. + * + * To prevent situations like this, UBIFS writes pages back only if they are +- * within last synchronized inode size, i.e. the the size which has been ++ * within the last synchronized inode size, i.e. the size which has been + * written to the flash media last time. Otherwise, UBIFS forces inode + * write-back, thus making sure the on-flash inode contains current inode size, + * and then keeps writing pages back. +@@ -1174,16 +1176,16 @@ static int do_truncation(struct ubifs_in + ui->ui_size = inode->i_size; + /* Truncation changes inode [mc]time */ + inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); +- /* The other attributes may be changed at the same time as well */ ++ /* Other attributes may be changed at the same time as well */ + do_attr_changes(inode, attr); +- + err = ubifs_jnl_truncate(c, inode, old_size, new_size); + mutex_unlock(&ui->ui_mutex); ++ + out_budg: + if (budgeted) + ubifs_release_budget(c, &req); + else { +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + return err; +@@ -1312,6 +1314,13 @@ int ubifs_fsync(struct file *file, struc + + dbg_gen("syncing inode %lu", inode->i_ino); + ++ if (c->ro_mount) ++ /* ++ * For some really strange reasons VFS does not filter out ++ * 'fsync()' for R/O mounted file-systems as per 2.6.39. ++ */ ++ return 0; ++ + /* + * VFS has already synchronized dirty pages for this inode. 
Synchronize + * the inode unless this is a 'datasync()' call. +@@ -1440,8 +1449,8 @@ static int ubifs_releasepage(struct page + } + + /* +- * mmap()d file has taken write protection fault and is being made +- * writable. UBIFS must ensure page is budgeted for. ++ * mmap()d file has taken write protection fault and is being made writable. ++ * UBIFS must ensure page is budgeted for. + */ + static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) + { +@@ -1453,9 +1462,9 @@ static int ubifs_vm_page_mkwrite(struct + + dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, + i_size_read(inode)); +- ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); ++ ubifs_assert(!c->ro_media && !c->ro_mount); + +- if (unlikely(c->ro_media)) ++ if (unlikely(c->ro_error)) + return -EROFS; + + /* +@@ -1541,7 +1550,6 @@ static int ubifs_file_mmap(struct file * + { + int err; + +- /* 'generic_file_mmap()' takes care of NOMMU case */ + err = generic_file_mmap(file, vma); + if (err) + return err; +diff -uprN linux-2.6.28/fs/ubifs/find.c ubifs-v2.6.28/fs/ubifs/find.c +--- linux-2.6.28/fs/ubifs/find.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/fs/ubifs/find.c 2011-06-15 14:22:09.000000000 -0400 +@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_in + * But if the index takes fewer LEBs than it is reserved for it, + * this function must avoid picking those reserved LEBs. 
+ */ +- if (c->min_idx_lebs >= c->lst.idx_lebs) { +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs >= c->lst.idx_lebs) { ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + exclude_index = 1; + } + spin_unlock(&c->space_lock); +@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_in + pick_free = 0; + } else { + spin_lock(&c->space_lock); +- exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); ++ exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs); + spin_unlock(&c->space_lock); + } + +@@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_ + * ubifs_find_free_space - find a data LEB with free space. + * @c: the UBIFS file-system description object + * @min_space: minimum amount of required free space +- * @free: contains amount of free space in the LEB on exit ++ * @offs: contains offset of where free space starts on exit + * @squeeze: whether to try to find space in a non-empty LEB first + * + * This function looks for an LEB with at least @min_space bytes of free space. +@@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_ + * failed to find a LEB with @min_space bytes of free space and other a negative + * error codes in case of failure. 
+ */ +-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, ++int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, + int squeeze) + { + const struct ubifs_lprops *lprops; +@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_i + + /* Check if there are enough empty LEBs for commit */ + spin_lock(&c->space_lock); +- if (c->min_idx_lebs > c->lst.idx_lebs) +- rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; ++ if (c->bi.min_idx_lebs > c->lst.idx_lebs) ++ rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; + else + rsvd_idx_lebs = 0; + lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - +@@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_i + spin_unlock(&c->space_lock); + } + +- *free = lprops->free; ++ *offs = c->leb_size - lprops->free; + ubifs_release_lprops(c); + +- if (*free == c->leb_size) { ++ if (*offs == 0) { + /* + * Ensure that empty LEBs have been unmapped. They may not have + * been, for example, because of an unclean unmount. Also +@@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_i + return err; + } + +- dbg_find("found LEB %d, free %d", lnum, *free); +- ubifs_assert(*free >= min_space); ++ dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs); ++ ubifs_assert(*offs <= c->leb_size - min_space); + return lnum; + + out: +diff -uprN linux-2.6.28/fs/ubifs/gc.c ubifs-v2.6.28/fs/ubifs/gc.c +--- linux-2.6.28/fs/ubifs/gc.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/gc.c 2011-06-15 14:22:09.000000000 -0400 +@@ -47,25 +47,18 @@ + * have to waste large pieces of free space at the end of LEB B, because nodes + * from LEB A would not fit. And the worst situation is when all nodes are of + * maximum size. So dark watermark is the amount of free + dirty space in LEB +- * which are guaranteed to be reclaimable. If LEB has less space, the GC migh ++ * which are guaranteed to be reclaimable. If LEB has less space, the GC might + * be unable to reclaim it. 
So, LEBs with free + dirty greater than dark + * watermark are "good" LEBs from GC's point of few. The other LEBs are not so + * good, and GC takes extra care when moving them. + */ + + #include ++#include + #include "ubifs.h" + + /* +- * GC tries to optimize the way it fit nodes to available space, and it sorts +- * nodes a little. The below constants are watermarks which define "large", +- * "medium", and "small" nodes. +- */ +-#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4) +-#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ +- +-/* +- * GC may need to move more then one LEB to make progress. The below constants ++ * GC may need to move more than one LEB to make progress. The below constants + * define "soft" and "hard" limits on the number of LEBs the garbage collector + * may move. + */ +@@ -106,6 +99,10 @@ static int switch_gc_head(struct ubifs_i + if (err) + return err; + ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (err) ++ return err; ++ + err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); + if (err) + return err; +@@ -116,138 +113,243 @@ static int switch_gc_head(struct ubifs_i + } + + /** +- * joinup - bring data nodes for an inode together. +- * @c: UBIFS file-system description object +- * @sleb: describes scanned LEB +- * @inum: inode number +- * @blk: block number +- * @data: list to which to add data nodes +- * +- * This function looks at the first few nodes in the scanned LEB @sleb and adds +- * them to @data if they are data nodes from @inum and have a larger block +- * number than @blk. This function returns %0 on success and a negative error +- * code on failure. ++ * data_nodes_cmp - compare 2 data nodes. ++ * @priv: UBIFS file-system description object ++ * @a: first data node ++ * @a: second data node ++ * ++ * This function compares data nodes @a and @b. Returns %1 if @a has greater ++ * inode or block number, and %-1 otherwise. 
+ */ +-static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, +- unsigned int blk, struct list_head *data) ++static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) + { +- int err, cnt = 6, lnum = sleb->lnum, offs; +- struct ubifs_scan_node *snod, *tmp; +- union ubifs_key *key; ++ ino_t inuma, inumb; ++ struct ubifs_info *c = priv; ++ struct ubifs_scan_node *sa, *sb; ++ ++ cond_resched(); ++ if (a == b) ++ return 0; ++ ++ sa = list_entry(a, struct ubifs_scan_node, list); ++ sb = list_entry(b, struct ubifs_scan_node, list); ++ ++ ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); ++ ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); ++ ubifs_assert(sa->type == UBIFS_DATA_NODE); ++ ubifs_assert(sb->type == UBIFS_DATA_NODE); ++ ++ inuma = key_inum(c, &sa->key); ++ inumb = key_inum(c, &sb->key); ++ ++ if (inuma == inumb) { ++ unsigned int blka = key_block(c, &sa->key); ++ unsigned int blkb = key_block(c, &sb->key); ++ ++ if (blka <= blkb) ++ return -1; ++ } else if (inuma <= inumb) ++ return -1; + +- list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { +- key = &snod->key; +- if (key_inum(c, key) == inum && +- key_type(c, key) == UBIFS_DATA_KEY && +- key_block(c, key) > blk) { +- offs = snod->offs; +- err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); +- if (err < 0) +- return err; +- list_del(&snod->list); +- if (err) { +- list_add_tail(&snod->list, data); +- blk = key_block(c, key); +- } else +- kfree(snod); +- cnt = 6; +- } else if (--cnt == 0) +- break; +- } +- return 0; ++ return 1; ++} ++ ++/* ++ * nondata_nodes_cmp - compare 2 non-data nodes. ++ * @priv: UBIFS file-system description object ++ * @a: first node ++ * @a: second node ++ * ++ * This function compares nodes @a and @b. It makes sure that inode nodes go ++ * first and sorted by length in descending order. Directory entry nodes go ++ * after inode nodes and are sorted in ascending hash valuer order. 
++ */ ++static int nondata_nodes_cmp(void *priv, struct list_head *a, ++ struct list_head *b) ++{ ++ ino_t inuma, inumb; ++ struct ubifs_info *c = priv; ++ struct ubifs_scan_node *sa, *sb; ++ ++ cond_resched(); ++ if (a == b) ++ return 0; ++ ++ sa = list_entry(a, struct ubifs_scan_node, list); ++ sb = list_entry(b, struct ubifs_scan_node, list); ++ ++ ubifs_assert(key_type(c, &sa->key) != UBIFS_DATA_KEY && ++ key_type(c, &sb->key) != UBIFS_DATA_KEY); ++ ubifs_assert(sa->type != UBIFS_DATA_NODE && ++ sb->type != UBIFS_DATA_NODE); ++ ++ /* Inodes go before directory entries */ ++ if (sa->type == UBIFS_INO_NODE) { ++ if (sb->type == UBIFS_INO_NODE) ++ return sb->len - sa->len; ++ return -1; ++ } ++ if (sb->type == UBIFS_INO_NODE) ++ return 1; ++ ++ ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY || ++ key_type(c, &sa->key) == UBIFS_XENT_KEY); ++ ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY || ++ key_type(c, &sb->key) == UBIFS_XENT_KEY); ++ ubifs_assert(sa->type == UBIFS_DENT_NODE || ++ sa->type == UBIFS_XENT_NODE); ++ ubifs_assert(sb->type == UBIFS_DENT_NODE || ++ sb->type == UBIFS_XENT_NODE); ++ ++ inuma = key_inum(c, &sa->key); ++ inumb = key_inum(c, &sb->key); ++ ++ if (inuma == inumb) { ++ uint32_t hasha = key_hash(c, &sa->key); ++ uint32_t hashb = key_hash(c, &sb->key); ++ ++ if (hasha <= hashb) ++ return -1; ++ } else if (inuma <= inumb) ++ return -1; ++ ++ return 1; + } + + /** +- * move_nodes - move nodes. ++ * sort_nodes - sort nodes for GC. + * @c: UBIFS file-system description object +- * @sleb: describes nodes to move +- * +- * This function moves valid nodes from data LEB described by @sleb to the GC +- * journal head. The obsolete nodes are dropped. +- * +- * When moving nodes we have to deal with classical bin-packing problem: the +- * space in the current GC journal head LEB and in @c->gc_lnum are the "bins", +- * where the nodes in the @sleb->nodes list are the elements which should be +- * fit optimally to the bins. 
This function uses the "first fit decreasing" +- * strategy, although it does not really sort the nodes but just split them on +- * 3 classes - large, medium, and small, so they are roughly sorted. ++ * @sleb: describes nodes to sort and contains the result on exit ++ * @nondata: contains non-data nodes on exit ++ * @min: minimum node size is returned here ++ * ++ * This function sorts the list of inodes to garbage collect. First of all, it ++ * kills obsolete nodes and separates data and non-data nodes to the ++ * @sleb->nodes and @nondata lists correspondingly. ++ * ++ * Data nodes are then sorted in block number order - this is important for ++ * bulk-read; data nodes with lower inode number go before data nodes with ++ * higher inode number, and data nodes with lower block number go before data ++ * nodes with higher block number; ++ * ++ * Non-data nodes are sorted as follows. ++ * o First go inode nodes - they are sorted in descending length order. ++ * o Then go directory entry nodes - they are sorted in hash order, which ++ * should supposedly optimize 'readdir()'. Direntry nodes with lower parent ++ * inode number go before direntry nodes with higher parent inode number, ++ * and direntry nodes with lower name hash values go before direntry nodes ++ * with higher name hash values. + * +- * This function returns zero in case of success, %-EAGAIN if commit is +- * required, and other negative error codes in case of other failures. ++ * This function returns zero in case of success and a negative error code in ++ * case of failure. 
+ */ +-static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) ++static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ struct list_head *nondata, int *min) + { ++ int err; + struct ubifs_scan_node *snod, *tmp; +- struct list_head data, large, medium, small; +- struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; +- int avail, err, min = INT_MAX; +- unsigned int blk = 0; +- ino_t inum = 0; +- +- INIT_LIST_HEAD(&data); +- INIT_LIST_HEAD(&large); +- INIT_LIST_HEAD(&medium); +- INIT_LIST_HEAD(&small); +- +- while (!list_empty(&sleb->nodes)) { +- struct list_head *lst = sleb->nodes.next; +- +- snod = list_entry(lst, struct ubifs_scan_node, list); +- +- ubifs_assert(snod->type != UBIFS_IDX_NODE); +- ubifs_assert(snod->type != UBIFS_REF_NODE); +- ubifs_assert(snod->type != UBIFS_CS_NODE); ++ ++ *min = INT_MAX; ++ ++ /* Separate data nodes and non-data nodes */ ++ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { ++ ubifs_assert(snod->type == UBIFS_INO_NODE || ++ snod->type == UBIFS_DATA_NODE || ++ snod->type == UBIFS_DENT_NODE || ++ snod->type == UBIFS_XENT_NODE || ++ snod->type == UBIFS_TRUN_NODE); ++ ++ if (snod->type != UBIFS_INO_NODE && ++ snod->type != UBIFS_DATA_NODE && ++ snod->type != UBIFS_DENT_NODE && ++ snod->type != UBIFS_XENT_NODE) { ++ /* Probably truncation node, zap it */ ++ list_del(&snod->list); ++ kfree(snod); ++ continue; ++ } ++ ++ ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY || ++ key_type(c, &snod->key) == UBIFS_INO_KEY || ++ key_type(c, &snod->key) == UBIFS_DENT_KEY || ++ key_type(c, &snod->key) == UBIFS_XENT_KEY); + + err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, + snod->offs, 0); + if (err < 0) +- goto out; ++ return err; + +- list_del(lst); + if (!err) { + /* The node is obsolete, remove it from the list */ ++ list_del(&snod->list); + kfree(snod); + continue; + } + +- /* +- * Sort the list of nodes so that data nodes go first, large +- * nodes go second, and small nodes go last. 
+- */ +- if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { +- if (inum != key_inum(c, &snod->key)) { +- if (inum) { +- /* +- * Try to move data nodes from the same +- * inode together. +- */ +- err = joinup(c, sleb, inum, blk, &data); +- if (err) +- goto out; +- } +- inum = key_inum(c, &snod->key); +- blk = key_block(c, &snod->key); +- } +- list_add_tail(lst, &data); +- } else if (snod->len > MEDIUM_NODE_WM) +- list_add_tail(lst, &large); +- else if (snod->len > SMALL_NODE_WM) +- list_add_tail(lst, &medium); +- else +- list_add_tail(lst, &small); +- +- /* And find the smallest node */ +- if (snod->len < min) +- min = snod->len; ++ if (snod->len < *min) ++ *min = snod->len; ++ ++ if (key_type(c, &snod->key) != UBIFS_DATA_KEY) ++ list_move_tail(&snod->list, nondata); + } + +- /* +- * Join the tree lists so that we'd have one roughly sorted list +- * ('large' will be the head of the joined list). +- */ +- list_splice(&data, &large); +- list_splice(&medium, large.prev); +- list_splice(&small, large.prev); ++ /* Sort data and non-data nodes */ ++ list_sort(c, &sleb->nodes, &data_nodes_cmp); ++ list_sort(c, nondata, &nondata_nodes_cmp); ++ ++ err = dbg_check_data_nodes_order(c, &sleb->nodes); ++ if (err) ++ return err; ++ err = dbg_check_nondata_nodes_order(c, nondata); ++ if (err) ++ return err; ++ return 0; ++} ++ ++/** ++ * move_node - move a node. ++ * @c: UBIFS file-system description object ++ * @sleb: describes the LEB to move nodes from ++ * @snod: the mode to move ++ * @wbuf: write-buffer to move node to ++ * ++ * This function moves node @snod to @wbuf, changes TNC correspondingly, and ++ * destroys @snod. Returns zero in case of success and a negative error code in ++ * case of failure. 
++ */ ++static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ++ struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf) ++{ ++ int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used; ++ ++ cond_resched(); ++ err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len); ++ if (err) ++ return err; ++ ++ err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, ++ snod->offs, new_lnum, new_offs, ++ snod->len); ++ list_del(&snod->list); ++ kfree(snod); ++ return err; ++} ++ ++/** ++ * move_nodes - move nodes. ++ * @c: UBIFS file-system description object ++ * @sleb: describes the LEB to move nodes from ++ * ++ * This function moves valid nodes from data LEB described by @sleb to the GC ++ * journal head. This function returns zero in case of success, %-EAGAIN if ++ * commit is required, and other negative error codes in case of other ++ * failures. ++ */ ++static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) ++{ ++ int err, min; ++ LIST_HEAD(nondata); ++ struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + + if (wbuf->lnum == -1) { + /* +@@ -256,42 +358,59 @@ static int move_nodes(struct ubifs_info + */ + err = switch_gc_head(c); + if (err) +- goto out; ++ return err; + } + ++ err = sort_nodes(c, sleb, &nondata, &min); ++ if (err) ++ goto out; ++ + /* Write nodes to their new location. Use the first-fit strategy */ + while (1) { +- avail = c->leb_size - wbuf->offs - wbuf->used; +- list_for_each_entry_safe(snod, tmp, &large, list) { +- int new_lnum, new_offs; ++ int avail; ++ struct ubifs_scan_node *snod, *tmp; ++ ++ /* Move data nodes */ ++ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { ++ avail = c->leb_size - wbuf->offs - wbuf->used; ++ if (snod->len > avail) ++ /* ++ * Do not skip data nodes in order to optimize ++ * bulk-read. 
++ */ ++ break; ++ ++ err = move_node(c, sleb, snod, wbuf); ++ if (err) ++ goto out; ++ } + ++ /* Move non-data nodes */ ++ list_for_each_entry_safe(snod, tmp, &nondata, list) { ++ avail = c->leb_size - wbuf->offs - wbuf->used; + if (avail < min) + break; + +- if (snod->len > avail) +- /* This node does not fit */ ++ if (snod->len > avail) { ++ /* ++ * Keep going only if this is an inode with ++ * some data. Otherwise stop and switch the GC ++ * head. IOW, we assume that data-less inode ++ * nodes and direntry nodes are roughly of the ++ * same size. ++ */ ++ if (key_type(c, &snod->key) == UBIFS_DENT_KEY || ++ snod->len == UBIFS_INO_NODE_SZ) ++ break; + continue; ++ } + +- cond_resched(); +- +- new_lnum = wbuf->lnum; +- new_offs = wbuf->offs + wbuf->used; +- err = ubifs_wbuf_write_nolock(wbuf, snod->node, +- snod->len); ++ err = move_node(c, sleb, snod, wbuf); + if (err) + goto out; +- err = ubifs_tnc_replace(c, &snod->key, sleb->lnum, +- snod->offs, new_lnum, new_offs, +- snod->len); +- if (err) +- goto out; +- +- avail = c->leb_size - wbuf->offs - wbuf->used; +- list_del(&snod->list); +- kfree(snod); + } + +- if (list_empty(&large)) ++ if (list_empty(&sleb->nodes) && list_empty(&nondata)) + break; + + /* +@@ -306,10 +425,7 @@ static int move_nodes(struct ubifs_info + return 0; + + out: +- list_for_each_entry_safe(snod, tmp, &large, list) { +- list_del(&snod->list); +- kfree(snod); +- } ++ list_splice_tail(&nondata, &sleb->nodes); + return err; + } + +@@ -361,11 +477,42 @@ int ubifs_garbage_collect_leb(struct ubi + ubifs_assert(c->gc_lnum != lnum); + ubifs_assert(wbuf->lnum != lnum); + ++ if (lp->free + lp->dirty == c->leb_size) { ++ /* Special case - a free LEB */ ++ dbg_gc("LEB %d is free, return it", lp->lnum); ++ ubifs_assert(!(lp->flags & LPROPS_INDEX)); ++ ++ if (lp->free != c->leb_size) { ++ /* ++ * Write buffers must be sync'd before unmapping ++ * freeable LEBs, because one of them may contain data ++ * which obsoletes something in 'lp->pnum'. 
++ */ ++ err = gc_sync_wbufs(c); ++ if (err) ++ return err; ++ err = ubifs_change_one_lp(c, lp->lnum, c->leb_size, ++ 0, 0, 0, 0); ++ if (err) ++ return err; ++ } ++ err = ubifs_leb_unmap(c, lp->lnum); ++ if (err) ++ return err; ++ ++ if (c->gc_lnum == -1) { ++ c->gc_lnum = lnum; ++ return LEB_RETAINED; ++ } ++ ++ return LEB_FREED; ++ } ++ + /* + * We scan the entire LEB even though we only really need to scan up to + * (c->leb_size - lp->free). + */ +- sleb = ubifs_scan(c, lnum, 0, c->sbuf); ++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + +@@ -504,13 +651,14 @@ int ubifs_garbage_collect(struct ubifs_i + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + + ubifs_assert_cmt_locked(c); ++ ubifs_assert(!c->ro_media && !c->ro_mount); + + if (ubifs_gc_should_commit(c)) + return -EAGAIN; + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + +- if (c->ro_media) { ++ if (c->ro_error) { + ret = -EROFS; + goto out_unlock; + } +@@ -569,51 +717,18 @@ int ubifs_garbage_collect(struct ubifs_i + "(min. space %d)", lp.lnum, lp.free, lp.dirty, + lp.free + lp.dirty, min_space); + +- if (lp.free + lp.dirty == c->leb_size) { +- /* An empty LEB was returned */ +- dbg_gc("LEB %d is free, return it", lp.lnum); +- /* +- * ubifs_find_dirty_leb() doesn't return freeable index +- * LEBs. +- */ +- ubifs_assert(!(lp.flags & LPROPS_INDEX)); +- if (lp.free != c->leb_size) { +- /* +- * Write buffers must be sync'd before +- * unmapping freeable LEBs, because one of them +- * may contain data which obsoletes something +- * in 'lp.pnum'. 
+- */ +- ret = gc_sync_wbufs(c); +- if (ret) +- goto out; +- ret = ubifs_change_one_lp(c, lp.lnum, +- c->leb_size, 0, 0, 0, +- 0); +- if (ret) +- goto out; +- } +- ret = ubifs_leb_unmap(c, lp.lnum); +- if (ret) +- goto out; +- ret = lp.lnum; +- break; +- } +- + space_before = c->leb_size - wbuf->offs - wbuf->used; + if (wbuf->lnum == -1) + space_before = 0; + + ret = ubifs_garbage_collect_leb(c, &lp); + if (ret < 0) { +- if (ret == -EAGAIN || ret == -ENOSPC) { ++ if (ret == -EAGAIN) { + /* +- * These codes are not errors, so we have to +- * return the LEB to lprops. But if the +- * 'ubifs_return_leb()' function fails, its +- * failure code is propagated to the caller +- * instead of the original '-EAGAIN' or +- * '-ENOSPC'. ++ * This is not error, so we have to return the ++ * LEB to lprops. But if 'ubifs_return_leb()' ++ * fails, its failure code is propagated to the ++ * caller instead of the original '-EAGAIN'. + */ + err = ubifs_return_leb(c, lp.lnum); + if (err) +@@ -703,8 +818,8 @@ out_unlock: + out: + ubifs_assert(ret < 0); + ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); +- ubifs_ro_mode(c, ret); + ubifs_wbuf_sync_nolock(wbuf); ++ ubifs_ro_mode(c, ret); + mutex_unlock(&wbuf->io_mutex); + ubifs_return_leb(c, lp.lnum); + return ret; +diff -uprN linux-2.6.28/fs/ubifs/io.c ubifs-v2.6.28/fs/ubifs/io.c +--- linux-2.6.28/fs/ubifs/io.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/io.c 2011-06-15 14:22:09.000000000 -0400 +@@ -31,6 +31,26 @@ + * buffer is full or when it is not used for some time (by timer). This is + * similar to the mechanism is used by JFFS2. + * ++ * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum ++ * write size (@c->max_write_size). The latter is the maximum amount of bytes ++ * the underlying flash is able to program at a time, and writing in ++ * @c->max_write_size units should presumably be faster. Obviously, ++ * @c->min_io_size <= @c->max_write_size. 
Write-buffers are of ++ * @c->max_write_size bytes in size for maximum performance. However, when a ++ * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size ++ * boundary) which contains data is written, not the whole write-buffer, ++ * because this is more space-efficient. ++ * ++ * This optimization adds few complications to the code. Indeed, on the one ++ * hand, we want to write in optimal @c->max_write_size bytes chunks, which ++ * also means aligning writes at the @c->max_write_size bytes offsets. On the ++ * other hand, we do not want to waste space when synchronizing the write ++ * buffer, so during synchronization we writes in smaller chunks. And this makes ++ * the next write offset to be not aligned to @c->max_write_size bytes. So the ++ * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned ++ * to @c->max_write_size bytes again. We do this by temporarily shrinking ++ * write-buffer size (@wbuf->size). ++ * + * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by + * mutexes defined inside these objects. Since sometimes upper-level code + * has to lock the write-buffer (e.g. journal space reservation code), many +@@ -46,8 +66,8 @@ + * UBIFS uses padding when it pads to the next min. I/O unit. In this case it + * uses padding nodes or padding bytes, if the padding node does not fit. + * +- * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes +- * every time they are read from the flash media. ++ * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when ++ * they are read from the flash media. 
+ */ + + #include +@@ -60,9 +80,10 @@ + */ + void ubifs_ro_mode(struct ubifs_info *c, int err) + { +- if (!c->ro_media) { +- c->ro_media = 1; ++ if (!c->ro_error) { ++ c->ro_error = 1; + c->no_chk_data_crc = 0; ++ c->vfs_sb->s_flags |= MS_RDONLY; + ubifs_warn("switched to read-only mode, error %d", err); + dbg_dump_stack(); + } +@@ -86,8 +107,12 @@ void ubifs_ro_mode(struct ubifs_info *c, + * This function may skip data nodes CRC checking if @c->no_chk_data_crc is + * true, which is controlled by corresponding UBIFS mount option. However, if + * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is +- * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is +- * ignored and CRC is checked. ++ * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are ++ * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC ++ * is checked. This is because during mounting or re-mounting from R/O mode to ++ * R/W mode we may read journal nodes (when replying the journal or doing the ++ * recovery) and the journal nodes may potentially be corrupted, so checking is ++ * required. + * + * This function returns zero in case of success and %-EUCLEAN in case of bad + * CRC or magic. 
+@@ -129,8 +154,8 @@ int ubifs_check_node(const struct ubifs_ + node_len > c->ranges[type].max_len) + goto out_len; + +- if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && +- c->no_chk_data_crc) ++ if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting && ++ !c->remounting_rw && c->no_chk_data_crc) + return 0; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); +@@ -297,7 +322,7 @@ static enum hrtimer_restart wbuf_timer_c + { + struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); + +- dbg_io("jhead %d", wbuf->jhead); ++ dbg_io("jhead %s", dbg_jhead(wbuf->jhead)); + wbuf->need_sync = 1; + wbuf->c->need_wbuf_sync = 1; + ubifs_wake_up_bgt(wbuf->c); +@@ -314,7 +339,8 @@ static void new_wbuf_timer_nolock(struct + + if (wbuf->no_timer) + return; +- dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead, ++ dbg_io("set timer for jhead %s, %llu-%llu millisecs", ++ dbg_jhead(wbuf->jhead), + div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), + div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, + USEC_PER_SEC)); +@@ -340,41 +366,73 @@ static void cancel_wbuf_timer_nolock(str + * + * This function synchronizes write-buffer @buf and returns zero in case of + * success or a negative error code in case of failure. ++ * ++ * Note, although write-buffers are of @c->max_write_size, this function does ++ * not necessarily writes all @c->max_write_size bytes to the flash. Instead, ++ * if the write-buffer is only partially filled with data, only the used part ++ * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized. ++ * This way we waste less space. 
+ */ + int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) + { + struct ubifs_info *c = wbuf->c; +- int err, dirt; ++ int err, dirt, sync_len; + + cancel_wbuf_timer_nolock(wbuf); + if (!wbuf->used || wbuf->lnum == -1) + /* Write-buffer is empty or not seeked */ + return 0; + +- dbg_io("LEB %d:%d, %d bytes, jhead %d", +- wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead); +- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); ++ dbg_io("LEB %d:%d, %d bytes, jhead %s", ++ wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); + ubifs_assert(!(wbuf->avail & 7)); +- ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); ++ ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size); ++ ubifs_assert(wbuf->size >= c->min_io_size); ++ ubifs_assert(wbuf->size <= c->max_write_size); ++ ubifs_assert(wbuf->size % c->min_io_size == 0); ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); + +- if (c->ro_media) ++ if (c->ro_error) + return -EROFS; + +- ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); ++ /* ++ * Do not write whole write buffer but write only the minimum necessary ++ * amount of min. I/O units. ++ */ ++ sync_len = ALIGN(wbuf->used, c->min_io_size); ++ dirt = sync_len - wbuf->used; ++ if (dirt) ++ ubifs_pad(c, wbuf->buf + wbuf->used, dirt); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, +- c->min_io_size, wbuf->dtype); ++ sync_len, wbuf->dtype); + if (err) { + ubifs_err("cannot write %d bytes to LEB %d:%d", +- c->min_io_size, wbuf->lnum, wbuf->offs); ++ sync_len, wbuf->lnum, wbuf->offs); + dbg_dump_stack(); + return err; + } + +- dirt = wbuf->avail; +- + spin_lock(&wbuf->lock); +- wbuf->offs += c->min_io_size; +- wbuf->avail = c->min_io_size; ++ wbuf->offs += sync_len; ++ /* ++ * Now @wbuf->offs is not necessarily aligned to @c->max_write_size. 
++ * But our goal is to optimize writes and make sure we write in ++ * @c->max_write_size chunks and to @c->max_write_size-aligned offset. ++ * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make ++ * sure that @wbuf->offs + @wbuf->size is aligned to ++ * @c->max_write_size. This way we make sure that after next ++ * write-buffer flush we are again at the optimal offset (aligned to ++ * @c->max_write_size). ++ */ ++ if (c->leb_size - wbuf->offs < c->max_write_size) ++ wbuf->size = c->leb_size - wbuf->offs; ++ else if (wbuf->offs & (c->max_write_size - 1)) ++ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; ++ else ++ wbuf->size = c->max_write_size; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); +@@ -393,31 +451,31 @@ int ubifs_wbuf_sync_nolock(struct ubifs_ + * @dtype: data type + * + * This function targets the write-buffer to logical eraseblock @lnum:@offs. +- * The write-buffer is synchronized if it is not empty. Returns zero in case of +- * success and a negative error code in case of failure. ++ * The write-buffer has to be empty. Returns zero in case of success and a ++ * negative error code in case of failure. 
+ */ + int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, + int dtype) + { + const struct ubifs_info *c = wbuf->c; + +- dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead); ++ dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead)); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); + ubifs_assert(offs >= 0 && offs <= c->leb_size); + ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); + ubifs_assert(lnum != wbuf->lnum); +- +- if (wbuf->used > 0) { +- int err = ubifs_wbuf_sync_nolock(wbuf); +- +- if (err) +- return err; +- } ++ ubifs_assert(wbuf->used == 0); + + spin_lock(&wbuf->lock); + wbuf->lnum = lnum; + wbuf->offs = offs; +- wbuf->avail = c->min_io_size; ++ if (c->leb_size - wbuf->offs < c->max_write_size) ++ wbuf->size = c->leb_size - wbuf->offs; ++ else if (wbuf->offs & (c->max_write_size - 1)) ++ wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs; ++ else ++ wbuf->size = c->max_write_size; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + spin_unlock(&wbuf->lock); + wbuf->dtype = dtype; +@@ -437,11 +495,12 @@ int ubifs_bg_wbufs_sync(struct ubifs_inf + { + int err, i; + ++ ubifs_assert(!c->ro_media && !c->ro_mount); + if (!c->need_wbuf_sync) + return 0; + c->need_wbuf_sync = 0; + +- if (c->ro_media) { ++ if (c->ro_error) { + err = -EROFS; + goto out_timers; + } +@@ -496,8 +555,9 @@ out_timers: + * + * This function writes data to flash via write-buffer @wbuf. This means that + * the last piece of the node won't reach the flash media immediately if it +- * does not take whole minimal I/O unit. Instead, the node will sit in RAM +- * until the write-buffer is synchronized (e.g., by timer). ++ * does not take whole max. write unit (@c->max_write_size). Instead, the node ++ * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or ++ * because more data are appended to the write-buffer). + * + * This function returns zero in case of success and a negative error code in + * case of failure. 
If the node cannot be written because there is no more +@@ -506,16 +566,23 @@ out_timers: + int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) + { + struct ubifs_info *c = wbuf->c; +- int err, written, n, aligned_len = ALIGN(len, 8), offs; ++ int err, written, n, aligned_len = ALIGN(len, 8); + +- dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len, +- dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead, +- wbuf->lnum, wbuf->offs + wbuf->used); ++ dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, ++ dbg_ntype(((struct ubifs_ch *)buf)->node_type), ++ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used); + ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); + ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); + ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); +- ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); ++ ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size); ++ ubifs_assert(wbuf->size >= c->min_io_size); ++ ubifs_assert(wbuf->size <= c->max_write_size); ++ ubifs_assert(wbuf->size % c->min_io_size == 0); + ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ ubifs_assert(!c->space_fixup); ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); + + if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { + err = -ENOSPC; +@@ -524,7 +591,7 @@ int ubifs_wbuf_write_nolock(struct ubifs + + cancel_wbuf_timer_nolock(wbuf); + +- if (c->ro_media) ++ if (c->ro_error) + return -EROFS; + + if (aligned_len <= wbuf->avail) { +@@ -535,17 +602,21 @@ int ubifs_wbuf_write_nolock(struct ubifs + memcpy(wbuf->buf + wbuf->used, buf, len); + + if (aligned_len == wbuf->avail) { +- dbg_io("flush jhead %d wbuf to LEB %d:%d", +- wbuf->jhead, wbuf->lnum, wbuf->offs); ++ dbg_io("flush jhead %s wbuf to LEB %d:%d", ++ dbg_jhead(wbuf->jhead), wbuf->lnum, 
wbuf->offs); + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, +- wbuf->offs, c->min_io_size, ++ wbuf->offs, wbuf->size, + wbuf->dtype); + if (err) + goto out; + + spin_lock(&wbuf->lock); +- wbuf->offs += c->min_io_size; +- wbuf->avail = c->min_io_size; ++ wbuf->offs += wbuf->size; ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ wbuf->size = c->max_write_size; ++ else ++ wbuf->size = c->leb_size - wbuf->offs; ++ wbuf->avail = wbuf->size; + wbuf->used = 0; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); +@@ -559,39 +630,63 @@ int ubifs_wbuf_write_nolock(struct ubifs + goto exit; + } + +- /* +- * The node is large enough and does not fit entirely within current +- * minimal I/O unit. We have to fill and flush write-buffer and switch +- * to the next min. I/O unit. +- */ +- dbg_io("flush jhead %d wbuf to LEB %d:%d", +- wbuf->jhead, wbuf->lnum, wbuf->offs); +- memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); +- err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, +- c->min_io_size, wbuf->dtype); +- if (err) +- goto out; ++ written = 0; + +- offs = wbuf->offs + c->min_io_size; +- len -= wbuf->avail; +- aligned_len -= wbuf->avail; +- written = wbuf->avail; ++ if (wbuf->used) { ++ /* ++ * The node is large enough and does not fit entirely within ++ * current available space. We have to fill and flush ++ * write-buffer and switch to the next max. write unit. ++ */ ++ dbg_io("flush jhead %s wbuf to LEB %d:%d", ++ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); ++ memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); ++ err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, ++ wbuf->size, wbuf->dtype); ++ if (err) ++ goto out; ++ ++ wbuf->offs += wbuf->size; ++ len -= wbuf->avail; ++ aligned_len -= wbuf->avail; ++ written += wbuf->avail; ++ } else if (wbuf->offs & (c->max_write_size - 1)) { ++ /* ++ * The write-buffer offset is not aligned to ++ * @c->max_write_size and @wbuf->size is less than ++ * @c->max_write_size. 
Write @wbuf->size bytes to make sure the ++ * following writes are done in optimal @c->max_write_size ++ * chunks. ++ */ ++ dbg_io("write %d bytes to LEB %d:%d", ++ wbuf->size, wbuf->lnum, wbuf->offs); ++ err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, ++ wbuf->size, wbuf->dtype); ++ if (err) ++ goto out; ++ ++ wbuf->offs += wbuf->size; ++ len -= wbuf->size; ++ aligned_len -= wbuf->size; ++ written += wbuf->size; ++ } + + /* +- * The remaining data may take more whole min. I/O units, so write the +- * remains multiple to min. I/O unit size directly to the flash media. ++ * The remaining data may take more whole max. write units, so write the ++ * remains multiple to max. write unit size directly to the flash media. + * We align node length to 8-byte boundary because we anyway flash wbuf + * if the remaining space is less than 8 bytes. + */ +- n = aligned_len >> c->min_io_shift; ++ n = aligned_len >> c->max_write_shift; + if (n) { +- n <<= c->min_io_shift; +- dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); +- err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, +- wbuf->dtype); ++ n <<= c->max_write_shift; ++ dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, ++ wbuf->offs); ++ err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, ++ wbuf->offs, n, wbuf->dtype); + if (err) + goto out; +- offs += n; ++ wbuf->offs += n; + aligned_len -= n; + len -= n; + written += n; +@@ -601,14 +696,17 @@ int ubifs_wbuf_write_nolock(struct ubifs + if (aligned_len) + /* + * And now we have what's left and what does not take whole +- * min. I/O unit, so write it to the write-buffer and we are ++ * max. write unit, so write it to the write-buffer and we are + * done. 
+ */ + memcpy(wbuf->buf, buf + written, len); + +- wbuf->offs = offs; ++ if (c->leb_size - wbuf->offs >= c->max_write_size) ++ wbuf->size = c->max_write_size; ++ else ++ wbuf->size = c->leb_size - wbuf->offs; ++ wbuf->avail = wbuf->size - aligned_len; + wbuf->used = aligned_len; +- wbuf->avail = c->min_io_size - aligned_len; + wbuf->next_ino = 0; + spin_unlock(&wbuf->lock); + +@@ -660,8 +758,10 @@ int ubifs_write_node(struct ubifs_info * + buf_len); + ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ ubifs_assert(!c->space_fixup); + +- if (c->ro_media) ++ if (c->ro_error) + return -EROFS; + + ubifs_prepare_node(c, buf, len, 1); +@@ -698,8 +798,8 @@ int ubifs_read_node_wbuf(struct ubifs_wb + int err, rlen, overlap; + struct ubifs_ch *ch = buf; + +- dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs, +- dbg_ntype(type), len, wbuf->jhead); ++ dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs, ++ dbg_ntype(type), len, dbg_jhead(wbuf->jhead)); + ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); + ubifs_assert(!(offs & 7) && offs < c->leb_size); + ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); +@@ -812,7 +912,8 @@ int ubifs_read_node(const struct ubifs_i + return 0; + + out: +- ubifs_err("bad node at LEB %d:%d", lnum, offs); ++ ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, ++ ubi_is_mapped(c->ubi, lnum)); + dbg_dump_node(c, buf); + dbg_dump_stack(); + return -EINVAL; +@@ -830,11 +931,11 @@ int ubifs_wbuf_init(struct ubifs_info *c + { + size_t size; + +- wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); ++ wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL); + if (!wbuf->buf) + return -ENOMEM; + +- size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); ++ size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); + wbuf->inodes = kmalloc(size, GFP_KERNEL); + if (!wbuf->inodes) { + 
kfree(wbuf->buf); +@@ -844,7 +945,14 @@ int ubifs_wbuf_init(struct ubifs_info *c + + wbuf->used = 0; + wbuf->lnum = wbuf->offs = -1; +- wbuf->avail = c->min_io_size; ++ /* ++ * If the LEB starts at the max. write size aligned address, then ++ * write-buffer size has to be set to @c->max_write_size. Otherwise, ++ * set it to something smaller so that it ends at the closest max. ++ * write size boundary. ++ */ ++ size = c->max_write_size - (c->leb_start % c->max_write_size); ++ wbuf->avail = wbuf->size = size; + wbuf->dtype = UBI_UNKNOWN; + wbuf->sync_callback = NULL; + mutex_init(&wbuf->io_mutex); +diff -uprN linux-2.6.28/fs/ubifs/journal.c ubifs-v2.6.28/fs/ubifs/journal.c +--- linux-2.6.28/fs/ubifs/journal.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/journal.c 2011-06-15 14:22:09.000000000 -0400 +@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused + */ + static int reserve_space(struct ubifs_info *c, int jhead, int len) + { +- int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze; ++ int err = 0, err1, retries = 0, avail, lnum, offs, squeeze; + struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf; + + /* +@@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_in + * better to try to allocate space at the ends of eraseblocks. This is + * what the squeeze parameter does. + */ ++ ubifs_assert(!c->ro_media && !c->ro_mount); + squeeze = (jhead == BASEHD); + again: + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + +- if (c->ro_media) { ++ if (c->ro_error) { + err = -EROFS; + goto out_unlock; + } +@@ -139,16 +140,9 @@ again: + * Write buffer wasn't seek'ed or there is no enough space - look for an + * LEB with some empty space. 
+ */ +- lnum = ubifs_find_free_space(c, len, &free, squeeze); +- if (lnum >= 0) { +- /* Found an LEB, add it to the journal head */ +- offs = c->leb_size - free; +- err = ubifs_add_bud_to_log(c, jhead, lnum, offs); +- if (err) +- goto out_return; +- /* A new bud was successfully allocated and added to the log */ ++ lnum = ubifs_find_free_space(c, len, &offs, squeeze); ++ if (lnum >= 0) + goto out; +- } + + err = lnum; + if (err != -ENOSPC) +@@ -159,7 +153,7 @@ again: + * some. But the write-buffer mutex has to be unlocked because + * GC also takes it. + */ +- dbg_jnl("no free space jhead %d, run GC", jhead); ++ dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead)); + mutex_unlock(&wbuf->io_mutex); + + lnum = ubifs_garbage_collect(c, 0); +@@ -174,7 +168,8 @@ again: + * because we dropped @wbuf->io_mutex, so try once + * again. + */ +- dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead); ++ dbg_jnl("GC couldn't make a free LEB for jhead %s", ++ dbg_jhead(jhead)); + if (retries++ < 2) { + dbg_jnl("retry (%d)", retries); + goto again; +@@ -185,13 +180,13 @@ again: + } + + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); +- dbg_jnl("got LEB %d for jhead %d", lnum, jhead); ++ dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead)); + avail = c->leb_size - wbuf->offs - wbuf->used; + + if (wbuf->lnum != -1 && avail >= len) { + /* + * Someone else has switched the journal head and we have +- * enough space now. This happens when more then one process is ++ * enough space now. This happens when more than one process is + * trying to write to the same journal head at the same time. + */ + dbg_jnl("return LEB %d back, already have LEB %d:%d", +@@ -202,12 +197,23 @@ again: + return 0; + } + +- err = ubifs_add_bud_to_log(c, jhead, lnum, 0); +- if (err) +- goto out_return; + offs = 0; + + out: ++ /* ++ * Make sure we synchronize the write-buffer before we add the new bud ++ * to the log. 
Otherwise we may have a power cut after the log ++ * reference node for the last bud (@lnum) is written but before the ++ * write-buffer data are written to the next-to-last bud ++ * (@wbuf->lnum). And the effect would be that the recovery would see ++ * that there is corruption in the next-to-last bud. ++ */ ++ err = ubifs_wbuf_sync_nolock(wbuf); ++ if (err) ++ goto out_return; ++ err = ubifs_add_bud_to_log(c, jhead, lnum, offs); ++ if (err) ++ goto out_return; + err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); + if (err) + goto out_unlock; +@@ -256,7 +262,8 @@ static int write_node(struct ubifs_info + *lnum = c->jheads[jhead].wbuf.lnum; + *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; + +- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); ++ dbg_jnl("jhead %s, LEB %d:%d, len %d", ++ dbg_jhead(jhead), *lnum, *offs, len); + ubifs_prepare_node(c, node, len, 0); + + return ubifs_wbuf_write_nolock(wbuf, node, len); +@@ -286,7 +293,8 @@ static int write_head(struct ubifs_info + + *lnum = c->jheads[jhead].wbuf.lnum; + *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; +- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); ++ dbg_jnl("jhead %s, LEB %d:%d, len %d", ++ dbg_jhead(jhead), *lnum, *offs, len); + + err = ubifs_wbuf_write_nolock(wbuf, buf, len); + if (err) +@@ -377,10 +385,8 @@ out: + if (err == -ENOSPC) { + /* This are some budgeting problems, print useful information */ + down_write(&c->commit_sem); +- spin_lock(&c->space_lock); + dbg_dump_stack(); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + cmt_retries = dbg_check_lprops(c); + up_write(&c->commit_sem); +@@ -469,10 +475,7 @@ static void pack_inode(struct ubifs_info + ino->flags = cpu_to_le32(ui->flags); + ino->size = cpu_to_le64(ui->ui_size); + ino->nlink = cpu_to_le32(inode->i_nlink); +- if (ui->compr_type == UBIFS_COMPR_LZO999) +- ino->compr_type = 
cpu_to_le16(UBIFS_COMPR_LZO); +- else +- ino->compr_type = cpu_to_le16(ui->compr_type); ++ ino->compr_type = cpu_to_le16(ui->compr_type); + ino->data_len = cpu_to_le32(ui->data_len); + ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt); + ino->xattr_size = cpu_to_le32(ui->xattr_size); +@@ -666,6 +669,7 @@ out_free: + + out_release: + release_head(c, BASEHD); ++ kfree(dent); + out_ro: + ubifs_ro_mode(c, err); + if (last_reference) +@@ -690,7 +694,7 @@ int ubifs_jnl_write_data(struct ubifs_in + { + struct ubifs_data_node *data; + int err, lnum, offs, compr_type, out_len; +- int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; ++ int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1; + struct ubifs_inode *ui = ubifs_inode(inode); + + dbg_jnl("ino %lu, blk %u, len %d, key %s", +@@ -698,9 +702,19 @@ int ubifs_jnl_write_data(struct ubifs_in + DBGKEY(key)); + ubifs_assert(len <= UBIFS_BLOCK_SIZE); + +- data = kmalloc(dlen, GFP_NOFS); +- if (!data) +- return -ENOMEM; ++ data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN); ++ if (!data) { ++ /* ++ * Fall-back to the write reserve buffer. Note, we might be ++ * currently on the memory reclaim path, when the kernel is ++ * trying to free some memory by writing out dirty pages. The ++ * write reserve buffer helps us to guarantee that we are ++ * always able to write the data. 
++ */ ++ allocated = 0; ++ mutex_lock(&c->write_reserve_mutex); ++ data = c->write_reserve_buf; ++ } + + data->ch.node_type = UBIFS_DATA_NODE; + key_write(c, key, &data->key); +@@ -736,7 +750,10 @@ int ubifs_jnl_write_data(struct ubifs_in + goto out_ro; + + finish_reservation(c); +- kfree(data); ++ if (!allocated) ++ mutex_unlock(&c->write_reserve_mutex); ++ else ++ kfree(data); + return 0; + + out_release: +@@ -745,7 +762,10 @@ out_ro: + ubifs_ro_mode(c, err); + finish_reservation(c); + out_free: +- kfree(data); ++ if (!allocated) ++ mutex_unlock(&c->write_reserve_mutex); ++ else ++ kfree(data); + return err; + } + +@@ -1369,7 +1389,7 @@ out_ro: + * @host: host inode + * + * This function writes the updated version of an extended attribute inode and +- * the host inode tho the journal (to the base head). The host inode is written ++ * the host inode to the journal (to the base head). The host inode is written + * after the extended attribute inode in order to guarantee that the extended + * attribute will be flushed when the inode is synchronized by 'fsync()' and + * consequently, the write-buffer is synchronized. This function returns zero +diff -uprN linux-2.6.28/fs/ubifs/Kconfig ubifs-v2.6.28/fs/ubifs/Kconfig +--- linux-2.6.28/fs/ubifs/Kconfig 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/fs/ubifs/Kconfig 2011-06-15 14:22:09.000000000 -0400 +@@ -40,33 +40,21 @@ config UBIFS_FS_ZLIB + depends on UBIFS_FS + default y + help +- Zlib copresses better then LZO but it is slower. Say 'Y' if unsure. ++ Zlib compresses better than LZO but it is slower. Say 'Y' if unsure. + + # Debugging-related stuff + config UBIFS_FS_DEBUG +- bool "Enable debugging" ++ bool "Enable debugging support" + depends on UBIFS_FS + select DEBUG_FS +- select KALLSYMS_ALL ++ select KALLSYMS + help +- This option enables UBIFS debugging. 
+- +-config UBIFS_FS_DEBUG_MSG_LVL +- int "Default message level (0 = no extra messages, 3 = lots)" +- depends on UBIFS_FS_DEBUG +- default "0" +- help +- This controls the amount of debugging messages produced by UBIFS. +- If reporting bugs, please try to have available a full dump of the +- messages at level 1 while the misbehaviour was occurring. Level 2 +- may become necessary if level 1 messages were not enough to find the +- bug. Generally Level 3 should be avoided. +- +-config UBIFS_FS_DEBUG_CHKS +- bool "Enable extra checks" +- depends on UBIFS_FS_DEBUG +- help +- If extra checks are enabled UBIFS will check the consistency of its +- internal data structures during operation. However, UBIFS performance +- is dramatically slower when this option is selected especially if the +- file system is large. ++ This option enables UBIFS debugging support. It makes sure various ++ assertions, self-checks, debugging messages and test modes are compiled ++ in (this all is compiled out otherwise). Assertions are light-weight ++ and this option also enables them. Self-checks, debugging messages and ++ test modes are switched off by default. Thus, it is safe and actually ++ recommended to have debugging support enabled, and it should not slow ++ down UBIFS. You can then further enable / disable individual debugging ++ features using UBIFS module parameters and the corresponding sysfs ++ interfaces. +diff -uprN linux-2.6.28/fs/ubifs/key.h ubifs-v2.6.28/fs/ubifs/key.h +--- linux-2.6.28/fs/ubifs/key.h 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/key.h 2011-06-15 14:22:09.000000000 -0400 +@@ -229,23 +229,6 @@ static inline void xent_key_init(const s + } + + /** +- * xent_key_init_hash - initialize extended attribute entry key without +- * re-calculating hash function. 
+- * @c: UBIFS file-system description object +- * @key: key to initialize +- * @inum: host inode number +- * @hash: extended attribute entry name hash +- */ +-static inline void xent_key_init_hash(const struct ubifs_info *c, +- union ubifs_key *key, ino_t inum, +- uint32_t hash) +-{ +- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); +- key->u32[0] = inum; +- key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); +-} +- +-/** + * xent_key_init_flash - initialize on-flash extended attribute entry key. + * @c: UBIFS file-system description object + * @k: key to initialize +@@ -295,22 +278,15 @@ static inline void data_key_init(const s + } + + /** +- * data_key_init_flash - initialize on-flash data key. ++ * highest_data_key - get the highest possible data key for an inode. + * @c: UBIFS file-system description object +- * @k: key to initialize ++ * @key: key to initialize + * @inum: inode number +- * @block: block number + */ +-static inline void data_key_init_flash(const struct ubifs_info *c, void *k, +- ino_t inum, unsigned int block) ++static inline void highest_data_key(const struct ubifs_info *c, ++ union ubifs_key *key, ino_t inum) + { +- union ubifs_key *key = k; +- +- ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); +- key->j32[0] = cpu_to_le32(inum); +- key->j32[1] = cpu_to_le32(block | +- (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); +- memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); ++ data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK); + } + + /** +@@ -330,6 +306,20 @@ static inline void trun_key_init(const s + } + + /** ++ * invalid_key_init - initialize invalid node key. ++ * @c: UBIFS file-system description object ++ * @key: key to initialize ++ * ++ * This is a helper function which marks a @key object as invalid. ++ */ ++static inline void invalid_key_init(const struct ubifs_info *c, ++ union ubifs_key *key) ++{ ++ key->u32[0] = 0xDEADBEAF; ++ key->u32[1] = UBIFS_INVALID_KEY; ++} ++ ++/** + * key_type - get key type. 
+ * @c: UBIFS file-system description object + * @key: key to get type of +@@ -381,8 +371,8 @@ static inline ino_t key_inum_flash(const + * @c: UBIFS file-system description object + * @key: the key to get hash from + */ +-static inline int key_hash(const struct ubifs_info *c, +- const union ubifs_key *key) ++static inline uint32_t key_hash(const struct ubifs_info *c, ++ const union ubifs_key *key) + { + return key->u32[1] & UBIFS_S_KEY_HASH_MASK; + } +@@ -392,7 +382,7 @@ static inline int key_hash(const struct + * @c: UBIFS file-system description object + * @k: the key to get hash from + */ +-static inline int key_hash_flash(const struct ubifs_info *c, const void *k) ++static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k) + { + const union ubifs_key *key = k; + +@@ -554,4 +544,5 @@ static inline unsigned long long key_max + return 0; + } + } ++ + #endif /* !__UBIFS_KEY_H__ */ +diff -uprN linux-2.6.28/fs/ubifs/log.c ubifs-v2.6.28/fs/ubifs/log.c +--- linux-2.6.28/fs/ubifs/log.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/fs/ubifs/log.c 2011-06-15 14:22:09.000000000 -0400 +@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct + } + + /** +- * next_log_lnum - switch to the next log LEB. +- * @c: UBIFS file-system description object +- * @lnum: current log LEB +- */ +-static inline int next_log_lnum(const struct ubifs_info *c, int lnum) +-{ +- lnum += 1; +- if (lnum > c->log_last) +- lnum = UBIFS_LOG_LNUM; +- +- return lnum; +-} +- +-/** + * empty_log_bytes - calculate amount of empty space in the log. 
+ * @c: UBIFS file-system description object + */ +@@ -159,7 +145,7 @@ void ubifs_add_bud(struct ubifs_info *c, + jhead = &c->jheads[bud->jhead]; + list_add_tail(&bud->list, &jhead->buds_list); + } else +- ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); ++ ubifs_assert(c->replaying && c->ro_mount); + + /* + * Note, although this is a new bud, we anyway account this space now, +@@ -169,28 +155,8 @@ void ubifs_add_bud(struct ubifs_info *c, + */ + c->bud_bytes += c->leb_size - bud->start; + +- dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, +- bud->start, bud->jhead, c->bud_bytes); +- spin_unlock(&c->buds_lock); +-} +- +-/** +- * ubifs_create_buds_lists - create journal head buds lists for remount rw. +- * @c: UBIFS file-system description object +- */ +-void ubifs_create_buds_lists(struct ubifs_info *c) +-{ +- struct rb_node *p; +- +- spin_lock(&c->buds_lock); +- p = rb_first(&c->buds); +- while (p) { +- struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb); +- struct ubifs_jhead *jhead = &c->jheads[bud->jhead]; +- +- list_add_tail(&bud->list, &jhead->buds_list); +- p = rb_next(p); +- } ++ dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum, ++ bud->start, dbg_jhead(bud->jhead), c->bud_bytes); + spin_unlock(&c->buds_lock); + } + +@@ -223,8 +189,8 @@ int ubifs_add_bud_to_log(struct ubifs_in + } + + mutex_lock(&c->log_mutex); +- +- if (c->ro_media) { ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) { + err = -EROFS; + goto out_unlock; + } +@@ -239,7 +205,7 @@ int ubifs_add_bud_to_log(struct ubifs_in + } + + /* +- * Make sure the the amount of space in buds will not exceed ++ * Make sure the amount of space in buds will not exceed the + * 'c->max_bud_bytes' limit, because we want to guarantee mount time + * limits. 
+ * +@@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_in + ref->jhead = cpu_to_le32(jhead); + + if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -355,19 +321,18 @@ static void remove_buds(struct ubifs_inf + * heads (non-closed buds). + */ + c->cmt_bud_bytes += wbuf->offs - bud->start; +- dbg_log("preserve %d:%d, jhead %d, bud bytes %d, " ++ dbg_log("preserve %d:%d, jhead %s, bud bytes %d, " + "cmt_bud_bytes %lld", bud->lnum, bud->start, +- bud->jhead, wbuf->offs - bud->start, ++ dbg_jhead(bud->jhead), wbuf->offs - bud->start, + c->cmt_bud_bytes); + bud->start = wbuf->offs; + } else { + c->cmt_bud_bytes += c->leb_size - bud->start; +- dbg_log("remove %d:%d, jhead %d, bud bytes %d, " ++ dbg_log("remove %d:%d, jhead %s, bud bytes %d, " + "cmt_bud_bytes %lld", bud->lnum, bud->start, +- bud->jhead, c->leb_size - bud->start, ++ dbg_jhead(bud->jhead), c->leb_size - bud->start, + c->cmt_bud_bytes); + rb_erase(p1, &c->buds); +- list_del(&bud->list); + /* + * If the commit does not finish, the recovery will need + * to replay the journal, in which case the old buds +@@ -375,7 +340,7 @@ static void remove_buds(struct ubifs_inf + * commit i.e. do not allow them to be garbage + * collected. 
+ */ +- list_add(&bud->list, &c->old_buds); ++ list_move(&bud->list, &c->old_buds); + } + } + spin_unlock(&c->buds_lock); +@@ -430,7 +395,8 @@ int ubifs_log_start_commit(struct ubifs_ + if (lnum == -1 || offs == c->leb_size) + continue; + +- dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i); ++ dbg_log("add ref to LEB %d:%d for jhead %s", ++ lnum, offs, dbg_jhead(i)); + ref = buf + len; + ref->ch.node_type = UBIFS_REF_NODE; + ref->lnum = cpu_to_le32(lnum); +@@ -445,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_ + + /* Switch to the next log LEB */ + if (c->lhead_offs) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -466,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_ + + c->lhead_offs += len; + if (c->lhead_offs == c->leb_size) { +- c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); ++ c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + c->lhead_offs = 0; + } + +@@ -553,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_i + } + mutex_lock(&c->log_mutex); + for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; +- lnum = next_log_lnum(c, lnum)) { ++ lnum = ubifs_next_log_lnum(c, lnum)) { + dbg_log("unmap log LEB %d", lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) +@@ -662,7 +628,7 @@ static int add_node(struct ubifs_info *c + err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); + if (err) + return err; +- *lnum = next_log_lnum(c, *lnum); ++ *lnum = ubifs_next_log_lnum(c, *lnum); + *offs = 0; + } + memcpy(buf + *offs, node, len); +@@ -696,7 +662,7 @@ int ubifs_consolidate_log(struct ubifs_i + lnum = c->ltail_lnum; + write_lnum = lnum; + while (1) { +- sleb = ubifs_scan(c, lnum, 0, c->sbuf); ++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + goto out_free; +@@ -732,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_i + ubifs_scan_destroy(sleb); + if (lnum == c->lhead_lnum) + break; +- lnum = 
next_log_lnum(c, lnum); ++ lnum = ubifs_next_log_lnum(c, lnum); + } + if (offs) { + int sz = ALIGN(offs, c->min_io_size); +@@ -752,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_i + /* Unmap remaining LEBs */ + lnum = write_lnum; + do { +- lnum = next_log_lnum(c, lnum); ++ lnum = ubifs_next_log_lnum(c, lnum); + err = ubifs_leb_unmap(c, lnum); + if (err) + return err; +diff -uprN linux-2.6.28/fs/ubifs/lprops.c ubifs-v2.6.28/fs/ubifs/lprops.c +--- linux-2.6.28/fs/ubifs/lprops.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/lprops.c 2011-06-15 14:22:09.000000000 -0400 +@@ -281,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info + case LPROPS_FREE: + if (add_to_lpt_heap(c, lprops, cat)) + break; +- /* No more room on heap so make it uncategorized */ ++ /* No more room on heap so make it un-categorized */ + cat = LPROPS_UNCAT; + /* Fall through */ + case LPROPS_UNCAT: +@@ -375,8 +375,8 @@ void ubifs_replace_cat(struct ubifs_info + * @lprops: LEB properties + * + * A LEB may have fallen off of the bottom of a heap, and ended up as +- * uncategorized even though it has enough space for us now. If that is the case +- * this function will put the LEB back onto a heap. ++ * un-categorized even though it has enough space for us now. If that is the ++ * case this function will put the LEB back onto a heap. + */ + void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) + { +@@ -436,10 +436,10 @@ int ubifs_categorize_lprops(const struct + /** + * change_category - change LEB properties category. + * @c: UBIFS file-system description object +- * @lprops: LEB properties to recategorize ++ * @lprops: LEB properties to re-categorize + * + * LEB properties are categorized to enable fast find operations. When the LEB +- * properties change they must be recategorized. ++ * properties change they must be re-categorized. 
+ */ + static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) + { +@@ -461,21 +461,18 @@ static void change_category(struct ubifs + } + + /** +- * calc_dark - calculate LEB dark space size. ++ * ubifs_calc_dark - calculate LEB dark space size. + * @c: the UBIFS file-system description object + * @spc: amount of free and dirty space in the LEB + * +- * This function calculates amount of dark space in an LEB which has @spc bytes +- * of free and dirty space. Returns the calculations result. ++ * This function calculates and returns amount of dark space in an LEB which ++ * has @spc bytes of free and dirty space. + * +- * Dark space is the space which is not always usable - it depends on which +- * nodes are written in which order. E.g., if an LEB has only 512 free bytes, +- * it is dark space, because it cannot fit a large data node. So UBIFS cannot +- * count on this LEB and treat these 512 bytes as usable because it is not true +- * if, for example, only big chunks of uncompressible data will be written to +- * the FS. ++ * UBIFS is trying to account the space which might not be usable, and this ++ * space is called "dark space". For example, if an LEB has only %512 free ++ * bytes, it is dark space, because it cannot fit a large data node. + */ +-static int calc_dark(struct ubifs_info *c, int spc) ++int ubifs_calc_dark(const struct ubifs_info *c, int spc) + { + ubifs_assert(!(spc & 7)); + +@@ -518,7 +515,7 @@ static int is_lprops_dirty(struct ubifs_ + * @free: new free space amount + * @dirty: new dirty space amount + * @flags: new flags +- * @idx_gc_cnt: change to the count of idx_gc list ++ * @idx_gc_cnt: change to the count of @idx_gc list + * + * This function changes LEB properties (@free, @dirty or @flag). However, the + * property which has the %LPROPS_NC value is not changed. 
Returns a pointer to +@@ -535,7 +532,7 @@ const struct ubifs_lprops *ubifs_change_ + { + /* + * This is the only function that is allowed to change lprops, so we +- * discard the const qualifier. ++ * discard the "const" qualifier. + */ + struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; + +@@ -575,7 +572,7 @@ const struct ubifs_lprops *ubifs_change_ + if (old_spc < c->dead_wm) + c->lst.total_dead -= old_spc; + else +- c->lst.total_dark -= calc_dark(c, old_spc); ++ c->lst.total_dark -= ubifs_calc_dark(c, old_spc); + + c->lst.total_used -= c->leb_size - old_spc; + } +@@ -616,7 +613,7 @@ const struct ubifs_lprops *ubifs_change_ + if (new_spc < c->dead_wm) + c->lst.total_dead += new_spc; + else +- c->lst.total_dark += calc_dark(c, new_spc); ++ c->lst.total_dark += ubifs_calc_dark(c, new_spc); + + c->lst.total_used += c->leb_size - new_spc; + } +@@ -1010,21 +1007,11 @@ out: + } + + /** +- * struct scan_check_data - data provided to scan callback function. +- * @lst: LEB properties statistics +- * @err: error code +- */ +-struct scan_check_data { +- struct ubifs_lp_stats lst; +- int err; +-}; +- +-/** + * scan_check_cb - scan callback. 
+ * @c: the UBIFS file-system description object + * @lp: LEB properties to scan + * @in_tree: whether the LEB properties are in main memory +- * @data: information passed to and from the caller of the scan ++ * @lst: lprops statistics to update + * + * This function returns a code that indicates whether the scan should continue + * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree +@@ -1033,12 +1020,12 @@ struct scan_check_data { + */ + static int scan_check_cb(struct ubifs_info *c, + const struct ubifs_lprops *lp, int in_tree, +- struct scan_check_data *data) ++ struct ubifs_lp_stats *lst) + { + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; +- struct ubifs_lp_stats *lst = &data->lst; +- int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; ++ int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; ++ void *buf = NULL; + + cat = lp->flags & LPROPS_CAT_MASK; + if (cat != LPROPS_UNCAT) { +@@ -1046,7 +1033,7 @@ static int scan_check_cb(struct ubifs_in + if (cat != (lp->flags & LPROPS_CAT_MASK)) { + ubifs_err("bad LEB category %d expected %d", + (lp->flags & LPROPS_CAT_MASK), cat); +- goto out; ++ return -EINVAL; + } + } + +@@ -1080,7 +1067,7 @@ static int scan_check_cb(struct ubifs_in + } + if (!found) { + ubifs_err("bad LPT list (category %d)", cat); +- goto out; ++ return -EINVAL; + } + } + } +@@ -1092,36 +1079,40 @@ static int scan_check_cb(struct ubifs_in + if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || + lp != heap->arr[lp->hpos]) { + ubifs_err("bad LPT heap (category %d)", cat); +- goto out; ++ return -EINVAL; + } + } + +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ /* ++ * After an unclean unmount, empty and freeable LEBs ++ * may contain garbage - do not scan them. 
++ */ ++ if (lp->free == c->leb_size) { ++ lst->empty_lebs += 1; ++ lst->total_free += c->leb_size; ++ lst->total_dark += ubifs_calc_dark(c, c->leb_size); ++ return LPT_SCAN_CONTINUE; ++ } ++ if (lp->free + lp->dirty == c->leb_size && ++ !(lp->flags & LPROPS_INDEX)) { ++ lst->total_free += lp->free; ++ lst->total_dirty += lp->dirty; ++ lst->total_dark += ubifs_calc_dark(c, c->leb_size); ++ return LPT_SCAN_CONTINUE; ++ } ++ ++ sleb = ubifs_scan(c, lnum, 0, buf, 0); + if (IS_ERR(sleb)) { +- /* +- * After an unclean unmount, empty and freeable LEBs +- * may contain garbage. +- */ +- if (lp->free == c->leb_size) { +- ubifs_err("scan errors were in empty LEB " +- "- continuing checking"); +- lst->empty_lebs += 1; +- lst->total_free += c->leb_size; +- lst->total_dark += calc_dark(c, c->leb_size); +- return LPT_SCAN_CONTINUE; +- } +- +- if (lp->free + lp->dirty == c->leb_size && +- !(lp->flags & LPROPS_INDEX)) { +- ubifs_err("scan errors were in freeable LEB " +- "- continuing checking"); +- lst->total_free += lp->free; +- lst->total_dirty += lp->dirty; +- lst->total_dark += calc_dark(c, c->leb_size); +- return LPT_SCAN_CONTINUE; ++ ret = PTR_ERR(sleb); ++ if (ret == -EUCLEAN) { ++ dbg_dump_lprops(c); ++ dbg_dump_budg(c, &c->bi); + } +- data->err = PTR_ERR(sleb); +- return LPT_SCAN_STOP; ++ goto out; + } + + is_idx = -1; +@@ -1235,10 +1226,11 @@ static int scan_check_cb(struct ubifs_in + if (spc < c->dead_wm) + lst->total_dead += spc; + else +- lst->total_dark += calc_dark(c, spc); ++ lst->total_dark += ubifs_calc_dark(c, spc); + } + + ubifs_scan_destroy(sleb); ++ vfree(buf); + return LPT_SCAN_CONTINUE; + + out_print: +@@ -1248,9 +1240,10 @@ out_print: + dbg_dump_leb(c, lnum); + out_destroy: + ubifs_scan_destroy(sleb); ++ ret = -EINVAL; + out: +- data->err = -EINVAL; +- return LPT_SCAN_STOP; ++ vfree(buf); ++ return ret; + } + + /** +@@ -1267,8 +1260,7 @@ out: + int dbg_check_lprops(struct ubifs_info *c) + { + int i, err; +- struct scan_check_data data; +- struct 
ubifs_lp_stats *lst = &data.lst; ++ struct ubifs_lp_stats lst; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; +@@ -1283,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info * + return err; + } + +- memset(lst, 0, sizeof(struct ubifs_lp_stats)); +- +- data.err = 0; ++ memset(&lst, 0, sizeof(struct ubifs_lp_stats)); + err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, + (ubifs_lpt_scan_callback)scan_check_cb, +- &data); ++ &lst); + if (err && err != -ENOSPC) + goto out; +- if (data.err) { +- err = data.err; +- goto out; +- } + +- if (lst->empty_lebs != c->lst.empty_lebs || +- lst->idx_lebs != c->lst.idx_lebs || +- lst->total_free != c->lst.total_free || +- lst->total_dirty != c->lst.total_dirty || +- lst->total_used != c->lst.total_used) { ++ if (lst.empty_lebs != c->lst.empty_lebs || ++ lst.idx_lebs != c->lst.idx_lebs || ++ lst.total_free != c->lst.total_free || ++ lst.total_dirty != c->lst.total_dirty || ++ lst.total_used != c->lst.total_used) { + ubifs_err("bad overall accounting"); + ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", +- lst->empty_lebs, lst->idx_lebs, lst->total_free, +- lst->total_dirty, lst->total_used); ++ lst.empty_lebs, lst.idx_lebs, lst.total_free, ++ lst.total_dirty, lst.total_used); + ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " + "total_free %lld, total_dirty %lld, total_used %lld", + c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, +@@ -1314,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info * + goto out; + } + +- if (lst->total_dead != c->lst.total_dead || +- lst->total_dark != c->lst.total_dark) { ++ if (lst.total_dead != c->lst.total_dead || ++ lst.total_dark != c->lst.total_dark) { + ubifs_err("bad dead/dark space accounting"); + ubifs_err("calculated: total_dead %lld, total_dark %lld", +- lst->total_dead, lst->total_dark); ++ lst.total_dead, lst.total_dark); + ubifs_err("read from lprops: total_dead %lld, total_dark %lld", + 
c->lst.total_dead, c->lst.total_dark); + err = -EINVAL; +diff -uprN linux-2.6.28/fs/ubifs/lpt.c ubifs-v2.6.28/fs/ubifs/lpt.c +--- linux-2.6.28/fs/ubifs/lpt.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/lpt.c 2011-06-15 14:22:09.000000000 -0400 +@@ -1269,10 +1269,9 @@ static int read_pnode(struct ubifs_info + lnum = branch->lnum; + offs = branch->offs; + pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); +- if (!pnode) { +- err = -ENOMEM; +- goto out; +- } ++ if (!pnode) ++ return -ENOMEM; ++ + if (lnum == 0) { + /* + * This pnode was not written which just means that the LEB +@@ -1362,6 +1361,7 @@ static int read_lsave(struct ubifs_info + goto out; + for (i = 0; i < c->lsave_cnt; i++) { + int lnum = c->lsave[i]; ++ struct ubifs_lprops *lprops; + + /* + * Due to automatic resizing, the values in the lsave table +@@ -1369,7 +1369,11 @@ static int read_lsave(struct ubifs_info + */ + if (lnum >= c->leb_cnt) + continue; +- ubifs_lpt_lookup(c, lnum); ++ lprops = ubifs_lpt_lookup(c, lnum); ++ if (IS_ERR(lprops)) { ++ err = PTR_ERR(lprops); ++ goto out; ++ } + } + out: + vfree(buf); +@@ -1456,13 +1460,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(st + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) +- return ERR_PTR(PTR_ERR(nnode)); ++ return ERR_CAST(nnode); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = ubifs_get_pnode(c, nnode, iip); + if (IS_ERR(pnode)) +- return ERR_PTR(PTR_ERR(pnode)); ++ return ERR_CAST(pnode); + iip = (i & (UBIFS_LPT_FANOUT - 1)); + dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, + pnode->lprops[iip].free, pnode->lprops[iip].dirty, +@@ -1585,7 +1589,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_di + nnode = c->nroot; + nnode = dirty_cow_nnode(c, nnode); + if (IS_ERR(nnode)) +- return ERR_PTR(PTR_ERR(nnode)); ++ return ERR_CAST(nnode); + i = lnum - c->main_first; + shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; + for (h = 1; h < 
c->lpt_hght; h++) { +@@ -1593,19 +1597,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_di + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) +- return ERR_PTR(PTR_ERR(nnode)); ++ return ERR_CAST(nnode); + nnode = dirty_cow_nnode(c, nnode); + if (IS_ERR(nnode)) +- return ERR_PTR(PTR_ERR(nnode)); ++ return ERR_CAST(nnode); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + shft -= UBIFS_LPT_FANOUT_SHIFT; + pnode = ubifs_get_pnode(c, nnode, iip); + if (IS_ERR(pnode)) +- return ERR_PTR(PTR_ERR(pnode)); ++ return ERR_CAST(pnode); + pnode = dirty_cow_pnode(c, pnode); + if (IS_ERR(pnode)) +- return ERR_PTR(PTR_ERR(pnode)); ++ return ERR_CAST(pnode); + iip = (i & (UBIFS_LPT_FANOUT - 1)); + dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, + pnode->lprops[iip].free, pnode->lprops[iip].dirty, +diff -uprN linux-2.6.28/fs/ubifs/lpt_commit.c ubifs-v2.6.28/fs/ubifs/lpt_commit.c +--- linux-2.6.28/fs/ubifs/lpt_commit.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/lpt_commit.c 2011-06-15 14:22:09.000000000 -0400 +@@ -28,6 +28,12 @@ + #include + #include "ubifs.h" + ++#ifdef CONFIG_UBIFS_FS_DEBUG ++static int dbg_populate_lsave(struct ubifs_info *c); ++#else ++#define dbg_populate_lsave(c) 0 ++#endif ++ + /** + * first_dirty_cnode - find first dirty cnode. 
+ * @c: UBIFS file-system description object +@@ -585,7 +591,7 @@ static struct ubifs_pnode *next_pnode_to + if (nnode->nbranch[iip].lnum) + break; + } +- } while (iip >= UBIFS_LPT_FANOUT); ++ } while (iip >= UBIFS_LPT_FANOUT); + + /* Go right */ + nnode = ubifs_get_nnode(c, nnode, iip); +@@ -645,7 +651,7 @@ static struct ubifs_pnode *pnode_lookup( + shft -= UBIFS_LPT_FANOUT_SHIFT; + nnode = ubifs_get_nnode(c, nnode, iip); + if (IS_ERR(nnode)) +- return ERR_PTR(PTR_ERR(nnode)); ++ return ERR_CAST(nnode); + } + iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); + return ubifs_get_pnode(c, nnode, iip); +@@ -704,6 +710,9 @@ static int make_tree_dirty(struct ubifs_ + struct ubifs_pnode *pnode; + + pnode = pnode_lookup(c, 0); ++ if (IS_ERR(pnode)) ++ return PTR_ERR(pnode); ++ + while (pnode) { + do_make_pnode_dirty(c, pnode); + pnode = next_pnode_to_dirty(c, pnode); +@@ -811,6 +820,10 @@ static void populate_lsave(struct ubifs_ + c->lpt_drty_flgs |= LSAVE_DIRTY; + ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); + } ++ ++ if (dbg_populate_lsave(c)) ++ return; ++ + list_for_each_entry(lprops, &c->empty_list, list) { + c->lsave[cnt++] = lprops->lnum; + if (cnt >= c->lsave_cnt) +@@ -1624,29 +1637,35 @@ static int dbg_check_ltab_lnum(struct ub + { + int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; + int ret; +- void *buf = c->dbg->buf; ++ void *buf, *p; + + if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) + return 0; + ++ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory for ltab checking"); ++ return 0; ++ } ++ + dbg_lp("LEB %d", lnum); + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); +- return err; ++ goto out; + } + while (1) { +- if (!is_a_node(c, buf, len)) { ++ if (!is_a_node(c, p, len)) { + int i, pad_len; + +- pad_len = get_pad_len(c, buf, len); ++ pad_len = get_pad_len(c, p, len); + if (pad_len) { +- buf += pad_len; ++ p 
+= pad_len; + len -= pad_len; + dirty += pad_len; + continue; + } +- if (!dbg_is_all_ff(buf, len)) { ++ if (!dbg_is_all_ff(p, len)) { + dbg_msg("invalid empty space in LEB %d at %d", + lnum, c->leb_size - len); + err = -EINVAL; +@@ -1664,16 +1683,21 @@ static int dbg_check_ltab_lnum(struct ub + lnum, dirty, c->ltab[i].dirty); + err = -EINVAL; + } +- return err; ++ goto out; + } +- node_type = get_lpt_node_type(c, buf, &node_num); ++ node_type = get_lpt_node_type(c, p, &node_num); + node_len = get_lpt_node_len(c, node_type); + ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); + if (ret == 1) + dirty += node_len; +- buf += node_len; ++ p += node_len; + len -= node_len; + } ++ ++ err = 0; ++out: ++ vfree(buf); ++ return err; + } + + /** +@@ -1866,25 +1890,31 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, + static void dump_lpt_leb(const struct ubifs_info *c, int lnum) + { + int err, len = c->leb_size, node_type, node_num, node_len, offs; +- void *buf = c->dbg->buf; ++ void *buf, *p; + + printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", + current->pid, lnum); ++ buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory to dump LPT"); ++ return; ++ } ++ + err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); + if (err) { + ubifs_err("cannot read LEB %d, error %d", lnum, err); +- return; ++ goto out; + } + while (1) { + offs = c->leb_size - len; +- if (!is_a_node(c, buf, len)) { ++ if (!is_a_node(c, p, len)) { + int pad_len; + +- pad_len = get_pad_len(c, buf, len); ++ pad_len = get_pad_len(c, p, len); + if (pad_len) { + printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", + lnum, offs, pad_len); +- buf += pad_len; ++ p += pad_len; + len -= pad_len; + continue; + } +@@ -1894,7 +1924,7 @@ static void dump_lpt_leb(const struct ub + break; + } + +- node_type = get_lpt_node_type(c, buf, &node_num); ++ node_type = get_lpt_node_type(c, p, &node_num); + switch (node_type) { + case UBIFS_LPT_PNODE: + { +@@ -1919,14 +1949,14 
@@ static void dump_lpt_leb(const struct ub + else + printk(KERN_DEBUG "LEB %d:%d, nnode, ", + lnum, offs); +- err = ubifs_unpack_nnode(c, buf, &nnode); ++ err = ubifs_unpack_nnode(c, p, &nnode); + for (i = 0; i < UBIFS_LPT_FANOUT; i++) { +- printk("%d:%d", nnode.nbranch[i].lnum, ++ printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, + nnode.nbranch[i].offs); + if (i != UBIFS_LPT_FANOUT - 1) +- printk(", "); ++ printk(KERN_CONT ", "); + } +- printk("\n"); ++ printk(KERN_CONT "\n"); + break; + } + case UBIFS_LPT_LTAB: +@@ -1940,15 +1970,18 @@ static void dump_lpt_leb(const struct ub + break; + default: + ubifs_err("LPT node type %d not recognized", node_type); +- return; ++ goto out; + } + +- buf += node_len; ++ p += node_len; + len -= node_len; + } + + printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", + current->pid, lnum); ++out: ++ vfree(buf); ++ return; + } + + /** +@@ -1970,4 +2003,47 @@ void dbg_dump_lpt_lebs(const struct ubif + current->pid); + } + ++/** ++ * dbg_populate_lsave - debugging version of 'populate_lsave()' ++ * @c: UBIFS file-system description object ++ * ++ * This is a debugging version for 'populate_lsave()' which populates lsave ++ * with random LEBs instead of useful LEBs, which is good for test coverage. ++ * Returns zero if lsave has not been populated (this debugging feature is ++ * disabled) an non-zero if lsave has been populated. 
++ */ ++static int dbg_populate_lsave(struct ubifs_info *c) ++{ ++ struct ubifs_lprops *lprops; ++ struct ubifs_lpt_heap *heap; ++ int i; ++ ++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ return 0; ++ if (random32() & 3) ++ return 0; ++ ++ for (i = 0; i < c->lsave_cnt; i++) ++ c->lsave[i] = c->main_first; ++ ++ list_for_each_entry(lprops, &c->empty_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ list_for_each_entry(lprops, &c->freeable_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ list_for_each_entry(lprops, &c->frdi_idx_list, list) ++ c->lsave[random32() % c->lsave_cnt] = lprops->lnum; ++ ++ heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ heap = &c->lpt_heap[LPROPS_DIRTY - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ heap = &c->lpt_heap[LPROPS_FREE - 1]; ++ for (i = 0; i < heap->cnt; i++) ++ c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; ++ ++ return 1; ++} ++ + #endif /* CONFIG_UBIFS_FS_DEBUG */ +diff -uprN linux-2.6.28/fs/ubifs/master.c ubifs-v2.6.28/fs/ubifs/master.c +--- linux-2.6.28/fs/ubifs/master.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/master.c 2011-06-15 14:22:09.000000000 -0400 +@@ -29,7 +29,8 @@ + * @c: UBIFS file-system description object + * + * This function scans the master node LEBs and search for the latest master +- * node. Returns zero in case of success and a negative error code in case of ++ * node. Returns zero in case of success, %-EUCLEAN if there master area is ++ * corrupted and requires recovery, and a negative error code in case of + * failure. 
+ */ + static int scan_for_master(struct ubifs_info *c) +@@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_ + + lnum = UBIFS_MST_LNUM; + +- sleb = ubifs_scan(c, lnum, 0, c->sbuf); ++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + nodes_cnt = sleb->nodes_cnt; +@@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_ + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, + list); + if (snod->type != UBIFS_MST_NODE) +- goto out; ++ goto out_dump; + memcpy(c->mst_node, snod->node, snod->len); + offs = snod->offs; + } +@@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_ + + lnum += 1; + +- sleb = ubifs_scan(c, lnum, 0, c->sbuf); ++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + if (sleb->nodes_cnt != nodes_cnt) +@@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_ + goto out; + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); + if (snod->type != UBIFS_MST_NODE) +- goto out; ++ goto out_dump; + if (snod->offs != offs) + goto out; + if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, +@@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_ + + out: + ubifs_scan_destroy(sleb); ++ return -EUCLEAN; ++ ++out_dump: ++ ubifs_err("unexpected node type %d master LEB %d:%d", ++ snod->type, lnum, snod->offs); ++ ubifs_scan_destroy(sleb); + return -EINVAL; + } + +@@ -141,7 +148,7 @@ static int validate_master(const struct + } + + main_sz = (long long)c->main_lebs * c->leb_size; +- if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { ++ if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { + err = 9; + goto out; + } +@@ -211,7 +218,7 @@ static int validate_master(const struct + } + + if (c->lst.total_dead + c->lst.total_dark + +- c->lst.total_used + c->old_idx_sz > main_sz) { ++ c->lst.total_used + c->bi.old_idx_sz > main_sz) { + err = 21; + goto out; + } +@@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info + + err = scan_for_master(c); + if 
(err) { +- err = ubifs_recover_master_node(c); ++ if (err == -EUCLEAN) ++ err = ubifs_recover_master_node(c); + if (err) + /* + * Note, we do not free 'c->mst_node' here because the +@@ -278,7 +286,7 @@ int ubifs_read_master(struct ubifs_info + c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); + c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); + c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); +- c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); ++ c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); + c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); + c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); +@@ -297,7 +305,7 @@ int ubifs_read_master(struct ubifs_info + c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); + c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); + +- c->calc_idx_sz = c->old_idx_sz; ++ c->calc_idx_sz = c->bi.old_idx_sz; + + if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) + c->no_orphs = 1; +@@ -353,7 +361,8 @@ int ubifs_write_master(struct ubifs_info + { + int err, lnum, offs, len; + +- if (c->ro_media) ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) + return -EROFS; + + lnum = UBIFS_MST_LNUM; +diff -uprN linux-2.6.28/fs/ubifs/misc.h ubifs-v2.6.28/fs/ubifs/misc.h +--- linux-2.6.28/fs/ubifs/misc.h 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/fs/ubifs/misc.h 2011-06-15 14:22:09.000000000 -0400 +@@ -132,7 +132,8 @@ static inline int ubifs_leb_unmap(const + { + int err; + +- if (c->ro_media) ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) + return -EROFS; + err = ubi_leb_unmap(c->ubi, lnum); + if (err) { +@@ -159,7 +160,8 @@ static inline int ubifs_leb_write(const + { + int err; + +- if (c->ro_media) ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) + return -EROFS; + err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); + if (err) { +@@ -186,7 +188,8 @@ static inline int 
ubifs_leb_change(const + { + int err; + +- if (c->ro_media) ++ ubifs_assert(!c->ro_media && !c->ro_mount); ++ if (c->ro_error) + return -EROFS; + err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); + if (err) { +@@ -337,4 +340,21 @@ static inline void ubifs_release_lprops( + mutex_unlock(&c->lp_mutex); + } + ++/** ++ * ubifs_next_log_lnum - switch to the next log LEB. ++ * @c: UBIFS file-system description object ++ * @lnum: current log LEB ++ * ++ * This helper function returns the log LEB number which goes next after LEB ++ * 'lnum'. ++ */ ++static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) ++{ ++ lnum += 1; ++ if (lnum > c->log_last) ++ lnum = UBIFS_LOG_LNUM; ++ ++ return lnum; ++} ++ + #endif /* __UBIFS_MISC_H__ */ +diff -uprN linux-2.6.28/fs/ubifs/orphan.c ubifs-v2.6.28/fs/ubifs/orphan.c +--- linux-2.6.28/fs/ubifs/orphan.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/orphan.c 2011-06-15 14:22:09.000000000 -0400 +@@ -670,9 +670,11 @@ static int kill_orphans(struct ubifs_inf + struct ubifs_scan_leb *sleb; + + dbg_rcvry("LEB %d", lnum); +- sleb = ubifs_scan(c, lnum, 0, c->sbuf); ++ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); + if (IS_ERR(sleb)) { +- sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); ++ if (PTR_ERR(sleb) == -EUCLEAN) ++ sleb = ubifs_recover_leb(c, lnum, 0, ++ c->sbuf, -1); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; +@@ -891,15 +893,22 @@ static int dbg_read_orphans(struct check + static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) + { + int lnum, err = 0; ++ void *buf; + + /* Check no-orphans flag and skip this if no orphans */ + if (c->no_orphs) + return 0; + ++ buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); ++ if (!buf) { ++ ubifs_err("cannot allocate memory to check orphans"); ++ return 0; ++ } ++ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + struct ubifs_scan_leb *sleb; + +- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); ++ sleb = ubifs_scan(c, 
lnum, 0, buf, 0); + if (IS_ERR(sleb)) { + err = PTR_ERR(sleb); + break; +@@ -911,6 +920,7 @@ static int dbg_scan_orphans(struct ubifs + break; + } + ++ vfree(buf); + return err; + } + +diff -uprN linux-2.6.28/fs/ubifs/recovery.c ubifs-v2.6.28/fs/ubifs/recovery.c +--- linux-2.6.28/fs/ubifs/recovery.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/recovery.c 2011-06-15 14:22:09.000000000 -0400 +@@ -24,10 +24,27 @@ + * This file implements functions needed to recover from unclean un-mounts. + * When UBIFS is mounted, it checks a flag on the master node to determine if + * an un-mount was completed sucessfully. If not, the process of mounting +- * incorparates additional checking and fixing of on-flash data structures. ++ * incorporates additional checking and fixing of on-flash data structures. + * UBIFS always cleans away all remnants of an unclean un-mount, so that + * errors do not accumulate. However UBIFS defers recovery if it is mounted + * read-only, and the flash is not modified in that case. ++ * ++ * The general UBIFS approach to the recovery is that it recovers from ++ * corruptions which could be caused by power cuts, but it refuses to recover ++ * from corruption caused by other reasons. And UBIFS tries to distinguish ++ * between these 2 reasons of corruptions and silently recover in the former ++ * case and loudly complain in the latter case. ++ * ++ * UBIFS writes only to erased LEBs, so it writes only to the flash space ++ * containing only 0xFFs. UBIFS also always writes strictly from the beginning ++ * of the LEB to the end. And UBIFS assumes that the underlying flash media ++ * writes in @c->max_write_size bytes at a time. ++ * ++ * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min. ++ * I/O unit corresponding to offset X to contain corrupted data, all the ++ * following min. I/O units have to contain empty space (all 0xFFs). 
If this is ++ * not true, the corruption cannot be the result of a power cut, and UBIFS ++ * refuses to mount. + */ + + #include +@@ -53,6 +70,25 @@ static int is_empty(void *buf, int len) + } + + /** ++ * first_non_ff - find offset of the first non-0xff byte. ++ * @buf: buffer to search in ++ * @len: length of buffer ++ * ++ * This function returns offset of the first non-0xff byte in @buf or %-1 if ++ * the buffer contains only 0xff bytes. ++ */ ++static int first_non_ff(void *buf, int len) ++{ ++ uint8_t *p = buf; ++ int i; ++ ++ for (i = 0; i < len; i++) ++ if (*p++ != 0xff) ++ return i; ++ return -1; ++} ++ ++/** + * get_master_node - get the last valid master node allowing for corruption. + * @c: UBIFS file-system description object + * @lnum: LEB number +@@ -267,12 +303,12 @@ int ubifs_recover_master_node(struct ubi + mst = mst2; + } + +- dbg_rcvry("recovered master node from LEB %d", ++ ubifs_msg("recovered master node from LEB %d", + (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); + + memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); + +- if ((c->vfs_sb->s_flags & MS_RDONLY)) { ++ if (c->ro_mount) { + /* Read-only mode. Keep a copy for switching to rw mode */ + c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); + if (!c->rcvrd_mst_node) { +@@ -280,6 +316,32 @@ int ubifs_recover_master_node(struct ubi + goto out_free; + } + memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); ++ ++ /* ++ * We had to recover the master node, which means there was an ++ * unclean reboot. However, it is possible that the master node ++ * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set. ++ * E.g., consider the following chain of events: ++ * ++ * 1. UBIFS was cleanly unmounted, so the master node is clean ++ * 2. UBIFS is being mounted R/W and starts changing the master ++ * node in the first (%UBIFS_MST_LNUM). A power cut happens, ++ * so this LEB ends up with some amount of garbage at the ++ * end. ++ * 3. UBIFS is being mounted R/O. 
We reach this place and ++ * recover the master node from the second LEB ++ * (%UBIFS_MST_LNUM + 1). But we cannot update the media ++ * because we are being mounted R/O. We have to defer the ++ * operation. ++ * 4. However, this master node (@c->mst_node) is marked as ++ * clean (since the step 1). And if we just return, the ++ * mount code will be confused and won't recover the master ++ * node when it is re-mounter R/W later. ++ * ++ * Thus, to force the recovery by marking the master node as ++ * dirty. ++ */ ++ c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + } else { + /* Write the recovered master node */ + c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; +@@ -342,44 +404,23 @@ int ubifs_write_rcvrd_mst_node(struct ub + * @offs: offset to check + * + * This function returns %1 if @offs was in the last write to the LEB whose data +- * is in @buf, otherwise %0 is returned. The determination is made by checking +- * for subsequent empty space starting from the next min_io_size boundary (or a +- * bit less than the common header size if min_io_size is one). ++ * is in @buf, otherwise %0 is returned. The determination is made by checking ++ * for subsequent empty space starting from the next @c->max_write_size ++ * boundary. + */ + static int is_last_write(const struct ubifs_info *c, void *buf, int offs) + { +- int empty_offs; +- int check_len; ++ int empty_offs, check_len; + uint8_t *p; + +- if (c->min_io_size == 1) { +- check_len = c->leb_size - offs; +- p = buf + check_len; +- for (; check_len > 0; check_len--) +- if (*--p != 0xff) +- break; +- /* +- * 'check_len' is the size of the corruption which cannot be +- * more than the size of 1 node if it was caused by an unclean +- * unmount. +- */ +- if (check_len > UBIFS_MAX_NODE_SZ) +- return 0; +- return 1; +- } +- + /* +- * Round up to the next c->min_io_size boundary i.e. 'offs' is in the +- * last wbuf written. After that should be empty space. ++ * Round up to the next @c->max_write_size boundary i.e. 
@offs is in ++ * the last wbuf written. After that should be empty space. + */ +- empty_offs = ALIGN(offs + 1, c->min_io_size); ++ empty_offs = ALIGN(offs + 1, c->max_write_size); + check_len = c->leb_size - empty_offs; + p = buf + empty_offs - offs; +- +- for (; check_len > 0; check_len--) +- if (*p++ != 0xff) +- return 0; +- return 1; ++ return is_empty(p, check_len); + } + + /** +@@ -392,7 +433,7 @@ static int is_last_write(const struct ub + * + * This function pads up to the next min_io_size boundary (if there is one) and + * sets empty space to all 0xff. @buf, @offs and @len are updated to the next +- * min_io_size boundary (if there is one). ++ * @c->min_io_size boundary. + */ + static void clean_buf(const struct ubifs_info *c, void **buf, int lnum, + int *offs, int *len) +@@ -402,11 +443,6 @@ static void clean_buf(const struct ubifs + lnum = lnum; + dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs); + +- if (c->min_io_size == 1) { +- memset(*buf, 0xff, c->leb_size - *offs); +- return; +- } +- + ubifs_assert(!(*offs & 7)); + empty_offs = ALIGN(*offs, c->min_io_size); + pad_len = empty_offs - *offs; +@@ -436,7 +472,7 @@ static int no_more_nodes(const struct ub + int skip, dlen = le32_to_cpu(ch->len); + + /* Check for empty space after the corrupt node's common header */ +- skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; ++ skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs; + if (is_empty(buf + skip, len - skip)) + return 1; + /* +@@ -448,7 +484,7 @@ static int no_more_nodes(const struct ub + return 0; + } + /* Now we know the corrupt node's length we can skip over it */ +- skip = ALIGN(offs + dlen, c->min_io_size) - offs; ++ skip = ALIGN(offs + dlen, c->max_write_size) - offs; + /* After which there should be empty space */ + if (is_empty(buf + skip, len - skip)) + return 1; +@@ -476,7 +512,7 @@ static int fix_unclean_leb(struct ubifs_ + endpt = snod->offs + snod->len; + } + +- if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { 
++ if (c->ro_mount && !c->remounting_rw) { + /* Add to recovery list */ + struct ubifs_unclean_leb *ucleb; + +@@ -527,16 +563,15 @@ static int fix_unclean_leb(struct ubifs_ + } + + /** +- * drop_incomplete_group - drop nodes from an incomplete group. ++ * drop_last_group - drop the last group of nodes. + * @sleb: scanned LEB information + * @offs: offset of dropped nodes is returned here + * +- * This function returns %1 if nodes are dropped and %0 otherwise. ++ * This is a helper function for 'ubifs_recover_leb()' which drops the last ++ * group of nodes of the scanned LEB. + */ +-static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) ++static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs) + { +- int dropped = 0; +- + while (!list_empty(&sleb->nodes)) { + struct ubifs_scan_node *snod; + struct ubifs_ch *ch; +@@ -545,15 +580,40 @@ static int drop_incomplete_group(struct + list); + ch = snod->node; + if (ch->group_type != UBIFS_IN_NODE_GROUP) +- return dropped; +- dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); ++ break; ++ ++ dbg_rcvry("dropping grouped node at %d:%d", ++ sleb->lnum, snod->offs); ++ *offs = snod->offs; ++ list_del(&snod->list); ++ kfree(snod); ++ sleb->nodes_cnt -= 1; ++ } ++} ++ ++/** ++ * drop_last_node - drop the last node. ++ * @sleb: scanned LEB information ++ * @offs: offset of dropped nodes is returned here ++ * @grouped: non-zero if whole group of nodes have to be dropped ++ * ++ * This is a helper function for 'ubifs_recover_leb()' which drops the last ++ * node of the scanned LEB. 
++ */ ++static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs) ++{ ++ struct ubifs_scan_node *snod; ++ ++ if (!list_empty(&sleb->nodes)) { ++ snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, ++ list); ++ ++ dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs); + *offs = snod->offs; + list_del(&snod->list); + kfree(snod); + sleb->nodes_cnt -= 1; +- dropped = 1; + } +- return dropped; + } + + /** +@@ -562,33 +622,30 @@ static int drop_incomplete_group(struct + * @lnum: LEB number + * @offs: offset + * @sbuf: LEB-sized buffer to use +- * @grouped: nodes may be grouped for recovery ++ * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not ++ * belong to any journal head) + * + * This function does a scan of a LEB, but caters for errors that might have + * been caused by the unclean unmount from which we are attempting to recover. +- * +- * This function returns %0 on success and a negative error code on failure. ++ * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is ++ * found, and a negative error code in case of failure. + */ + struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, +- int offs, void *sbuf, int grouped) ++ int offs, void *sbuf, int jhead) + { +- int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; +- int empty_chkd = 0, start = offs; ++ int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; ++ int grouped = jhead == -1 ? 
0 : c->jheads[jhead].grouped; + struct ubifs_scan_leb *sleb; + void *buf = sbuf + offs; + +- dbg_rcvry("%d:%d", lnum, offs); ++ dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped); + + sleb = ubifs_start_scan(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return sleb; + +- if (sleb->ecc) +- need_clean = 1; +- ++ ubifs_assert(len >= 8); + while (len >= 8) { +- int ret; +- + dbg_scan("look at LEB %d:%d (%d bytes left)", + lnum, offs, len); + +@@ -598,8 +655,7 @@ struct ubifs_scan_leb *ubifs_recover_leb + * Scan quietly until there is an error from which we cannot + * recover + */ +- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); +- ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + if (ret == SCANNED_A_NODE) { + /* A valid node, and not a padding node */ + struct ubifs_ch *ch = buf; +@@ -612,98 +668,126 @@ struct ubifs_scan_leb *ubifs_recover_leb + offs += node_len; + buf += node_len; + len -= node_len; +- continue; +- } +- +- if (ret > 0) { ++ } else if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; + buf += ret; + len -= ret; +- continue; +- } +- +- if (ret == SCANNED_EMPTY_SPACE) { +- if (!is_empty(buf, len)) { +- if (!is_last_write(c, buf, offs)) +- break; +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } +- empty_chkd = 1; ++ } else if (ret == SCANNED_EMPTY_SPACE || ++ ret == SCANNED_GARBAGE || ++ ret == SCANNED_A_BAD_PAD_NODE || ++ ret == SCANNED_A_CORRUPT_NODE) { ++ dbg_rcvry("found corruption - %d", ret); + break; ++ } else { ++ dbg_err("unexpected return value %d", ret); ++ err = -EINVAL; ++ goto error; + } ++ } + +- if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) +- if (is_last_write(c, buf, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- empty_chkd = 1; +- break; +- } +- +- if (ret == SCANNED_A_CORRUPT_NODE) +- if (no_more_nodes(c, buf, len, lnum, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- empty_chkd = 1; +- break; +- 
} +- +- if (quiet) { +- /* Redo the last scan but noisily */ +- quiet = 0; +- continue; +- } ++ if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { ++ if (!is_last_write(c, buf, offs)) ++ goto corrupted_rescan; ++ } else if (ret == SCANNED_A_CORRUPT_NODE) { ++ if (!no_more_nodes(c, buf, len, lnum, offs)) ++ goto corrupted_rescan; ++ } else if (!is_empty(buf, len)) { ++ if (!is_last_write(c, buf, offs)) { ++ int corruption = first_non_ff(buf, len); + +- switch (ret) { +- case SCANNED_GARBAGE: +- dbg_err("garbage"); +- goto corrupted; +- case SCANNED_A_CORRUPT_NODE: +- case SCANNED_A_BAD_PAD_NODE: +- dbg_err("bad node"); +- goto corrupted; +- default: +- dbg_err("unknown"); ++ /* ++ * See header comment for this file for more ++ * explanations about the reasons we have this check. ++ */ ++ ubifs_err("corrupt empty space LEB %d:%d, corruption " ++ "starts at %d", lnum, offs, corruption); ++ /* Make sure we dump interesting non-0xFF data */ ++ offs += corruption; ++ buf += corruption; + goto corrupted; + } + } + +- if (!empty_chkd && !is_empty(buf, len)) { +- if (is_last_write(c, buf, offs)) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } else { +- ubifs_err("corrupt empty space at LEB %d:%d", +- lnum, offs); +- goto corrupted; +- } +- } ++ min_io_unit = round_down(offs, c->min_io_size); ++ if (grouped) ++ /* ++ * If nodes are grouped, always drop the incomplete group at ++ * the end. ++ */ ++ drop_last_group(sleb, &offs); + +- /* Drop nodes from incomplete group */ +- if (grouped && drop_incomplete_group(sleb, &offs)) { +- buf = sbuf + offs; +- len = c->leb_size - offs; +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; ++ if (jhead == GCHD) { ++ /* ++ * If this LEB belongs to the GC head then while we are in the ++ * middle of the same min. I/O unit keep dropping nodes. So ++ * basically, what we want is to make sure that the last min. 
++ * I/O unit where we saw the corruption is dropped completely ++ * with all the uncorrupted nodes which may possibly sit there. ++ * ++ * In other words, let's name the min. I/O unit where the ++ * corruption starts B, and the previous min. I/O unit A. The ++ * below code tries to deal with a situation when half of B ++ * contains valid nodes or the end of a valid node, and the ++ * second half of B contains corrupted data or garbage. This ++ * means that UBIFS had been writing to B just before the power ++ * cut happened. I do not know how realistic is this scenario ++ * that half of the min. I/O unit had been written successfully ++ * and the other half not, but this is possible in our 'failure ++ * mode emulation' infrastructure at least. ++ * ++ * So what is the problem, why we need to drop those nodes? Why ++ * can't we just clean-up the second half of B by putting a ++ * padding node there? We can, and this works fine with one ++ * exception which was reproduced with power cut emulation ++ * testing and happens extremely rarely. ++ * ++ * Imagine the file-system is full, we run GC which starts ++ * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is ++ * the current GC head LEB). The @c->gc_lnum is -1, which means ++ * that GC will retain LEB X and will try to continue. Imagine ++ * that LEB X is currently the dirtiest LEB, and the amount of ++ * used space in LEB Y is exactly the same as amount of free ++ * space in LEB X. ++ * ++ * And a power cut happens when nodes are moved from LEB X to ++ * LEB Y. We are here trying to recover LEB Y which is the GC ++ * head LEB. We find the min. I/O unit B as described above. ++ * Then we clean-up LEB Y by padding min. I/O unit. And later ++ * 'ubifs_rcvry_gc_commit()' function fails, because it cannot ++ * find a dirty LEB which could be GC'd into LEB Y! Even LEB X ++ * does not match because the amount of valid nodes there does ++ * not fit the free space in LEB Y any more! 
And this is ++ * because of the padding node which we added to LEB Y. The ++ * user-visible effect of this which I once observed and ++ * analysed is that we cannot mount the file-system with ++ * -ENOSPC error. ++ * ++ * So obviously, to make sure that situation does not happen we ++ * should free min. I/O unit B in LEB Y completely and the last ++ * used min. I/O unit in LEB Y should be A. This is basically ++ * what the below code tries to do. ++ */ ++ while (offs > min_io_unit) ++ drop_last_node(sleb, &offs); + } + +- if (offs % c->min_io_size) { +- clean_buf(c, &buf, lnum, &offs, &len); +- need_clean = 1; +- } ++ buf = sbuf + offs; ++ len = c->leb_size - offs; + ++ clean_buf(c, &buf, lnum, &offs, &len); + ubifs_end_scan(c, sleb, lnum, offs); + +- if (need_clean) { +- err = fix_unclean_leb(c, sleb, start); +- if (err) +- goto error; +- } ++ err = fix_unclean_leb(c, sleb, start); ++ if (err) ++ goto error; + + return sleb; + ++corrupted_rescan: ++ /* Re-scan the corrupted data with verbose messages */ ++ dbg_err("corruptio %d", ret); ++ ubifs_scan_a_node(c, buf, len, lnum, offs, 1); + corrupted: + ubifs_scanned_corruption(c, lnum, offs, buf); + err = -EUCLEAN; +@@ -773,7 +857,8 @@ out_free: + * @sbuf: LEB-sized buffer to use + * + * This function does a scan of a LEB, but caters for errors that might have +- * been caused by the unclean unmount from which we are attempting to recover. ++ * been caused by unclean reboots from which we are attempting to recover ++ * (assume that only the last log LEB can be corrupted by an unclean reboot). + * + * This function returns %0 on success and a negative error code on failure. + */ +@@ -792,7 +877,7 @@ struct ubifs_scan_leb *ubifs_recover_log + * We can only recover at the end of the log, so check that the + * next log LEB is empty or out of date. 
+ */ +- sleb = ubifs_scan(c, next_lnum, 0, sbuf); ++ sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0); + if (IS_ERR(sleb)) + return sleb; + if (sleb->nodes_cnt) { +@@ -819,7 +904,7 @@ struct ubifs_scan_leb *ubifs_recover_log + } + ubifs_scan_destroy(sleb); + } +- return ubifs_recover_leb(c, lnum, offs, sbuf, 0); ++ return ubifs_recover_leb(c, lnum, offs, sbuf, -1); + } + + /** +@@ -836,12 +921,8 @@ struct ubifs_scan_leb *ubifs_recover_log + static int recover_head(const struct ubifs_info *c, int lnum, int offs, + void *sbuf) + { +- int len, err, need_clean = 0; ++ int len = c->max_write_size, err; + +- if (c->min_io_size > 1) +- len = c->min_io_size; +- else +- len = 512; + if (offs + len > c->leb_size) + len = c->leb_size - offs; + +@@ -850,19 +931,7 @@ static int recover_head(const struct ubi + + /* Read at the head location and check it is empty flash */ + err = ubi_read(c->ubi, lnum, sbuf, offs, len); +- if (err) +- need_clean = 1; +- else { +- uint8_t *p = sbuf; +- +- while (len--) +- if (*p++ != 0xff) { +- need_clean = 1; +- break; +- } +- } +- +- if (need_clean) { ++ if (err || !is_empty(sbuf, len)) { + dbg_rcvry("cleaning head at %d:%d", lnum, offs); + if (offs == 0) + return ubifs_leb_unmap(c, lnum); +@@ -896,7 +965,7 @@ int ubifs_recover_inl_heads(const struct + { + int err; + +- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); ++ ubifs_assert(!c->ro_mount || c->remounting_rw); + + dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); + err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); +@@ -1038,6 +1107,53 @@ int ubifs_clean_lebs(const struct ubifs_ + } + + /** ++ * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. ++ * @c: UBIFS file-system description object ++ * ++ * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty ++ * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. 
Returns ++ * zero in case of success and a negative error code in case of failure. ++ */ ++static int grab_empty_leb(struct ubifs_info *c) ++{ ++ int lnum, err; ++ ++ /* ++ * Note, it is very important to first search for an empty LEB and then ++ * run the commit, not vice-versa. The reason is that there might be ++ * only one empty LEB at the moment, the one which has been the ++ * @c->gc_lnum just before the power cut happened. During the regular ++ * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no ++ * one but GC can grab it. But at this moment this single empty LEB is ++ * not marked as taken, so if we run commit - what happens? Right, the ++ * commit will grab it and write the index there. Remember that the ++ * index always expands as long as there is free space, and it only ++ * starts consolidating when we run out of space. ++ * ++ * IOW, if we run commit now, we might not be able to find a free LEB ++ * after this. ++ */ ++ lnum = ubifs_find_free_leb_for_idx(c); ++ if (lnum < 0) { ++ dbg_err("could not find an empty LEB"); ++ dbg_dump_lprops(c); ++ dbg_dump_budg(c, &c->bi); ++ return lnum; ++ } ++ ++ /* Reset the index flag */ ++ err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, ++ LPROPS_INDEX, 0); ++ if (err) ++ return err; ++ ++ c->gc_lnum = lnum; ++ dbg_rcvry("found empty LEB %d, run commit", lnum); ++ ++ return ubifs_run_commit(c); ++} ++ ++/** + * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. + * @c: UBIFS file-system description object + * +@@ -1059,58 +1175,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_i + { + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; + struct ubifs_lprops lp; +- int lnum, err; ++ int err; ++ ++ dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); + + c->gc_lnum = -1; +- if (wbuf->lnum == -1) { +- dbg_rcvry("no GC head LEB"); +- goto find_free; +- } +- /* +- * See whether the used space in the dirtiest LEB fits in the GC head +- * LEB. 
+- */ +- if (wbuf->offs == c->leb_size) { +- dbg_rcvry("no room in GC head LEB"); +- goto find_free; +- } ++ if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) ++ return grab_empty_leb(c); ++ + err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); + if (err) { +- if (err == -ENOSPC) +- dbg_err("could not find a dirty LEB"); +- return err; +- } +- ubifs_assert(!(lp.flags & LPROPS_INDEX)); +- lnum = lp.lnum; +- if (lp.free + lp.dirty == c->leb_size) { +- /* An empty LEB was returned */ +- if (lp.free != c->leb_size) { +- err = ubifs_change_one_lp(c, lnum, c->leb_size, +- 0, 0, 0, 0); +- if (err) +- return err; +- } +- err = ubifs_leb_unmap(c, lnum); +- if (err) ++ if (err != -ENOSPC) + return err; +- c->gc_lnum = lnum; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- /* Run the commit */ +- dbg_rcvry("committing"); +- return ubifs_run_commit(c); +- } +- /* +- * There was no empty LEB so the used space in the dirtiest LEB must fit +- * in the GC head LEB. +- */ +- if (lp.free + lp.dirty < wbuf->offs) { +- dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", +- lnum, wbuf->lnum, wbuf->offs); +- err = ubifs_return_leb(c, lnum); +- if (err) +- return err; +- goto find_free; ++ ++ dbg_rcvry("could not find a dirty LEB"); ++ return grab_empty_leb(c); + } ++ ++ ubifs_assert(!(lp.flags & LPROPS_INDEX)); ++ ubifs_assert(lp.free + lp.dirty >= wbuf->offs); ++ + /* + * We run the commit before garbage collection otherwise subsequent + * mounts will see the GC and orphan deletion in a different order. +@@ -1119,11 +1203,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_i + err = ubifs_run_commit(c); + if (err) + return err; +- /* +- * The data in the dirtiest LEB fits in the GC head LEB, so do the GC +- * - use locking to keep 'ubifs_assert()' happy. 
+- */ +- dbg_rcvry("GC'ing LEB %d", lnum); ++ ++ dbg_rcvry("GC'ing LEB %d", lp.lnum); + mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); + err = ubifs_garbage_collect_leb(c, &lp); + if (err >= 0) { +@@ -1139,37 +1220,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_i + err = -EINVAL; + return err; + } +- if (err != LEB_RETAINED) { +- dbg_err("GC returned %d", err); ++ ++ ubifs_assert(err == LEB_RETAINED); ++ if (err != LEB_RETAINED) + return -EINVAL; +- } ++ + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) + return err; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- return 0; + +-find_free: +- /* +- * There is no GC head LEB or the free space in the GC head LEB is too +- * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so +- * GC is not run. +- */ +- lnum = ubifs_find_free_leb_for_idx(c); +- if (lnum < 0) { +- dbg_err("could not find an empty LEB"); +- return lnum; +- } +- /* And reset the index flag */ +- err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, +- LPROPS_INDEX, 0); +- if (err) +- return err; +- c->gc_lnum = lnum; +- dbg_rcvry("allocated LEB %d for GC", lnum); +- /* Run the commit */ +- dbg_rcvry("committing"); +- return ubifs_run_commit(c); ++ dbg_rcvry("allocated LEB %d for GC", lp.lnum); ++ return 0; + } + + /** +@@ -1411,7 +1472,7 @@ static int fix_size_in_place(struct ubif + err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); + if (err) + goto out; +- dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", ++ dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", + (unsigned long)e->inum, lnum, offs, i_size, e->d_size); + return 0; + +@@ -1460,20 +1521,27 @@ int ubifs_recover_size(struct ubifs_info + e->i_size = le64_to_cpu(ino->size); + } + } ++ + if (e->exists && e->i_size < e->d_size) { +- if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { ++ if (c->ro_mount) { + /* Fix the inode size and pin it in memory */ + struct inode *inode; ++ struct ubifs_inode *ui; ++ ++ ubifs_assert(!e->inode); + + inode = 
ubifs_iget(c->vfs_sb, e->inum); + if (IS_ERR(inode)) + return PTR_ERR(inode); ++ ++ ui = ubifs_inode(inode); + if (inode->i_size < e->d_size) { + dbg_rcvry("ino %lu size %lld -> %lld", + (unsigned long)e->inum, +- e->d_size, inode->i_size); ++ inode->i_size, e->d_size); + inode->i_size = e->d_size; +- ubifs_inode(inode)->ui_size = e->d_size; ++ ui->ui_size = e->d_size; ++ ui->synced_i_size = e->d_size; + e->inode = inode; + this = rb_next(this); + continue; +@@ -1488,9 +1556,11 @@ int ubifs_recover_size(struct ubifs_info + iput(e->inode); + } + } ++ + this = rb_next(this); + rb_erase(&e->rb, &c->size_tree); + kfree(e); + } ++ + return 0; + } +diff -uprN linux-2.6.28/fs/ubifs/replay.c ubifs-v2.6.28/fs/ubifs/replay.c +--- linux-2.6.28/fs/ubifs/replay.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/replay.c 2011-06-15 14:22:09.000000000 -0400 +@@ -33,43 +33,32 @@ + */ + + #include "ubifs.h" +- +-/* +- * Replay flags. +- * +- * REPLAY_DELETION: node was deleted +- * REPLAY_REF: node is a reference node +- */ +-enum { +- REPLAY_DELETION = 1, +- REPLAY_REF = 2, +-}; ++#include + + /** +- * struct replay_entry - replay tree entry. ++ * struct replay_entry - replay list entry. + * @lnum: logical eraseblock number of the node + * @offs: node offset + * @len: node length ++ * @deletion: non-zero if this entry corresponds to a node deletion + * @sqnum: node sequence number +- * @flags: replay flags +- * @rb: links the replay tree ++ * @list: links the replay list + * @key: node key + * @nm: directory entry name + * @old_size: truncation old size + * @new_size: truncation new size +- * @free: amount of free space in a bud +- * @dirty: amount of dirty space in a bud from padding and deletion nodes + * +- * UBIFS journal replay must compare node sequence numbers, which means it must +- * build a tree of node information to insert into the TNC. 
++ * The replay process first scans all buds and builds the replay list, then ++ * sorts the replay list in nodes sequence number order, and then inserts all ++ * the replay entries to the TNC. + */ + struct replay_entry { + int lnum; + int offs; + int len; ++ unsigned int deletion:1; + unsigned long long sqnum; +- int flags; +- struct rb_node rb; ++ struct list_head list; + union ubifs_key key; + union { + struct qstr nm; +@@ -77,10 +66,6 @@ struct replay_entry { + loff_t old_size; + loff_t new_size; + }; +- struct { +- int free; +- int dirty; +- }; + }; + }; + +@@ -88,83 +73,117 @@ struct replay_entry { + * struct bud_entry - entry in the list of buds to replay. + * @list: next bud in the list + * @bud: bud description object +- * @free: free bytes in the bud + * @sqnum: reference node sequence number ++ * @free: free bytes in the bud ++ * @dirty: dirty bytes in the bud + */ + struct bud_entry { + struct list_head list; + struct ubifs_bud *bud; +- int free; + unsigned long long sqnum; ++ int free; ++ int dirty; + }; + + /** + * set_bud_lprops - set free and dirty space used by a bud. + * @c: UBIFS file-system description object +- * @r: replay entry of bud ++ * @b: bud entry which describes the bud ++ * ++ * This function makes sure the LEB properties of bud @b are set correctly ++ * after the replay. Returns zero in case of success and a negative error code ++ * in case of failure. 
+ */ +-static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) ++static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) + { + const struct ubifs_lprops *lp; + int err = 0, dirty; + + ubifs_get_lprops(c); + +- lp = ubifs_lpt_lookup_dirty(c, r->lnum); ++ lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } + + dirty = lp->dirty; +- if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { ++ if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { + /* + * The LEB was added to the journal with a starting offset of + * zero which means the LEB must have been empty. The LEB +- * property values should be lp->free == c->leb_size and +- * lp->dirty == 0, but that is not the case. The reason is that +- * the LEB was garbage collected. The garbage collector resets +- * the free and dirty space without recording it anywhere except +- * lprops, so if there is not a commit then lprops does not have +- * that information next time the file system is mounted. ++ * property values should be @lp->free == @c->leb_size and ++ * @lp->dirty == 0, but that is not the case. The reason is that ++ * the LEB had been garbage collected before it became the bud, ++ * and there was not commit inbetween. The garbage collector ++ * resets the free and dirty space without recording it ++ * anywhere except lprops, so if there was no commit then ++ * lprops does not have that information. + * + * We do not need to adjust free space because the scan has told + * us the exact value which is recorded in the replay entry as +- * r->free. ++ * @b->free. + * + * However we do need to subtract from the dirty space the + * amount of space that the garbage collector reclaimed, which + * is the whole LEB minus the amount of space that was free. 
+ */ +- dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, ++ dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, + lp->free, lp->dirty); +- dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, ++ dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, + lp->free, lp->dirty); + dirty -= c->leb_size - lp->free; + /* + * If the replay order was perfect the dirty space would now be +- * zero. The order is not perfect because the the journal heads ++ * zero. The order is not perfect because the journal heads + * race with each other. This is not a problem but is does mean + * that the dirty space may temporarily exceed c->leb_size + * during the replay. + */ + if (dirty != 0) + dbg_msg("LEB %d lp: %d free %d dirty " +- "replay: %d free %d dirty", r->lnum, lp->free, +- lp->dirty, r->free, r->dirty); ++ "replay: %d free %d dirty", b->bud->lnum, ++ lp->free, lp->dirty, b->free, b->dirty); + } +- lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, ++ lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, + lp->flags | LPROPS_TAKEN, 0); + if (IS_ERR(lp)) { + err = PTR_ERR(lp); + goto out; + } ++ ++ /* Make sure the journal head points to the latest bud */ ++ err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, ++ b->bud->lnum, c->leb_size - b->free, ++ UBI_SHORTTERM); ++ + out: + ubifs_release_lprops(c); + return err; + } + + /** ++ * set_buds_lprops - set free and dirty space for all replayed buds. ++ * @c: UBIFS file-system description object ++ * ++ * This function sets LEB properties for all replayed buds. Returns zero in ++ * case of success and a negative error code in case of failure. ++ */ ++static int set_buds_lprops(struct ubifs_info *c) ++{ ++ struct bud_entry *b; ++ int err; ++ ++ list_for_each_entry(b, &c->replay_buds, list) { ++ err = set_bud_lprops(c, b); ++ if (err) ++ return err; ++ } ++ ++ return 0; ++} ++ ++/** + * trun_remove_range - apply a replay entry for a truncation to the TNC. 
+ * @c: UBIFS file-system description object + * @r: replay entry of truncation +@@ -200,24 +219,22 @@ static int trun_remove_range(struct ubif + */ + static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) + { +- int err, deletion = ((r->flags & REPLAY_DELETION) != 0); ++ int err; + +- dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, +- r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); ++ dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum, ++ r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key)); + + /* Set c->replay_sqnum to help deal with dangling branches. */ + c->replay_sqnum = r->sqnum; + +- if (r->flags & REPLAY_REF) +- err = set_bud_lprops(c, r); +- else if (is_hash_key(c, &r->key)) { +- if (deletion) ++ if (is_hash_key(c, &r->key)) { ++ if (r->deletion) + err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); + else + err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, + r->len, &r->nm); + } else { +- if (deletion) ++ if (r->deletion) + switch (key_type(c, &r->key)) { + case UBIFS_INO_KEY: + { +@@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubi + return err; + + if (c->need_recovery) +- err = ubifs_recover_size_accum(c, &r->key, deletion, ++ err = ubifs_recover_size_accum(c, &r->key, r->deletion, + r->new_size); + } + +@@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubi + } + + /** +- * destroy_replay_tree - destroy the replay. +- * @c: UBIFS file-system description object ++ * replay_entries_cmp - compare 2 replay entries. ++ * @priv: UBIFS file-system description object ++ * @a: first replay entry ++ * @a: second replay entry + * +- * Destroy the replay tree. ++ * This is a comparios function for 'list_sort()' which compares 2 replay ++ * entries @a and @b by comparing their sequence numer. Returns %1 if @a has ++ * greater sequence number and %-1 otherwise. 
+ */ +-static void destroy_replay_tree(struct ubifs_info *c) ++static int replay_entries_cmp(void *priv, struct list_head *a, ++ struct list_head *b) + { +- struct rb_node *this = c->replay_tree.rb_node; +- struct replay_entry *r; ++ struct replay_entry *ra, *rb; + +- while (this) { +- if (this->rb_left) { +- this = this->rb_left; +- continue; +- } else if (this->rb_right) { +- this = this->rb_right; +- continue; +- } +- r = rb_entry(this, struct replay_entry, rb); +- this = rb_parent(this); +- if (this) { +- if (this->rb_left == &r->rb) +- this->rb_left = NULL; +- else +- this->rb_right = NULL; +- } +- if (is_hash_key(c, &r->key)) +- kfree(r->nm.name); +- kfree(r); +- } +- c->replay_tree = RB_ROOT; ++ cond_resched(); ++ if (a == b) ++ return 0; ++ ++ ra = list_entry(a, struct replay_entry, list); ++ rb = list_entry(b, struct replay_entry, list); ++ ubifs_assert(ra->sqnum != rb->sqnum); ++ if (ra->sqnum > rb->sqnum) ++ return 1; ++ return -1; + } + + /** +- * apply_replay_tree - apply the replay tree to the TNC. ++ * apply_replay_list - apply the replay list to the TNC. + * @c: UBIFS file-system description object + * +- * Apply the replay tree. +- * Returns zero in case of success and a negative error code in case of +- * failure. ++ * Apply all entries in the replay list to the TNC. Returns zero in case of ++ * success and a negative error code in case of failure. + */ +-static int apply_replay_tree(struct ubifs_info *c) ++static int apply_replay_list(struct ubifs_info *c) + { +- struct rb_node *this = rb_first(&c->replay_tree); ++ struct replay_entry *r; ++ int err; + +- while (this) { +- struct replay_entry *r; +- int err; ++ list_sort(c, &c->replay_list, &replay_entries_cmp); + ++ list_for_each_entry(r, &c->replay_list, list) { + cond_resched(); + +- r = rb_entry(this, struct replay_entry, rb); + err = apply_replay_entry(c, r); + if (err) + return err; +- this = rb_next(this); + } ++ + return 0; + } + + /** +- * insert_node - insert a node to the replay tree. 
++ * destroy_replay_list - destroy the replay. ++ * @c: UBIFS file-system description object ++ * ++ * Destroy the replay list. ++ */ ++static void destroy_replay_list(struct ubifs_info *c) ++{ ++ struct replay_entry *r, *tmp; ++ ++ list_for_each_entry_safe(r, tmp, &c->replay_list, list) { ++ if (is_hash_key(c, &r->key)) ++ kfree(r->nm.name); ++ list_del(&r->list); ++ kfree(r); ++ } ++} ++ ++/** ++ * insert_node - insert a node to the replay list + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset +@@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubif + * @old_size: truncation old size + * @new_size: truncation new size + * +- * This function inserts a scanned non-direntry node to the replay tree. The +- * replay tree is an RB-tree containing @struct replay_entry elements which are +- * indexed by the sequence number. The replay tree is applied at the very end +- * of the replay process. Since the tree is sorted in sequence number order, +- * the older modifications are applied first. This function returns zero in +- * case of success and a negative error code in case of failure. ++ * This function inserts a scanned non-direntry node to the replay list. The ++ * replay list contains @struct replay_entry elements, and we sort this list in ++ * sequence number order before applying it. The replay list is applied at the ++ * very end of the replay process. Since the list is sorted in sequence number ++ * order, the older modifications are applied first. This function returns zero ++ * in case of success and a negative error code in case of failure. 
+ */ + static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, unsigned long long sqnum, + int deletion, int *used, loff_t old_size, + loff_t new_size) + { +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + ++ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); ++ + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + +- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } else if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay"); +- return -EINVAL; +- } +- + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; +@@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info + r->lnum = lnum; + r->offs = offs; + r->len = len; ++ r->deletion = !!deletion; + r->sqnum = sqnum; +- r->flags = (deletion ? REPLAY_DELETION : 0); ++ key_copy(c, key, &r->key); + r->old_size = old_size; + r->new_size = new_size; +- key_copy(c, key, &r->key); + +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); ++ list_add_tail(&r->list, &c->replay_list); + return 0; + } + + /** +- * insert_dent - insert a directory entry node into the replay tree. ++ * insert_dent - insert a directory entry node into the replay list. + * @c: UBIFS file-system description object + * @lnum: node logical eraseblock number + * @offs: node offset +@@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info + * @deletion: non-zero if this is a deletion + * @used: number of bytes in use in a LEB + * +- * This function inserts a scanned directory entry node to the replay tree. +- * Returns zero in case of success and a negative error code in case of +- * failure. 
+- * +- * This function is also used for extended attribute entries because they are +- * implemented as directory entry nodes. ++ * This function inserts a scanned directory entry node or an extended ++ * attribute entry to the replay list. Returns zero in case of success and a ++ * negative error code in case of failure. + */ + static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, + union ubifs_key *key, const char *name, int nlen, + unsigned long long sqnum, int deletion, int *used) + { +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; + struct replay_entry *r; + char *nbuf; + ++ dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + if (key_inum(c, key) >= c->highest_inum) + c->highest_inum = key_inum(c, key); + +- dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } +- if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay"); +- return -EINVAL; +- } +- + r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); + if (!r) + return -ENOMEM; ++ + nbuf = kmalloc(nlen + 1, GFP_KERNEL); + if (!nbuf) { + kfree(r); +@@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info + r->lnum = lnum; + r->offs = offs; + r->len = len; ++ r->deletion = !!deletion; + r->sqnum = sqnum; ++ key_copy(c, key, &r->key); + r->nm.len = nlen; + memcpy(nbuf, name, nlen); + nbuf[nlen] = '\0'; + r->nm.name = nbuf; +- r->flags = (deletion ? REPLAY_DELETION : 0); +- key_copy(c, key, &r->key); + +- ubifs_assert(!*p); +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); ++ list_add_tail(&r->list, &c->replay_list); + return 0; + } + +@@ -482,31 +473,93 @@ int ubifs_validate_entry(struct ubifs_in + } + + /** ++ * is_last_bud - check if the bud is the last in the journal head. 
++ * @c: UBIFS file-system description object ++ * @bud: bud description object ++ * ++ * This function checks if bud @bud is the last bud in its journal head. This ++ * information is then used by 'replay_bud()' to decide whether the bud can ++ * have corruptions or not. Indeed, only last buds can be corrupted by power ++ * cuts. Returns %1 if this is the last bud, and %0 if not. ++ */ ++static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) ++{ ++ struct ubifs_jhead *jh = &c->jheads[bud->jhead]; ++ struct ubifs_bud *next; ++ uint32_t data; ++ int err; ++ ++ if (list_is_last(&bud->list, &jh->buds_list)) ++ return 1; ++ ++ /* ++ * The following is a quirk to make sure we work correctly with UBIFS ++ * images used with older UBIFS. ++ * ++ * Normally, the last bud will be the last in the journal head's list ++ * of bud. However, there is one exception if the UBIFS image belongs ++ * to older UBIFS. This is fairly unlikely: one would need to use old ++ * UBIFS, then have a power cut exactly at the right point, and then ++ * try to mount this image with new UBIFS. ++ * ++ * The exception is: it is possible to have 2 buds A and B, A goes ++ * before B, and B is the last, bud B is contains no data, and bud A is ++ * corrupted at the end. The reason is that in older versions when the ++ * journal code switched the next bud (from A to B), it first added a ++ * log reference node for the new bud (B), and only after this it ++ * synchronized the write-buffer of current bud (A). But later this was ++ * changed and UBIFS started to always synchronize the write-buffer of ++ * the bud (A) before writing the log reference for the new bud (B). ++ * ++ * But because older UBIFS always synchronized A's write-buffer before ++ * writing to B, we can recognize this exceptional situation but ++ * checking the contents of bud B - if it is empty, then A can be ++ * treated as the last and we can recover it. 
++ * ++ * TODO: remove this piece of code in a couple of years (today it is ++ * 16.05.2011). ++ */ ++ next = list_entry(bud->list.next, struct ubifs_bud, list); ++ if (!list_is_last(&next->list, &jh->buds_list)) ++ return 0; ++ ++ err = ubi_read(c->ubi, next->lnum, (char *)&data, ++ next->start, 4); ++ if (err) ++ return 0; ++ ++ return data == 0xFFFFFFFF; ++} ++ ++/** + * replay_bud - replay a bud logical eraseblock. + * @c: UBIFS file-system description object +- * @lnum: bud logical eraseblock number to replay +- * @offs: bud start offset +- * @jhead: journal head to which this bud belongs +- * @free: amount of free space in the bud is returned here +- * @dirty: amount of dirty space from padding and deletion nodes is returned +- * here ++ * @b: bud entry which describes the bud + * +- * This function returns zero in case of success and a negative error code in +- * case of failure. ++ * This function replays bud @bud, recovers it if needed, and adds all nodes ++ * from this bud to the replay list. Returns zero in case of success and a ++ * negative error code in case of failure. 
+ */ +-static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, +- int *free, int *dirty) ++static int replay_bud(struct ubifs_info *c, struct bud_entry *b) + { +- int err = 0, used = 0; ++ int is_last = is_last_bud(c, b->bud); ++ int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; + struct ubifs_scan_leb *sleb; + struct ubifs_scan_node *snod; +- struct ubifs_bud *bud; + +- dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); +- if (c->need_recovery) +- sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); ++ dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", ++ lnum, b->bud->jhead, offs, is_last); ++ ++ if (c->need_recovery && is_last) ++ /* ++ * Recover only last LEBs in the journal heads, because power ++ * cuts may cause corruptions only in these LEBs, because only ++ * these LEBs could possibly be written to at the power cut ++ * time. ++ */ ++ sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead); + else +- sleb = ubifs_scan(c, lnum, offs, c->sbuf); ++ sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + +@@ -620,20 +673,13 @@ static int replay_bud(struct ubifs_info + goto out; + } + +- bud = ubifs_search_bud(c, lnum); +- if (!bud) +- BUG(); +- ++ ubifs_assert(ubifs_search_bud(c, lnum)); + ubifs_assert(sleb->endpt - offs >= used); + ubifs_assert(sleb->endpt % c->min_io_size == 0); + +- if (sleb->endpt + c->min_io_size <= c->leb_size && +- !(c->vfs_sb->s_flags & MS_RDONLY)) +- err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, +- sleb->endpt, UBI_SHORTTERM); +- +- *dirty = sleb->endpt - offs - used; +- *free = c->leb_size - sleb->endpt; ++ b->dirty = sleb->endpt - offs - used; ++ b->free = c->leb_size - sleb->endpt; ++ dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); + + out: + ubifs_scan_destroy(sleb); +@@ -647,55 +693,6 @@ out_dump: + } + + /** +- * insert_ref_node - insert a reference node to the replay tree. 
+- * @c: UBIFS file-system description object +- * @lnum: node logical eraseblock number +- * @offs: node offset +- * @sqnum: sequence number +- * @free: amount of free space in bud +- * @dirty: amount of dirty space from padding and deletion nodes +- * +- * This function inserts a reference node to the replay tree and returns zero +- * in case of success or a negative error code in case of failure. +- */ +-static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, +- unsigned long long sqnum, int free, int dirty) +-{ +- struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; +- struct replay_entry *r; +- +- dbg_mnt("add ref LEB %d:%d", lnum, offs); +- while (*p) { +- parent = *p; +- r = rb_entry(parent, struct replay_entry, rb); +- if (sqnum < r->sqnum) { +- p = &(*p)->rb_left; +- continue; +- } else if (sqnum > r->sqnum) { +- p = &(*p)->rb_right; +- continue; +- } +- ubifs_err("duplicate sqnum in replay tree"); +- return -EINVAL; +- } +- +- r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); +- if (!r) +- return -ENOMEM; +- +- r->lnum = lnum; +- r->offs = offs; +- r->sqnum = sqnum; +- r->flags = REPLAY_REF; +- r->free = free; +- r->dirty = dirty; +- +- rb_link_node(&r->rb, parent, p); +- rb_insert_color(&r->rb, &c->replay_tree); +- return 0; +-} +- +-/** + * replay_buds - replay all buds. 
+ * @c: UBIFS file-system description object + * +@@ -705,17 +702,16 @@ static int insert_ref_node(struct ubifs_ + static int replay_buds(struct ubifs_info *c) + { + struct bud_entry *b; +- int err, uninitialized_var(free), uninitialized_var(dirty); ++ int err; ++ unsigned long long prev_sqnum = 0; + + list_for_each_entry(b, &c->replay_buds, list) { +- err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, +- &free, &dirty); +- if (err) +- return err; +- err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, +- free, dirty); ++ err = replay_bud(c, b); + if (err) + return err; ++ ++ ubifs_assert(b->sqnum > prev_sqnum); ++ prev_sqnum = b->sqnum; + } + + return 0; +@@ -836,10 +832,16 @@ static int replay_log_leb(struct ubifs_i + const struct ubifs_cs_node *node; + + dbg_mnt("replay log LEB %d:%d", lnum, offs); +- sleb = ubifs_scan(c, lnum, offs, sbuf); ++ sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery); + if (IS_ERR(sleb)) { +- if (c->need_recovery) +- sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); ++ if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) ++ return PTR_ERR(sleb); ++ /* ++ * Note, the below function will recover this log LEB only if ++ * it is the last, because unclean reboots can possibly corrupt ++ * only the tail of the log. 
++ */ ++ sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); + if (IS_ERR(sleb)) + return PTR_ERR(sleb); + } +@@ -850,7 +852,6 @@ static int replay_log_leb(struct ubifs_i + } + + node = sleb->buf; +- + snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); + if (c->cs_sqnum == 0) { + /* +@@ -897,7 +898,6 @@ static int replay_log_leb(struct ubifs_i + } + + list_for_each_entry(snod, &sleb->nodes, list) { +- + cond_resched(); + + if (snod->sqnum >= SQNUM_WATERMARK) { +@@ -1010,7 +1010,6 @@ out: + int ubifs_replay_journal(struct ubifs_info *c) + { + int err, i, lnum, offs, free; +- void *sbuf = NULL; + + BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); + +@@ -1025,14 +1024,8 @@ int ubifs_replay_journal(struct ubifs_in + return -EINVAL; + } + +- sbuf = vmalloc(c->leb_size); +- if (!sbuf) +- return -ENOMEM; +- + dbg_mnt("start replaying the journal"); +- + c->replaying = 1; +- + lnum = c->ltail_lnum = c->lhead_lnum; + offs = c->lhead_offs; + +@@ -1045,7 +1038,7 @@ int ubifs_replay_journal(struct ubifs_in + lnum = UBIFS_LOG_LNUM; + offs = 0; + } +- err = replay_log_leb(c, lnum, offs, sbuf); ++ err = replay_log_leb(c, lnum, offs, c->sbuf); + if (err == 1) + /* We hit the end of the log */ + break; +@@ -1058,27 +1051,30 @@ int ubifs_replay_journal(struct ubifs_in + if (err) + goto out; + +- err = apply_replay_tree(c); ++ err = apply_replay_list(c); ++ if (err) ++ goto out; ++ ++ err = set_buds_lprops(c); + if (err) + goto out; + + /* +- * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable +- * to roughly estimate index growth. Things like @c->min_idx_lebs ++ * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable ++ * to roughly estimate index growth. Things like @c->bi.min_idx_lebs + * depend on it. This means we have to initialize it to make sure + * budgeting works properly. 
+ */ +- c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); +- c->budg_uncommitted_idx *= c->max_idx_node_sz; ++ c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); ++ c->bi.uncommitted_idx *= c->max_idx_node_sz; + + ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); + dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " + "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, + (unsigned long)c->highest_inum); + out: +- destroy_replay_tree(c); ++ destroy_replay_list(c); + destroy_bud_list(c); +- vfree(sbuf); + c->replaying = 0; + return err; + } +diff -uprN linux-2.6.28/fs/ubifs/sb.c ubifs-v2.6.28/fs/ubifs/sb.c +--- linux-2.6.28/fs/ubifs/sb.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/sb.c 2011-06-15 14:22:09.000000000 -0400 +@@ -181,12 +181,9 @@ static int create_default_filesystem(str + sup->lsave_cnt = cpu_to_le32(c->lsave_cnt); + sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION); + sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN); +- if (c->mount_opts.override_compr) { +- if (c->mount_opts.compr_type == UBIFS_COMPR_LZO999) +- sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); +- else +- sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); +- } else ++ if (c->mount_opts.override_compr) ++ sup->default_compr = cpu_to_le16(c->mount_opts.compr_type); ++ else + sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO); + + generate_random_uuid(sup->uuid); +@@ -196,6 +193,7 @@ static int create_default_filesystem(str + if (tmp64 > DEFAULT_MAX_RP_SIZE) + tmp64 = DEFAULT_MAX_RP_SIZE; + sup->rp_size = cpu_to_le64(tmp64); ++ sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION); + + err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM); + kfree(sup); +@@ -476,7 +474,8 @@ failed: + * @c: UBIFS file-system description object + * + * This function returns a pointer to the superblock node or a negative error +- * code. ++ * code. 
Note, the user of this function is responsible of kfree()'ing the ++ * returned superblock buffer. + */ + struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) + { +@@ -535,17 +534,36 @@ int ubifs_read_superblock(struct ubifs_i + if (IS_ERR(sup)) + return PTR_ERR(sup); + ++ c->fmt_version = le32_to_cpu(sup->fmt_version); ++ c->ro_compat_version = le32_to_cpu(sup->ro_compat_version); ++ + /* + * The software supports all previous versions but not future versions, + * due to the unavailability of time-travelling equipment. + */ +- c->fmt_version = le32_to_cpu(sup->fmt_version); + if (c->fmt_version > UBIFS_FORMAT_VERSION) { +- ubifs_err("on-flash format version is %d, but software only " +- "supports up to version %d", c->fmt_version, +- UBIFS_FORMAT_VERSION); +- err = -EINVAL; +- goto out; ++ ubifs_assert(!c->ro_media || c->ro_mount); ++ if (!c->ro_mount || ++ c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { ++ ubifs_err("on-flash format version is w%d/r%d, but " ++ "software only supports up to version " ++ "w%d/r%d", c->fmt_version, ++ c->ro_compat_version, UBIFS_FORMAT_VERSION, ++ UBIFS_RO_COMPAT_VERSION); ++ if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) { ++ ubifs_msg("only R/O mounting is possible"); ++ err = -EROFS; ++ } else ++ err = -EINVAL; ++ goto out; ++ } ++ ++ /* ++ * The FS is mounted R/O, and the media format is ++ * R/O-compatible with the UBIFS implementation, so we can ++ * mount. 
++ */ ++ c->rw_incompat = 1; + } + + if (c->fmt_version < 3) { +@@ -598,12 +616,13 @@ int ubifs_read_superblock(struct ubifs_i + c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); + memcpy(&c->uuid, &sup->uuid, 16); + c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); ++ c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); + + /* Automatically increase file system size to the maximum size */ + c->old_leb_cnt = c->leb_cnt; + if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { + c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); +- if (c->vfs_sb->s_flags & MS_RDONLY) ++ if (c->ro_mount) + dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", + c->old_leb_cnt, c->leb_cnt); + else { +@@ -626,10 +645,158 @@ int ubifs_read_superblock(struct ubifs_i + c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS; + c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs; + c->main_first = c->leb_cnt - c->main_lebs; +- c->report_rp_size = ubifs_reported_space(c, c->rp_size); + + err = validate_sb(c, sup); + out: + kfree(sup); + return err; + } ++ ++/** ++ * fixup_leb - fixup/unmap an LEB containing free space. ++ * @c: UBIFS file-system description object ++ * @lnum: the LEB number to fix up ++ * @len: number of used bytes in LEB (starting at offset 0) ++ * ++ * This function reads the contents of the given LEB number @lnum, then fixes ++ * it up, so that empty min. I/O units in the end of LEB are actually erased on ++ * flash (rather than being just all-0xff real data). If the LEB is completely ++ * empty, it is simply unmapped. 
++ */ ++static int fixup_leb(struct ubifs_info *c, int lnum, int len) ++{ ++ int err; ++ ++ ubifs_assert(len >= 0); ++ ubifs_assert(len % c->min_io_size == 0); ++ ubifs_assert(len < c->leb_size); ++ ++ if (len == 0) { ++ dbg_mnt("unmap empty LEB %d", lnum); ++ return ubi_leb_unmap(c->ubi, lnum); ++ } ++ ++ dbg_mnt("fixup LEB %d, data len %d", lnum, len); ++ err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); ++ if (err) ++ return err; ++ ++ return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); ++} ++ ++/** ++ * fixup_free_space - find & remap all LEBs containing free space. ++ * @c: UBIFS file-system description object ++ * ++ * This function walks through all LEBs in the filesystem and fiexes up those ++ * containing free/empty space. ++ */ ++static int fixup_free_space(struct ubifs_info *c) ++{ ++ int lnum, err = 0; ++ struct ubifs_lprops *lprops; ++ ++ ubifs_get_lprops(c); ++ ++ /* Fixup LEBs in the master area */ ++ for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { ++ err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); ++ if (err) ++ goto out; ++ } ++ ++ /* Unmap unused log LEBs */ ++ lnum = ubifs_next_log_lnum(c, c->lhead_lnum); ++ while (lnum != c->ltail_lnum) { ++ err = fixup_leb(c, lnum, 0); ++ if (err) ++ goto out; ++ lnum = ubifs_next_log_lnum(c, lnum); ++ } ++ ++ /* Fixup the current log head */ ++ err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); ++ if (err) ++ goto out; ++ ++ /* Fixup LEBs in the LPT area */ ++ for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { ++ int free = c->ltab[lnum - c->lpt_first].free; ++ ++ if (free > 0) { ++ err = fixup_leb(c, lnum, c->leb_size - free); ++ if (err) ++ goto out; ++ } ++ } ++ ++ /* Unmap LEBs in the orphans area */ ++ for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { ++ err = fixup_leb(c, lnum, 0); ++ if (err) ++ goto out; ++ } ++ ++ /* Fixup LEBs in the main area */ ++ for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { ++ lprops = ubifs_lpt_lookup(c, lnum); ++ if 
(IS_ERR(lprops)) { ++ err = PTR_ERR(lprops); ++ goto out; ++ } ++ ++ if (lprops->free > 0) { ++ err = fixup_leb(c, lnum, c->leb_size - lprops->free); ++ if (err) ++ goto out; ++ } ++ } ++ ++out: ++ ubifs_release_lprops(c); ++ return err; ++} ++ ++/** ++ * ubifs_fixup_free_space - find & fix all LEBs with free space. ++ * @c: UBIFS file-system description object ++ * ++ * This function fixes up LEBs containing free space on first mount, if the ++ * appropriate flag was set when the FS was created. Each LEB with one or more ++ * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure ++ * the free space is actually erased. E.g., this is necessary for some NAND ++ * chips, since the free space may have been programmed like real "0xff" data ++ * (generating a non-0xff ECC), causing future writes to the not-really-erased ++ * NAND pages to behave badly. After the space is fixed up, the superblock flag ++ * is cleared, so that this is skipped for all future mounts. ++ */ ++int ubifs_fixup_free_space(struct ubifs_info *c) ++{ ++ int err; ++ struct ubifs_sb_node *sup; ++ ++ ubifs_assert(c->space_fixup); ++ ubifs_assert(!c->ro_mount); ++ ++ ubifs_msg("start fixing up free space"); ++ ++ err = fixup_free_space(c); ++ if (err) ++ return err; ++ ++ sup = ubifs_read_sb_node(c); ++ if (IS_ERR(sup)) ++ return PTR_ERR(sup); ++ ++ /* Free-space fixup is no longer required */ ++ c->space_fixup = 0; ++ sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); ++ ++ err = ubifs_write_sb_node(c, sup); ++ kfree(sup); ++ if (err) ++ return err; ++ ++ ubifs_msg("free space fixup complete"); ++ return err; ++} +diff -uprN linux-2.6.28/fs/ubifs/scan.c ubifs-v2.6.28/fs/ubifs/scan.c +--- linux-2.6.28/fs/ubifs/scan.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/fs/ubifs/scan.c 2011-06-15 14:22:09.000000000 -0400 +@@ -108,10 +108,9 @@ int ubifs_scan_a_node(const struct ubifs + + /* Make the node pads to 8-byte boundary */ + if ((node_len + pad_len) & 7) { +- if (!quiet) { 
++ if (!quiet) + dbg_err("bad padding length %d - %d", + offs, offs + node_len + pad_len); +- } + return SCANNED_A_BAD_PAD_NODE; + } + +@@ -198,7 +197,7 @@ int ubifs_add_snod(const struct ubifs_in + struct ubifs_ino_node *ino = buf; + struct ubifs_scan_node *snod; + +- snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); ++ snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); + if (!snod) + return -ENOMEM; + +@@ -213,13 +212,15 @@ int ubifs_add_snod(const struct ubifs_in + case UBIFS_DENT_NODE: + case UBIFS_XENT_NODE: + case UBIFS_DATA_NODE: +- case UBIFS_TRUN_NODE: + /* + * The key is in the same place in all keyed + * nodes. + */ + key_read(c, &ino->key, &snod->key); + break; ++ default: ++ invalid_key_init(c, &snod->key); ++ break; + } + list_add_tail(&snod->list, &sleb->nodes); + sleb->nodes_cnt += 1; +@@ -238,12 +239,12 @@ void ubifs_scanned_corruption(const stru + { + int len; + +- ubifs_err("corrupted data at LEB %d:%d", lnum, offs); ++ ubifs_err("corruption at LEB %d:%d", lnum, offs); + if (dbg_failure_mode) + return; + len = c->leb_size - offs; +- if (len > 4096) +- len = 4096; ++ if (len > 8192) ++ len = 8192; + dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); + } +@@ -253,13 +254,19 @@ void ubifs_scanned_corruption(const stru + * @c: UBIFS file-system description object + * @lnum: logical eraseblock number + * @offs: offset to start at (usually zero) +- * @sbuf: scan buffer (must be c->leb_size) ++ * @sbuf: scan buffer (must be of @c->leb_size bytes in size) ++ * @quiet: print no messages + * + * This function scans LEB number @lnum and returns complete information about +- * its contents. Returns an error code in case of failure. ++ * its contents. Returns the scaned information in case of success and, ++ * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case ++ * of failure. 
++ * ++ * If @quiet is non-zero, this function does not print large and scary ++ * error messages and flash dumps in case of errors. + */ + struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, +- int offs, void *sbuf) ++ int offs, void *sbuf, int quiet) + { + void *buf = sbuf + offs; + int err, len = c->leb_size - offs; +@@ -278,8 +285,7 @@ struct ubifs_scan_leb *ubifs_scan(const + + cond_resched(); + +- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); +- ++ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); + if (ret > 0) { + /* Padding bytes or a valid padding node */ + offs += ret; +@@ -304,7 +310,8 @@ struct ubifs_scan_leb *ubifs_scan(const + goto corrupted; + default: + dbg_err("unknown"); +- goto corrupted; ++ err = -EINVAL; ++ goto error; + } + + err = ubifs_add_snod(c, sleb, buf, offs); +@@ -317,8 +324,12 @@ struct ubifs_scan_leb *ubifs_scan(const + len -= node_len; + } + +- if (offs % c->min_io_size) ++ if (offs % c->min_io_size) { ++ if (!quiet) ++ ubifs_err("empty space starts at non-aligned offset %d", ++ offs); + goto corrupted; ++ } + + ubifs_end_scan(c, sleb, lnum, offs); + +@@ -327,18 +338,25 @@ struct ubifs_scan_leb *ubifs_scan(const + break; + for (; len; offs++, buf++, len--) + if (*(uint8_t *)buf != 0xff) { +- ubifs_err("corrupt empty space at LEB %d:%d", +- lnum, offs); ++ if (!quiet) ++ ubifs_err("corrupt empty space at LEB %d:%d", ++ lnum, offs); + goto corrupted; + } + + return sleb; + + corrupted: +- ubifs_scanned_corruption(c, lnum, offs, buf); ++ if (!quiet) { ++ ubifs_scanned_corruption(c, lnum, offs, buf); ++ ubifs_err("LEB %d scanning failed", lnum); ++ } + err = -EUCLEAN; ++ ubifs_scan_destroy(sleb); ++ return ERR_PTR(err); ++ + error: +- ubifs_err("LEB %d scanning failed", lnum); ++ ubifs_err("LEB %d scanning failed, error %d", lnum, err); + ubifs_scan_destroy(sleb); + return ERR_PTR(err); + } +diff -uprN linux-2.6.28/fs/ubifs/shrinker.c ubifs-v2.6.28/fs/ubifs/shrinker.c +--- 
linux-2.6.28/fs/ubifs/shrinker.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/fs/ubifs/shrinker.c 2011-06-15 14:22:09.000000000 -0400 +@@ -151,7 +151,7 @@ static int shrink_tnc(struct ubifs_info + * @contention: if any contention, this is set to %1 + * + * This function walks the list of mounted UBIFS file-systems and frees clean +- * znodes which are older then @age, until at least @nr znodes are freed. ++ * znodes which are older than @age, until at least @nr znodes are freed. + * Returns the number of freed znodes. + */ + static int shrink_tnc_trees(int nr, int age, int *contention) +@@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int + * Move this one to the end of the list to provide some + * fairness. + */ +- list_del(&c->infos_list); +- list_add_tail(&c->infos_list, &ubifs_infos); ++ list_move_tail(&c->infos_list, &ubifs_infos); + mutex_unlock(&c->umount_mutex); + if (freed >= nr) + break; +@@ -251,7 +250,7 @@ static int kick_a_thread(void) + dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); + + if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || +- c->ro_media) { ++ c->ro_mount || c->ro_error) { + mutex_unlock(&c->umount_mutex); + continue; + } +@@ -263,8 +262,7 @@ static int kick_a_thread(void) + } + + if (i == 1) { +- list_del(&c->infos_list); +- list_add_tail(&c->infos_list, &ubifs_infos); ++ list_move_tail(&c->infos_list, &ubifs_infos); + spin_unlock(&ubifs_infos_lock); + + ubifs_request_bg_commit(c); +@@ -285,7 +283,11 @@ int ubifs_shrinker(int nr, gfp_t gfp_mas + long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); + + if (nr == 0) +- return clean_zn_cnt; ++ /* ++ * Due to the way UBIFS updates the clean znode counter it may ++ * temporarily be negative. ++ */ ++ return clean_zn_cnt >= 0 ? 
clean_zn_cnt : 1; + + if (!clean_zn_cnt) { + /* +diff -uprN linux-2.6.28/fs/ubifs/super.c ubifs-v2.6.28/fs/ubifs/super.c +--- linux-2.6.28/fs/ubifs/super.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/super.c 2011-06-15 14:22:09.000000000 -0400 +@@ -317,6 +317,8 @@ static int ubifs_write_inode(struct inod + if (err) + ubifs_err("can't write inode %lu, error %d", + inode->i_ino, err); ++ else ++ err = dbg_check_inode_size(c, inode, ui->ui_size); + } + + ui->dirty = 0; +@@ -362,7 +364,7 @@ out: + ubifs_release_dirty_inode_budget(c, ui); + else { + /* We've deleted something - clean the "no space" flags */ +- c->nospace = c->nospace_rp = 0; ++ c->bi.nospace = c->bi.nospace_rp = 0; + smp_wmb(); + } + clear_inode(inode); +@@ -426,8 +428,8 @@ static int ubifs_show_options(struct seq + seq_printf(s, ",no_chk_data_crc"); + + if (c->mount_opts.override_compr) { +- seq_printf(s, ",compr="); +- seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type)); ++ seq_printf(s, ",compr=%s", ++ ubifs_compr_name(c->mount_opts.compr_type)); + } + + return 0; +@@ -511,9 +513,12 @@ static int init_constants_early(struct u + + c->leb_cnt = c->vi.size; + c->leb_size = c->vi.usable_leb_size; ++ c->leb_start = c->di.leb_start; + c->half_leb_size = c->leb_size / 2; + c->min_io_size = c->di.min_io_size; + c->min_io_shift = fls(c->min_io_size) - 1; ++ c->max_write_size = c->di.max_write_size; ++ c->max_write_shift = fls(c->max_write_size) - 1; + + if (c->leb_size < UBIFS_MIN_LEB_SZ) { + ubifs_err("too small LEBs (%d bytes), min. is %d bytes", +@@ -533,6 +538,18 @@ static int init_constants_early(struct u + } + + /* ++ * Maximum write size has to be greater or equivalent to min. I/O ++ * size, and be multiple of min. I/O size. ++ */ ++ if (c->max_write_size < c->min_io_size || ++ c->max_write_size % c->min_io_size || ++ !is_power_of_2(c->max_write_size)) { ++ ubifs_err("bad write buffer size %d for %d min. 
I/O unit", ++ c->max_write_size, c->min_io_size); ++ return -EINVAL; ++ } ++ ++ /* + * UBIFS aligns all node to 8-byte boundary, so to make function in + * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is + * less than 8. +@@ -540,6 +557,10 @@ static int init_constants_early(struct u + if (c->min_io_size < 8) { + c->min_io_size = 8; + c->min_io_shift = 3; ++ if (c->max_write_size < c->min_io_size) { ++ c->max_write_size = c->min_io_size; ++ c->max_write_shift = c->min_io_shift; ++ } + } + + c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); +@@ -674,11 +695,11 @@ static int init_constants_sb(struct ubif + * be compressed and direntries are of the maximum size. + * + * Note, data, which may be stored in inodes is budgeted separately, so +- * it is not included into 'c->inode_budget'. ++ * it is not included into 'c->bi.inode_budget'. + */ +- c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; +- c->inode_budget = UBIFS_INO_NODE_SZ; +- c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; ++ c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; ++ c->bi.inode_budget = UBIFS_INO_NODE_SZ; ++ c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; + + /* + * When the amount of flash space used by buds becomes +@@ -705,6 +726,8 @@ static int init_constants_sb(struct ubif + if (err) + return err; + ++ /* Initialize effective LEB size used in budgeting calculations */ ++ c->idx_leb_size = c->leb_size - c->max_idx_node_sz; + return 0; + } + +@@ -720,7 +743,8 @@ static void init_constants_master(struct + { + long long tmp64; + +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ c->report_rp_size = ubifs_reported_space(c, c->rp_size); + + /* + * Calculate total amount of FS blocks. 
This number is not used +@@ -788,15 +812,18 @@ static int alloc_wbufs(struct ubifs_info + + c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; + c->jheads[i].wbuf.jhead = i; ++ c->jheads[i].grouped = 1; + } + + c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; + /* + * Garbage Collector head likely contains long-term data and +- * does not need to be synchronized by timer. ++ * does not need to be synchronized by timer. Also GC head nodes are ++ * not grouped. + */ + c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; + c->jheads[GCHD].wbuf.no_timer = 1; ++ c->jheads[GCHD].grouped = 0; + + return 0; + } +@@ -937,6 +964,27 @@ static const match_table_t tokens = { + }; + + /** ++ * parse_standard_option - parse a standard mount option. ++ * @option: the option to parse ++ * ++ * Normally, standard mount options like "sync" are passed to file-systems as ++ * flags. However, when a "rootflags=" kernel boot parameter is used, they may ++ * be present in the options string. This function tries to deal with this ++ * situation and parse standard options. Returns 0 if the option was not ++ * recognized, and the corresponding integer flag if it was. ++ * ++ * UBIFS is only interested in the "sync" option, so do not check for anything ++ * else. ++ */ ++static int parse_standard_option(const char *option) ++{ ++ ubifs_msg("parse %s", option); ++ if (!strcmp(option, "sync")) ++ return MS_SYNCHRONOUS; ++ return 0; ++} ++ ++/** + * ubifs_parse_options - parse mount parameters. 
+ * @c: UBIFS file-system description object + * @options: parameters to parse +@@ -1001,8 +1049,6 @@ static int ubifs_parse_options(struct ub + c->mount_opts.compr_type = UBIFS_COMPR_LZO; + else if (!strcmp(name, "zlib")) + c->mount_opts.compr_type = UBIFS_COMPR_ZLIB; +- else if (!strcmp(name, "lzo999")) +- c->mount_opts.compr_type = UBIFS_COMPR_LZO999; + else { + ubifs_err("unknown compressor \"%s\"", name); + kfree(name); +@@ -1014,9 +1060,19 @@ static int ubifs_parse_options(struct ub + break; + } + default: +- ubifs_err("unrecognized mount option \"%s\" " +- "or missing value", p); +- return -EINVAL; ++ { ++ unsigned long flag; ++ struct super_block *sb = c->vfs_sb; ++ ++ flag = parse_standard_option(p); ++ if (!flag) { ++ ubifs_err("unrecognized mount option \"%s\" " ++ "or missing value", p); ++ return -EINVAL; ++ } ++ sb->s_flags |= flag; ++ break; ++ } + } + } + +@@ -1092,8 +1148,8 @@ static int check_free_space(struct ubifs + { + ubifs_assert(c->dark_wm > 0); + if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { +- ubifs_err("insufficient free space to mount in read/write mode"); +- dbg_dump_budg(c); ++ ubifs_err("insufficient free space to mount in R/W mode"); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + return -ENOSPC; + } +@@ -1112,11 +1168,11 @@ static int check_free_space(struct ubifs + */ + static int mount_ubifs(struct ubifs_info *c) + { +- struct super_block *sb = c->vfs_sb; +- int err, mounted_read_only = (sb->s_flags & MS_RDONLY); ++ int err; + long long x; + size_t sz; + ++ c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); + err = init_constants_early(c); + if (err) + return err; +@@ -1129,7 +1185,7 @@ static int mount_ubifs(struct ubifs_info + if (err) + goto out_free; + +- if (c->empty && (mounted_read_only || c->ro_media)) { ++ if (c->empty && (c->ro_mount || c->ro_media)) { + /* + * This UBI volume is empty, and read-only, or the file system + * is mounted read-only - we cannot format it. 
+@@ -1140,7 +1196,7 @@ static int mount_ubifs(struct ubifs_info + goto out_free; + } + +- if (c->ro_media && !mounted_read_only) { ++ if (c->ro_media && !c->ro_mount) { + ubifs_err("cannot mount read-write - read-only media"); + err = -EROFS; + goto out_free; +@@ -1160,7 +1216,7 @@ static int mount_ubifs(struct ubifs_info + if (!c->sbuf) + goto out_free; + +- if (!mounted_read_only) { ++ if (!c->ro_mount) { + c->ileb_buf = vmalloc(c->leb_size); + if (!c->ileb_buf) + goto out_free; +@@ -1169,11 +1225,14 @@ static int mount_ubifs(struct ubifs_info + if (c->bulk_read == 1) + bu_init(c); + +- /* +- * We have to check all CRCs, even for data nodes, when we mount the FS +- * (specifically, when we are replaying). +- */ +- c->always_chk_crc = 1; ++ if (!c->ro_mount) { ++ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, ++ GFP_KERNEL); ++ if (!c->write_reserve_buf) ++ goto out_free; ++ } ++ ++ c->mounting = 1; + + err = ubifs_read_superblock(c); + if (err) +@@ -1186,6 +1245,7 @@ static int mount_ubifs(struct ubifs_info + if (!ubifs_compr_present(c->default_compr)) { + ubifs_err("'compressor \"%s\" is not compiled in", + ubifs_compr_name(c->default_compr)); ++ err = -ENOTSUPP; + goto out_free; + } + +@@ -1201,14 +1261,14 @@ static int mount_ubifs(struct ubifs_info + goto out_free; + } + +- sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); +- if (!mounted_read_only) { +- err = alloc_wbufs(c); +- if (err) +- goto out_cbuf; ++ err = alloc_wbufs(c); ++ if (err) ++ goto out_cbuf; + ++ sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); ++ if (!c->ro_mount) { + /* Create background thread */ +- c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); ++ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + if (IS_ERR(c->bgt)) { + err = PTR_ERR(c->bgt); + c->bgt = NULL; +@@ -1228,12 +1288,25 @@ static int mount_ubifs(struct ubifs_info + if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { + ubifs_msg("recovery 
needed"); + c->need_recovery = 1; +- if (!mounted_read_only) { +- err = ubifs_recover_inl_heads(c, c->sbuf); +- if (err) +- goto out_master; +- } +- } else if (!mounted_read_only) { ++ } ++ ++ if (c->need_recovery && !c->ro_mount) { ++ err = ubifs_recover_inl_heads(c, c->sbuf); ++ if (err) ++ goto out_master; ++ } ++ ++ err = ubifs_lpt_init(c, 1, !c->ro_mount); ++ if (err) ++ goto out_master; ++ ++ if (!c->ro_mount && c->space_fixup) { ++ err = ubifs_fixup_free_space(c); ++ if (err) ++ goto out_master; ++ } ++ ++ if (!c->ro_mount) { + /* + * Set the "dirty" flag so that if we reboot uncleanly we + * will notice this immediately on the next mount. +@@ -1241,14 +1314,10 @@ static int mount_ubifs(struct ubifs_info + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); + err = ubifs_write_master(c); + if (err) +- goto out_master; ++ goto out_lpt; + } + +- err = ubifs_lpt_init(c, 1, !mounted_read_only); +- if (err) +- goto out_lpt; +- +- err = dbg_check_idx_size(c, c->old_idx_sz); ++ err = dbg_check_idx_size(c, c->bi.old_idx_sz); + if (err) + goto out_lpt; + +@@ -1256,11 +1325,14 @@ static int mount_ubifs(struct ubifs_info + if (err) + goto out_journal; + +- err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); ++ /* Calculate 'min_idx_lebs' after journal replay */ ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ++ err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); + if (err) + goto out_orphans; + +- if (!mounted_read_only) { ++ if (!c->ro_mount) { + int lnum; + + err = check_free_space(c); +@@ -1282,6 +1354,8 @@ static int mount_ubifs(struct ubifs_info + if (err) + goto out_orphans; + err = ubifs_rcvry_gc_commit(c); ++ if (err) ++ goto out_orphans; + } else { + err = take_gc_lnum(c); + if (err) +@@ -1293,7 +1367,7 @@ static int mount_ubifs(struct ubifs_info + */ + err = ubifs_leb_unmap(c, c->gc_lnum); + if (err) +- return err; ++ goto out_orphans; + } + + err = dbg_check_lprops(c); +@@ -1320,16 +1394,20 @@ static int mount_ubifs(struct 
ubifs_info + spin_unlock(&ubifs_infos_lock); + + if (c->need_recovery) { +- if (mounted_read_only) ++ if (c->ro_mount) + ubifs_msg("recovery deferred"); + else { + c->need_recovery = 0; + ubifs_msg("recovery completed"); +- /* GC LEB has to be empty and taken at this point */ +- ubifs_assert(c->lst.taken_empty_lebs == 1); ++ /* ++ * GC LEB has to be empty and taken at this point. But ++ * the journal head LEBs may also be accounted as ++ * "empty taken" if they are empty. ++ */ ++ ubifs_assert(c->lst.taken_empty_lebs > 0); + } + } else +- ubifs_assert(c->lst.taken_empty_lebs == 1); ++ ubifs_assert(c->lst.taken_empty_lebs > 0); + + err = dbg_check_filesystem(c); + if (err) +@@ -1339,11 +1417,11 @@ static int mount_ubifs(struct ubifs_info + if (err) + goto out_infos; + +- c->always_chk_crc = 0; ++ c->mounting = 0; + + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", + c->vi.ubi_num, c->vi.vol_id, c->vi.name); +- if (mounted_read_only) ++ if (c->ro_mount) + ubifs_msg("mounted read-only"); + x = (long long)c->main_lebs * c->leb_size; + ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " +@@ -1351,14 +1429,16 @@ static int mount_ubifs(struct ubifs_info + x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; + ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " + "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); +- ubifs_msg("media format: %d (latest is %d)", +- c->fmt_version, UBIFS_FORMAT_VERSION); ++ ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)", ++ c->fmt_version, c->ro_compat_version, ++ UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION); + ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); + ubifs_msg("reserved for root: %llu bytes (%llu KiB)", + c->report_rp_size, c->report_rp_size >> 10); + + dbg_msg("compiled on: " __DATE__ " at " __TIME__); + dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); ++ dbg_msg("max. 
write size: %d bytes", c->max_write_size); + dbg_msg("LEB size: %d bytes (%d KiB)", + c->leb_size, c->leb_size >> 10); + dbg_msg("data journal heads: %d", +@@ -1380,7 +1460,8 @@ static int mount_ubifs(struct ubifs_info + c->main_lebs, c->main_first, c->leb_cnt - 1); + dbg_msg("index LEBs: %d", c->lst.idx_lebs); + dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", +- c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); ++ c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, ++ c->bi.old_idx_sz >> 20); + dbg_msg("key hash type: %d", c->key_hash_type); + dbg_msg("tree fanout: %d", c->fanout); + dbg_msg("reserved GC LEB: %d", c->gc_lnum); +@@ -1393,9 +1474,9 @@ static int mount_ubifs(struct ubifs_info + UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); + dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", + UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); +- dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", +- UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, +- UBIFS_MAX_DENT_NODE_SZ); ++ dbg_msg("max. 
node sizes: data %zu, inode %zu dentry %zu, idx %d", ++ UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, ++ UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); + dbg_msg("dead watermark: %d", c->dead_wm); + dbg_msg("dark watermark: %d", c->dark_wm); + dbg_msg("LEB overhead: %d", c->leb_overhead); +@@ -1435,6 +1516,7 @@ out_wbufs: + out_cbuf: + kfree(c->cbuf); + out_free: ++ kfree(c->write_reserve_buf); + kfree(c->bu.buf); + vfree(c->ileb_buf); + vfree(c->sbuf); +@@ -1473,6 +1555,7 @@ static void ubifs_umount(struct ubifs_in + kfree(c->cbuf); + kfree(c->rcvrd_mst_node); + kfree(c->mst_node); ++ kfree(c->write_reserve_buf); + kfree(c->bu.buf); + vfree(c->ileb_buf); + vfree(c->sbuf); +@@ -1492,10 +1575,19 @@ static int ubifs_remount_rw(struct ubifs + { + int err, lnum; + ++ if (c->rw_incompat) { ++ ubifs_err("the file-system is not R/W-compatible"); ++ ubifs_msg("on-flash format version is w%d/r%d, but software " ++ "only supports up to version w%d/r%d", c->fmt_version, ++ c->ro_compat_version, UBIFS_FORMAT_VERSION, ++ UBIFS_RO_COMPAT_VERSION); ++ return -EROFS; ++ } ++ + mutex_lock(&c->umount_mutex); + dbg_save_space_info(c); + c->remounting_rw = 1; +- c->always_chk_crc = 1; ++ c->ro_mount = 0; + + err = check_free_space(c); + if (err) +@@ -1511,6 +1603,7 @@ static int ubifs_remount_rw(struct ubifs + } + sup->leb_cnt = cpu_to_le32(c->leb_cnt); + err = ubifs_write_sb_node(c, sup); ++ kfree(sup); + if (err) + goto out; + } +@@ -1550,18 +1643,16 @@ static int ubifs_remount_rw(struct ubifs + goto out; + } + +- err = ubifs_lpt_init(c, 0, 1); +- if (err) ++ c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL); ++ if (!c->write_reserve_buf) + goto out; + +- err = alloc_wbufs(c); ++ err = ubifs_lpt_init(c, 0, 1); + if (err) + goto out; + +- ubifs_create_buds_lists(c); +- + /* Create background thread */ +- c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); ++ c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); + if (IS_ERR(c->bgt)) { 
+ err = PTR_ERR(c->bgt); + c->bgt = NULL; +@@ -1594,20 +1685,37 @@ static int ubifs_remount_rw(struct ubifs + if (err) + goto out; + ++ dbg_gen("re-mounted read-write"); ++ c->remounting_rw = 0; ++ + if (c->need_recovery) { + c->need_recovery = 0; + ubifs_msg("deferred recovery completed"); ++ } else { ++ /* ++ * Do not run the debugging space check if the were doing ++ * recovery, because when we saved the information we had the ++ * file-system in a state where the TNC and lprops has been ++ * modified in memory, but all the I/O operations (including a ++ * commit) were deferred. So the file-system was in ++ * "non-committed" state. Now the file-system is in committed ++ * state, and of course the amount of free space will change ++ * because, for example, the old index size was imprecise. ++ */ ++ err = dbg_check_space_info(c); ++ } ++ ++ if (c->space_fixup) { ++ err = ubifs_fixup_free_space(c); ++ if (err) ++ goto out; + } + +- dbg_gen("re-mounted read-write"); +- c->vfs_sb->s_flags &= ~MS_RDONLY; +- c->remounting_rw = 0; +- c->always_chk_crc = 0; +- err = dbg_check_space_info(c); + mutex_unlock(&c->umount_mutex); + return err; + + out: ++ c->ro_mount = 1; + vfree(c->orph_buf); + c->orph_buf = NULL; + if (c->bgt) { +@@ -1615,11 +1723,12 @@ out: + c->bgt = NULL; + } + free_wbufs(c); ++ kfree(c->write_reserve_buf); ++ c->write_reserve_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); + c->remounting_rw = 0; +- c->always_chk_crc = 0; + mutex_unlock(&c->umount_mutex); + return err; + } +@@ -1636,7 +1745,7 @@ static void ubifs_remount_ro(struct ubif + int i, err; + + ubifs_assert(!c->need_recovery); +- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); ++ ubifs_assert(!c->ro_mount); + + mutex_lock(&c->umount_mutex); + if (c->bgt) { +@@ -1646,10 +1755,8 @@ static void ubifs_remount_ro(struct ubif + + dbg_save_space_info(c); + +- for (i = 0; i < c->jhead_cnt; i++) { ++ for (i = 0; i < c->jhead_cnt; i++) + 
ubifs_wbuf_sync(&c->jheads[i].wbuf); +- hrtimer_cancel(&c->jheads[i].wbuf.timer); +- } + + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); +@@ -1658,12 +1765,14 @@ static void ubifs_remount_ro(struct ubif + if (err) + ubifs_ro_mode(c, err); + +- free_wbufs(c); + vfree(c->orph_buf); + c->orph_buf = NULL; ++ kfree(c->write_reserve_buf); ++ c->write_reserve_buf = NULL; + vfree(c->ileb_buf); + c->ileb_buf = NULL; + ubifs_lpt_free(c, 1); ++ c->ro_mount = 1; + err = dbg_check_space_info(c); + if (err) + ubifs_ro_mode(c, err); +@@ -1682,10 +1791,11 @@ static void ubifs_put_super(struct super + * of the media. For example, there will be dirty inodes if we failed + * to write them back because of I/O errors. + */ +- ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); +- ubifs_assert(c->budg_idx_growth == 0); +- ubifs_assert(c->budg_dd_growth == 0); +- ubifs_assert(c->budg_data_growth == 0); ++ if (!c->ro_error) { ++ ubifs_assert(c->bi.idx_growth == 0); ++ ubifs_assert(c->bi.dd_growth == 0); ++ ubifs_assert(c->bi.data_growth == 0); ++ } + + /* + * The 'c->umount_lock' prevents races between UBIFS memory shrinker +@@ -1694,7 +1804,7 @@ static void ubifs_put_super(struct super + * the mutex is locked. + */ + mutex_lock(&c->umount_mutex); +- if (!(c->vfs_sb->s_flags & MS_RDONLY)) { ++ if (!c->ro_mount) { + /* + * First of all kill the background thread to make sure it does + * not interfere with un-mounting and freeing resources. +@@ -1704,23 +1814,22 @@ static void ubifs_put_super(struct super + c->bgt = NULL; + } + +- /* Synchronize write-buffers */ +- if (c->jheads) +- for (i = 0; i < c->jhead_cnt; i++) +- ubifs_wbuf_sync(&c->jheads[i].wbuf); +- + /* +- * On fatal errors c->ro_media is set to 1, in which case we do ++ * On fatal errors c->ro_error is set to 1, in which case we do + * not write the master node. 
+ */ +- if (!c->ro_media) { ++ if (!c->ro_error) { ++ int err; ++ ++ /* Synchronize write-buffers */ ++ for (i = 0; i < c->jhead_cnt; i++) ++ ubifs_wbuf_sync(&c->jheads[i].wbuf); ++ + /* + * We are being cleanly unmounted which means the + * orphans were killed - indicate this in the master + * node. Also save the reserved GC LEB number. + */ +- int err; +- + c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); + c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); +@@ -1733,6 +1842,10 @@ static void ubifs_put_super(struct super + */ + ubifs_err("failed to write master node, " + "error %d", err); ++ } else { ++ for (i = 0; i < c->jhead_cnt; i++) ++ /* Make sure write-buffer timers are canceled */ ++ hrtimer_cancel(&c->jheads[i].wbuf.timer); + } + } + +@@ -1756,17 +1869,21 @@ static int ubifs_remount_fs(struct super + return err; + } + +- if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { ++ if (c->ro_mount && !(*flags & MS_RDONLY)) { ++ if (c->ro_error) { ++ ubifs_msg("cannot re-mount R/W due to prior errors"); ++ return -EROFS; ++ } + if (c->ro_media) { +- ubifs_msg("cannot re-mount due to prior errors"); ++ ubifs_msg("cannot re-mount R/W - UBI volume is R/O"); + return -EROFS; + } + err = ubifs_remount_rw(c); + if (err) + return err; +- } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { +- if (c->ro_media) { +- ubifs_msg("cannot re-mount due to prior errors"); ++ } else if (!c->ro_mount && (*flags & MS_RDONLY)) { ++ if (c->ro_error) { ++ ubifs_msg("cannot re-mount R/O due to prior errors"); + return -EROFS; + } + ubifs_remount_ro(c); +@@ -1780,7 +1897,7 @@ static int ubifs_remount_fs(struct super + c->bu.buf = NULL; + } + +- ubifs_assert(c->lst.taken_empty_lebs == 1); ++ ubifs_assert(c->lst.taken_empty_lebs > 0); + return 0; + } + +@@ -1802,22 +1919,32 @@ const struct super_operations ubifs_supe + * @name: UBI volume name + * @mode: UBI volume open mode + * +- * There are several ways to 
specify UBI volumes when mounting UBIFS: +- * o ubiX_Y - UBI device number X, volume Y; +- * o ubiY - UBI device number 0, volume Y; ++ * The primary method of mounting UBIFS is by specifying the UBI volume ++ * character device node path. However, UBIFS may also be mounted withoug any ++ * character device node using one of the following methods: ++ * ++ * o ubiX_Y - mount UBI device number X, volume Y; ++ * o ubiY - mount UBI device number 0, volume Y; + * o ubiX:NAME - mount UBI device X, volume with name NAME; + * o ubi:NAME - mount UBI device 0, volume with name NAME. + * + * Alternative '!' separator may be used instead of ':' (because some shells + * like busybox may interpret ':' as an NFS host name separator). This function +- * returns ubi volume object in case of success and a negative error code in +- * case of failure. ++ * returns UBI volume description object in case of success and a negative ++ * error code in case of failure. + */ + static struct ubi_volume_desc *open_ubi(const char *name, int mode) + { ++ struct ubi_volume_desc *ubi; + int dev, vol; + char *endptr; + ++ /* First, try to open using the device node path method */ ++ ubi = ubi_open_volume_path(name, mode); ++ if (!IS_ERR(ubi)) ++ return ubi; ++ ++ /* Try the "nodev" method */ + if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') + return ERR_PTR(-EINVAL); + +@@ -1872,6 +1999,7 @@ static int ubifs_fill_super(struct super + mutex_init(&c->mst_mutex); + mutex_init(&c->umount_mutex); + mutex_init(&c->bu_mutex); ++ mutex_init(&c->write_reserve_mutex); + init_waitqueue_head(&c->cmt_wq); + c->buds = RB_ROOT; + c->old_idx = RB_ROOT; +@@ -1889,7 +2017,9 @@ static int ubifs_fill_super(struct super + INIT_LIST_HEAD(&c->old_buds); + INIT_LIST_HEAD(&c->orph_list); + INIT_LIST_HEAD(&c->orph_new); ++ c->no_chk_data_crc = 1; + ++ c->vfs_sb = sb; + c->highest_inum = UBIFS_FIRST_INO; + c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; + +@@ -1916,18 +2046,19 @@ static int ubifs_fill_super(struct 
super + err = bdi_init(&c->bdi); + if (err) + goto out_close; ++ err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d", ++ c->vi.ubi_num, c->vi.vol_id); ++ if (err) ++ goto out_bdi; + + err = ubifs_parse_options(c, data, 0); + if (err) + goto out_bdi; + +- c->vfs_sb = sb; +- + sb->s_fs_info = c; + sb->s_magic = UBIFS_SUPER_MAGIC; + sb->s_blocksize = UBIFS_BLOCK_SIZE; + sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT; +- sb->s_dev = c->vi.cdev; + sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c); + if (c->max_inode_sz > MAX_LFS_FILESIZE) + sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; +@@ -1972,16 +2103,9 @@ out_free: + static int sb_test(struct super_block *sb, void *data) + { + dev_t *dev = data; ++ struct ubifs_info *c = sb->s_fs_info; + +- return sb->s_dev == *dev; +-} +- +-static int sb_set(struct super_block *sb, void *data) +-{ +- dev_t *dev = data; +- +- sb->s_dev = *dev; +- return 0; ++ return c->vi.cdev == *dev; + } + + static int ubifs_get_sb(struct file_system_type *fs_type, int flags, +@@ -2001,24 +2125,26 @@ static int ubifs_get_sb(struct file_syst + */ + ubi = open_ubi(name, UBI_READONLY); + if (IS_ERR(ubi)) { +- ubifs_err("cannot open \"%s\", error %d", +- name, (int)PTR_ERR(ubi)); ++ dbg_err("cannot open \"%s\", error %d", ++ name, (int)PTR_ERR(ubi)); + return PTR_ERR(ubi); + } + ubi_get_volume_info(ubi, &vi); + + dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); + +- sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev); ++ sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); + if (IS_ERR(sb)) { + err = PTR_ERR(sb); + goto out_close; + } + + if (sb->s_root) { ++ struct ubifs_info *c1 = sb->s_fs_info; ++ + /* A new mount point for already mounted UBIFS */ + dbg_gen("this ubi volume is already mounted"); +- if ((flags ^ sb->s_flags) & MS_RDONLY) { ++ if (!!(flags & MS_RDONLY) != c1->ro_mount) { + err = -EBUSY; + goto out_deact; + } +@@ -2049,16 +2175,11 @@ out_close: + return err; + } + +-static void ubifs_kill_sb(struct super_block *sb) +-{ +- 
generic_shutdown_super(sb); +-} +- + static struct file_system_type ubifs_fs_type = { + .name = "ubifs", + .owner = THIS_MODULE, + .get_sb = ubifs_get_sb, +- .kill_sb = ubifs_kill_sb ++ .kill_sb = kill_anon_super, + }; + + /* +diff -uprN linux-2.6.28/fs/ubifs/tnc.c ubifs-v2.6.28/fs/ubifs/tnc.c +--- linux-2.6.28/fs/ubifs/tnc.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/tnc.c 2011-06-15 14:22:09.000000000 -0400 +@@ -446,8 +446,11 @@ static int tnc_read_node_nm(struct ubifs + * + * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc + * is true (it is controlled by corresponding mount option). However, if +- * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always +- * checked. ++ * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to ++ * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is ++ * because during mounting or re-mounting from R/O mode to R/W mode we may read ++ * journal nodes (when replying the journal or doing the recovery) and the ++ * journal nodes may potentially be corrupted, so checking is required. + */ + static int try_read_node(const struct ubifs_info *c, void *buf, int type, + int len, int lnum, int offs) +@@ -475,7 +478,8 @@ static int try_read_node(const struct ub + if (node_len != len) + return 0; + +- if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) ++ if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting && ++ !c->remounting_rw) + return 1; + + crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); +@@ -1159,8 +1163,8 @@ static struct ubifs_znode *dirty_cow_bot + * o exact match, i.e. 
the found zero-level znode contains key @key, then %1 + * is returned and slot number of the matched branch is stored in @n; + * o not exact match, which means that zero-level znode does not contain +- * @key, then %0 is returned and slot number of the closed branch is stored +- * in @n; ++ * @key, then %0 is returned and slot number of the closest branch is stored ++ * in @n; + * o @key is so small that it is even less than the lowest key of the + * leftmost zero-level node, then %0 is returned and %0 is stored in @n. + * +@@ -1176,6 +1180,7 @@ int ubifs_lookup_level0(struct ubifs_inf + unsigned long time = get_seconds(); + + dbg_tnc("search key %s", DBGKEY(key)); ++ ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); + + znode = c->zroot.znode; + if (unlikely(!znode)) { +@@ -1252,7 +1257,7 @@ int ubifs_lookup_level0(struct ubifs_inf + * splitting in the middle of the colliding sequence. Also, when + * removing the leftmost key, we would have to correct the key of the + * parent node, which would introduce additional complications. Namely, +- * if we changed the the leftmost key of the parent znode, the garbage ++ * if we changed the leftmost key of the parent znode, the garbage + * collector would be unable to find it (GC is doing this when GC'ing + * indexing LEBs). Although we already have an additional RB-tree where + * we save such changed znodes (see 'ins_clr_old_idx_znode()') until +@@ -1433,7 +1438,7 @@ static int maybe_leb_gced(struct ubifs_i + * @lnum: LEB number is returned here + * @offs: offset is returned here + * +- * This function look up and reads node with key @key. The caller has to make ++ * This function looks up and reads node with key @key. The caller has to make + * sure the @node buffer is large enough to fit the node. Returns zero in case + * of success, %-ENOENT if the node was not found, and a negative error code in + * case of failure. The node location can be returned in @lnum and @offs. 
+@@ -2551,11 +2556,11 @@ int ubifs_tnc_remove_nm(struct ubifs_inf + if (err) { + /* Ensure the znode is dirtied */ + if (znode->cnext || !ubifs_zn_dirty(znode)) { +- znode = dirty_cow_bottom_up(c, znode); +- if (IS_ERR(znode)) { +- err = PTR_ERR(znode); +- goto out_unlock; +- } ++ znode = dirty_cow_bottom_up(c, znode); ++ if (IS_ERR(znode)) { ++ err = PTR_ERR(znode); ++ goto out_unlock; ++ } + } + err = tnc_delete(c, znode, n); + } +@@ -2870,12 +2875,13 @@ static void tnc_destroy_cnext(struct ubi + */ + void ubifs_tnc_close(struct ubifs_info *c) + { +- long clean_freed; +- + tnc_destroy_cnext(c); + if (c->zroot.znode) { +- clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); +- atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); ++ long n; ++ ++ ubifs_destroy_tnc_subtree(c->zroot.znode); ++ n = atomic_long_read(&c->clean_zn_cnt); ++ atomic_long_sub(n, &ubifs_clean_zn_cnt); + } + kfree(c->gap_lebs); + kfree(c->ilebs); +@@ -2965,7 +2971,7 @@ static struct ubifs_znode *right_znode(s + * + * This function searches an indexing node by its first key @key and its + * address @lnum:@offs. It looks up the indexing tree by pulling all indexing +- * nodes it traverses to TNC. This function is called fro indexing nodes which ++ * nodes it traverses to TNC. This function is called for indexing nodes which + * were found on the media by scanning, for example when garbage-collecting or + * when doing in-the-gaps commit. This means that the indexing node which is + * looked for does not have to have exactly the same leftmost key @key, because +@@ -2987,6 +2993,8 @@ static struct ubifs_znode *lookup_znode( + struct ubifs_znode *znode, *zn; + int n, nn; + ++ ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); ++ + /* + * The arguments have probably been read off flash, so don't assume + * they are valid. 
+@@ -3268,3 +3276,73 @@ out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; + } ++ ++#ifdef CONFIG_UBIFS_FS_DEBUG ++ ++/** ++ * dbg_check_inode_size - check if inode size is correct. ++ * @c: UBIFS file-system description object ++ * @inum: inode number ++ * @size: inode size ++ * ++ * This function makes sure that the inode size (@size) is correct and it does ++ * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL ++ * if it has a data page beyond @size, and other negative error code in case of ++ * other errors. ++ */ ++int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, ++ loff_t size) ++{ ++ int err, n; ++ union ubifs_key from_key, to_key, *key; ++ struct ubifs_znode *znode; ++ unsigned int block; ++ ++ if (!S_ISREG(inode->i_mode)) ++ return 0; ++ if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) ++ return 0; ++ ++ block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; ++ data_key_init(c, &from_key, inode->i_ino, block); ++ highest_data_key(c, &to_key, inode->i_ino); ++ ++ mutex_lock(&c->tnc_mutex); ++ err = ubifs_lookup_level0(c, &from_key, &znode, &n); ++ if (err < 0) ++ goto out_unlock; ++ ++ if (err) { ++ err = -EINVAL; ++ key = &from_key; ++ goto out_dump; ++ } ++ ++ err = tnc_next(c, &znode, &n); ++ if (err == -ENOENT) { ++ err = 0; ++ goto out_unlock; ++ } ++ if (err < 0) ++ goto out_unlock; ++ ++ ubifs_assert(err == 0); ++ key = &znode->zbranch[n].key; ++ if (!key_in_range(c, key, &from_key, &to_key)) ++ goto out_unlock; ++ ++out_dump: ++ block = key_block(c, key); ++ ubifs_err("inode %lu has size %lld, but there are data at offset %lld " ++ "(data key %s)", (unsigned long)inode->i_ino, size, ++ ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); ++ dbg_dump_inode(c, inode); ++ dbg_dump_stack(); ++ err = -EINVAL; ++ ++out_unlock: ++ mutex_unlock(&c->tnc_mutex); ++ return err; ++} ++ ++#endif /* CONFIG_UBIFS_FS_DEBUG */ +diff -uprN linux-2.6.28/fs/ubifs/tnc_commit.c ubifs-v2.6.28/fs/ubifs/tnc_commit.c +--- 
linux-2.6.28/fs/ubifs/tnc_commit.c 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/tnc_commit.c 2011-06-15 14:22:09.000000000 -0400 +@@ -245,7 +245,7 @@ static int layout_leb_in_gaps(struct ubi + * it is more comprehensive and less efficient than is needed for this + * purpose. + */ +- sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); ++ sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0); + c->ileb_len = 0; + if (IS_ERR(sleb)) + return PTR_ERR(sleb); +@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_i + c->gap_lebs = NULL; + return err; + } +- if (!dbg_force_in_the_gaps_enabled) { ++ if (dbg_force_in_the_gaps_enabled()) { + /* + * Do not print scary warnings if the debugging + * option which forces in-the-gaps is enabled. + */ +- ubifs_err("out of space"); +- spin_lock(&c->space_lock); +- dbg_dump_budg(c); +- spin_unlock(&c->space_lock); ++ ubifs_warn("out of space"); ++ dbg_dump_budg(c, &c->bi); + dbg_dump_lprops(c); + } + /* Try to commit anyway */ +@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_ + spin_lock(&c->space_lock); + /* + * Although we have not finished committing yet, update size of the +- * committed index ('c->old_idx_sz') and zero out the index growth ++ * committed index ('c->bi.old_idx_sz') and zero out the index growth + * budget. It is OK to do this now, because we've reserved all the + * space which is needed to commit the index, and it is save for the + * budgeting subsystem to assume the index is already committed, + * even though it is not. 
+ */ +- ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); +- c->old_idx_sz = c->calc_idx_sz; +- c->budg_uncommitted_idx = 0; +- c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); ++ ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); ++ c->bi.old_idx_sz = c->calc_idx_sz; ++ c->bi.uncommitted_idx = 0; ++ c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + spin_unlock(&c->space_lock); + mutex_unlock(&c->tnc_mutex); + +diff -uprN linux-2.6.28/fs/ubifs/ubifs.h ubifs-v2.6.28/fs/ubifs/ubifs.h +--- linux-2.6.28/fs/ubifs/ubifs.h 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/ubifs.h 2011-06-15 14:22:09.000000000 -0400 +@@ -105,12 +105,10 @@ + /* Number of non-data journal heads */ + #define NONDATA_JHEADS_CNT 2 + +-/* Garbage collector head */ +-#define GCHD 0 +-/* Base journal head number */ +-#define BASEHD 1 +-/* First "general purpose" journal head */ +-#define DATAHD 2 ++/* Shorter names for journal head numbers for internal usage */ ++#define GCHD UBIFS_GC_HEAD ++#define BASEHD UBIFS_BASE_HEAD ++#define DATAHD UBIFS_DATA_HEAD + + /* 'No change' value for 'ubifs_change_lp()' */ + #define LPROPS_NC 0x80000001 +@@ -120,8 +118,12 @@ + * in TNC. However, when replaying, it is handy to introduce fake "truncation" + * keys for truncation nodes because the code becomes simpler. So we define + * %UBIFS_TRUN_KEY type. ++ * ++ * But otherwise, out of the journal reply scope, the truncation keys are ++ * invalid. + */ +-#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT ++#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT ++#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT + + /* + * How much a directory entry/extended attribute entry adds to the parent/host +@@ -148,6 +150,12 @@ + */ + #define WORST_COMPR_FACTOR 2 + ++/* ++ * How much memory is needed for a buffer where we comress a data node. 
++ */ ++#define COMPRESSED_DATA_NODE_BUF_SZ \ ++ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR) ++ + /* Maximum expected tree height for use by bottom_up_buf */ + #define BOTTOM_UP_HEIGHT 64 + +@@ -643,6 +651,7 @@ typedef int (*ubifs_lpt_scan_callback)(s + * @offs: write-buffer offset in this logical eraseblock + * @avail: number of bytes available in the write-buffer + * @used: number of used bytes in the write-buffer ++ * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range) + * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, + * %UBI_UNKNOWN) + * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep +@@ -677,6 +686,7 @@ struct ubifs_wbuf { + int offs; + int avail; + int used; ++ int size; + int dtype; + int jhead; + int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); +@@ -711,12 +721,14 @@ struct ubifs_bud { + * struct ubifs_jhead - journal head. + * @wbuf: head's write-buffer + * @buds_list: list of bud LEBs belonging to this journal head ++ * @grouped: non-zero if UBIFS groups nodes when writing to this journal head + * + * Note, the @buds list is protected by the @c->buds_lock. + */ + struct ubifs_jhead { + struct ubifs_wbuf wbuf; + struct list_head buds_list; ++ unsigned int grouped:1; + }; + + /** +@@ -926,6 +938,40 @@ struct ubifs_mount_opts { + unsigned int compr_type:2; + }; + ++/** ++ * struct ubifs_budg_info - UBIFS budgeting information. 
++ * @idx_growth: amount of bytes budgeted for index growth ++ * @data_growth: amount of bytes budgeted for cached data ++ * @dd_growth: amount of bytes budgeted for cached data that will make ++ * other data dirty ++ * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but ++ * which still have to be taken into account because the index ++ * has not been committed so far ++ * @old_idx_sz: size of index on flash ++ * @min_idx_lebs: minimum number of LEBs required for the index ++ * @nospace: non-zero if the file-system does not have flash space (used as ++ * optimization) ++ * @nospace_rp: the same as @nospace, but additionally means that even reserved ++ * pool is full ++ * @page_budget: budget for a page (constant, nenver changed after mount) ++ * @inode_budget: budget for an inode (constant, nenver changed after mount) ++ * @dent_budget: budget for a directory entry (constant, nenver changed after ++ * mount) ++ */ ++struct ubifs_budg_info { ++ long long idx_growth; ++ long long data_growth; ++ long long dd_growth; ++ long long uncommitted_idx; ++ unsigned long long old_idx_sz; ++ int min_idx_lebs; ++ unsigned int nospace:1; ++ unsigned int nospace_rp:1; ++ int page_budget; ++ int inode_budget; ++ int dent_budget; ++}; ++ + struct ubifs_debug_info; + + /** +@@ -940,6 +986,7 @@ struct ubifs_debug_info; + * by @commit_sem + * @cnt_lock: protects @highest_inum and @max_sqnum counters + * @fmt_version: UBIFS on-flash format version ++ * @ro_compat_version: R/O compatibility version + * @uuid: UUID from super block + * + * @lhead_lnum: log head logical eraseblock number +@@ -968,10 +1015,12 @@ struct ubifs_debug_info; + * @cmt_wq: wait queue to sleep on if the log is full and a commit is running + * + * @big_lpt: flag that LPT is too big to write whole during commit ++ * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up + * @no_chk_data_crc: do not check CRCs when reading data nodes (except during + * recovery) + * 
@bulk_read: enable bulk-reads + * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc) ++ * @rw_incompat: the media is not R/W compatible + * + * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and + * @calc_idx_sz +@@ -998,6 +1047,11 @@ struct ubifs_debug_info; + * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu + * @bu: pre-allocated bulk-read information + * ++ * @write_reserve_mutex: protects @write_reserve_buf ++ * @write_reserve_buf: on the write path we allocate memory, which might ++ * sometimes be unavailable, in which case we use this ++ * write reserve buffer ++ * + * @log_lebs: number of logical eraseblocks in the log + * @log_bytes: log size in bytes + * @log_last: last LEB of the log +@@ -1019,43 +1073,34 @@ struct ubifs_debug_info; + * + * @min_io_size: minimal input/output unit size + * @min_io_shift: number of bits in @min_io_size minus one ++ * @max_write_size: maximum amount of bytes the underlying flash can write at a ++ * time (MTD write buffer size) ++ * @max_write_shift: number of bits in @max_write_size minus one + * @leb_size: logical eraseblock size in bytes ++ * @leb_start: starting offset of logical eraseblocks within physical ++ * eraseblocks + * @half_leb_size: half LEB size ++ * @idx_leb_size: how many bytes of an LEB are effectively available when it is ++ * used to store indexing nodes (@leb_size - @max_idx_node_sz) + * @leb_cnt: count of logical eraseblocks + * @max_leb_cnt: maximum count of logical eraseblocks + * @old_leb_cnt: count of logical eraseblocks before re-size + * @ro_media: the underlying UBI volume is read-only ++ * @ro_mount: the file-system was mounted as read-only ++ * @ro_error: UBIFS switched to R/O mode because an error happened + * + * @dirty_pg_cnt: number of dirty pages (not used) + * @dirty_zn_cnt: number of dirty znodes + * @clean_zn_cnt: number of clean znodes + * +- * @budg_idx_growth: amount of bytes budgeted for index growth +- * 
@budg_data_growth: amount of bytes budgeted for cached data +- * @budg_dd_growth: amount of bytes budgeted for cached data that will make +- * other data dirty +- * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, +- * but which still have to be taken into account because +- * the index has not been committed so far +- * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, +- * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, +- * @nospace, and @nospace_rp; +- * @min_idx_lebs: minimum number of LEBs required for the index +- * @old_idx_sz: size of index on flash ++ * @space_lock: protects @bi and @lst ++ * @lst: lprops statistics ++ * @bi: budgeting information + * @calc_idx_sz: temporary variable which is used to calculate new index size + * (contains accurate new index size at end of TNC commit start) +- * @lst: lprops statistics +- * @nospace: non-zero if the file-system does not have flash space (used as +- * optimization) +- * @nospace_rp: the same as @nospace, but additionally means that even reserved +- * pool is full +- * +- * @page_budget: budget for a page +- * @inode_budget: budget for an inode +- * @dent_budget: budget for a directory entry + * + * @ref_node_alsz: size of the LEB reference node aligned to the min. 
flash +- * I/O unit ++ * I/O unit + * @mst_node_alsz: master node aligned size + * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary + * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary +@@ -1138,8 +1183,8 @@ struct ubifs_debug_info; + * previous commit start + * @uncat_list: list of un-categorized LEBs + * @empty_list: list of empty LEBs +- * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size) +- * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size) ++ * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size) ++ * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size) + * @freeable_cnt: number of freeable LEBs in @freeable_list + * + * @ltab_lnum: LEB number of LPT's own lprops table +@@ -1157,19 +1202,20 @@ struct ubifs_debug_info; + * @rp_uid: reserved pool user ID + * @rp_gid: reserved pool group ID + * +- * @empty: if the UBI device is empty +- * @replay_tree: temporary tree used during journal replay ++ * @empty: %1 if the UBI device is empty ++ * @need_recovery: %1 if the file-system needs recovery ++ * @replaying: %1 during journal replay ++ * @mounting: %1 while mounting ++ * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode + * @replay_list: temporary list used during journal replay + * @replay_buds: list of buds to replay + * @cs_sqnum: sequence number of first node in the log (commit start node) + * @replay_sqnum: sequence number of node currently being replayed +- * @need_recovery: file-system needs recovery +- * @replaying: set to %1 during journal replay +- * @unclean_leb_list: LEBs to recover when mounting ro to rw +- * @rcvrd_mst_node: recovered master node to write when mounting ro to rw ++ * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W ++ * mode ++ * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted ++ * FS to R/W mode + * @size_tree: inode size information for 
recovery +- * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) +- * @always_chk_crc: always check CRCs (while mounting and remounting rw) + * @mount_opts: UBIFS-specific mount options + * + * @dbg: debugging-related information +@@ -1183,6 +1229,7 @@ struct ubifs_info { + unsigned long long cmt_no; + spinlock_t cnt_lock; + int fmt_version; ++ int ro_compat_version; + unsigned char uuid[16]; + + int lhead_lnum; +@@ -1208,9 +1255,11 @@ struct ubifs_info { + wait_queue_head_t cmt_wq; + + unsigned int big_lpt:1; ++ unsigned int space_fixup:1; + unsigned int no_chk_data_crc:1; + unsigned int bulk_read:1; + unsigned int default_compr:2; ++ unsigned int rw_incompat:1; + + struct mutex tnc_mutex; + struct ubifs_zbranch zroot; +@@ -1236,6 +1285,9 @@ struct ubifs_info { + struct mutex bu_mutex; + struct bu_info bu; + ++ struct mutex write_reserve_mutex; ++ void *write_reserve_buf; ++ + int log_lebs; + long long log_bytes; + int log_last; +@@ -1257,32 +1309,27 @@ struct ubifs_info { + + int min_io_size; + int min_io_shift; ++ int max_write_size; ++ int max_write_shift; + int leb_size; ++ int leb_start; + int half_leb_size; ++ int idx_leb_size; + int leb_cnt; + int max_leb_cnt; + int old_leb_cnt; +- int ro_media; ++ unsigned int ro_media:1; ++ unsigned int ro_mount:1; ++ unsigned int ro_error:1; + + atomic_long_t dirty_pg_cnt; + atomic_long_t dirty_zn_cnt; + atomic_long_t clean_zn_cnt; + +- long long budg_idx_growth; +- long long budg_data_growth; +- long long budg_dd_growth; +- long long budg_uncommitted_idx; + spinlock_t space_lock; +- int min_idx_lebs; +- unsigned long long old_idx_sz; +- unsigned long long calc_idx_sz; + struct ubifs_lp_stats lst; +- unsigned int nospace:1; +- unsigned int nospace_rp:1; +- +- int page_budget; +- int inode_budget; +- int dent_budget; ++ struct ubifs_budg_info bi; ++ unsigned long long calc_idx_sz; + + int ref_node_alsz; + int mst_node_alsz; +@@ -1385,19 +1432,18 @@ struct ubifs_info { + gid_t rp_gid; + + /* The 
below fields are used only during mounting and re-mounting */ +- int empty; +- struct rb_root replay_tree; ++ unsigned int empty:1; ++ unsigned int need_recovery:1; ++ unsigned int replaying:1; ++ unsigned int mounting:1; ++ unsigned int remounting_rw:1; + struct list_head replay_list; + struct list_head replay_buds; + unsigned long long cs_sqnum; + unsigned long long replay_sqnum; +- int need_recovery; +- int replaying; + struct list_head unclean_leb_list; + struct ubifs_mst_node *rcvrd_mst_node; + struct rb_root size_tree; +- int remounting_rw; +- int always_chk_crc; + struct ubifs_mount_opts mount_opts; + + #ifdef CONFIG_UBIFS_FS_DEBUG +@@ -1444,7 +1490,7 @@ int ubifs_sync_wbufs_by_inode(struct ubi + + /* scan.c */ + struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, +- int offs, void *sbuf); ++ int offs, void *sbuf, int quiet); + void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); + int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, + int offs, int quiet); +@@ -1506,7 +1552,7 @@ long long ubifs_reported_space(const str + long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); + + /* find.c */ +-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, ++int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, + int squeeze); + int ubifs_find_free_leb_for_idx(struct ubifs_info *c); + int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, +@@ -1588,6 +1634,7 @@ int ubifs_write_master(struct ubifs_info + int ubifs_read_superblock(struct ubifs_info *c); + struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); + int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); ++int ubifs_fixup_free_space(struct ubifs_info *c); + + /* replay.c */ + int ubifs_validate_entry(struct ubifs_info *c, +@@ -1669,6 +1716,7 @@ const struct ubifs_lprops *ubifs_fast_fi + const struct ubifs_lprops *ubifs_fast_find_empty(struct 
ubifs_info *c); + const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); + const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); ++int ubifs_calc_dark(const struct ubifs_info *c, int spc); + + /* file.c */ + int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +@@ -1695,7 +1743,7 @@ struct inode *ubifs_iget(struct super_bl + int ubifs_recover_master_node(struct ubifs_info *c); + int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); + struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, +- int offs, void *sbuf, int grouped); ++ int offs, void *sbuf, int jhead); + struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, + int offs, void *sbuf); + int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); +diff -uprN linux-2.6.28/fs/ubifs/ubifs-media.h ubifs-v2.6.28/fs/ubifs/ubifs-media.h +--- linux-2.6.28/fs/ubifs/ubifs-media.h 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/fs/ubifs/ubifs-media.h 2011-06-15 14:22:09.000000000 -0400 +@@ -36,9 +36,31 @@ + /* UBIFS node magic number (must not have the padding byte first or last) */ + #define UBIFS_NODE_MAGIC 0x06101831 + +-/* UBIFS on-flash format version */ ++/* ++ * UBIFS on-flash format version. This version is increased when the on-flash ++ * format is changing. If this happens, UBIFS is will support older versions as ++ * well. But older UBIFS code will not support newer formats. Format changes ++ * will be rare and only when absolutely necessary, e.g. to fix a bug or to add ++ * a new feature. ++ * ++ * UBIFS went into mainline kernel with format version 4. The older formats ++ * were development formats. ++ */ + #define UBIFS_FORMAT_VERSION 4 + ++/* ++ * Read-only compatibility version. If the UBIFS format is changed, older UBIFS ++ * implementations will not be able to mount newer formats in read-write mode. 
++ * However, depending on the change, it may be possible to mount newer formats ++ * in R/O mode. This is indicated by the R/O compatibility version which is ++ * stored in the super-block. ++ * ++ * This is needed to support boot-loaders which only need R/O mounting. With ++ * this flag it is possible to do UBIFS format changes without a need to update ++ * boot-loaders. ++ */ ++#define UBIFS_RO_COMPAT_VERSION 0 ++ + /* Minimum logical eraseblock size in bytes */ + #define UBIFS_MIN_LEB_SZ (15*1024) + +@@ -53,7 +75,7 @@ + + /* + * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes +- * shorter than uncompressed data length, UBIFS preferes to leave this data ++ * shorter than uncompressed data length, UBIFS prefers to leave this data + * node uncompress, because it'll be read faster. + */ + #define UBIFS_MIN_COMPRESS_DIFF 64 +@@ -113,6 +135,13 @@ + /* The key is always at the same position in all keyed nodes */ + #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) + ++/* Garbage collector journal head number */ ++#define UBIFS_GC_HEAD 0 ++/* Base journal head number */ ++#define UBIFS_BASE_HEAD 1 ++/* Data journal head number */ ++#define UBIFS_DATA_HEAD 2 ++ + /* + * LEB Properties Tree node types. + * +@@ -303,14 +332,12 @@ enum { + * UBIFS_COMPR_NONE: no compression + * UBIFS_COMPR_LZO: LZO compression + * UBIFS_COMPR_ZLIB: ZLIB compression +- * UBIFS_COMPR_LZO999: LZO999 compression + * UBIFS_COMPR_TYPES_CNT: count of supported compression types + */ + enum { + UBIFS_COMPR_NONE, + UBIFS_COMPR_LZO, + UBIFS_COMPR_ZLIB, +- UBIFS_COMPR_LZO999, + UBIFS_COMPR_TYPES_CNT, + }; + +@@ -381,9 +408,11 @@ enum { + * Superblock flags. 
+ * + * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set ++ * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed + */ + enum { + UBIFS_FLG_BIGLPT = 0x02, ++ UBIFS_FLG_SPACE_FIXUP = 0x04, + }; + + /** +@@ -407,7 +436,7 @@ struct ubifs_ch { + __u8 node_type; + __u8 group_type; + __u8 padding[2]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * union ubifs_dev_desc - device node descriptor. +@@ -421,7 +450,7 @@ struct ubifs_ch { + union ubifs_dev_desc { + __le32 new; + __le64 huge; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_ino_node - inode node. +@@ -482,7 +511,7 @@ struct ubifs_ino_node { + __le16 compr_type; + __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ + __u8 data[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_dent_node - directory entry node. +@@ -507,7 +536,7 @@ struct ubifs_dent_node { + __le16 nlen; + __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ + __u8 name[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_data_node - data node. +@@ -528,7 +557,7 @@ struct ubifs_data_node { + __le16 compr_type; + __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ + __u8 data[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_trun_node - truncation node. +@@ -548,7 +577,7 @@ struct ubifs_trun_node { + __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ + __le64 old_size; + __le64 new_size; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_pad_node - padding node. +@@ -559,7 +588,7 @@ struct ubifs_trun_node { + struct ubifs_pad_node { + struct ubifs_ch ch; + __le32 pad_len; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_sb_node - superblock node. 
+@@ -588,6 +617,7 @@ struct ubifs_pad_node { + * @padding2: reserved for future, zeroes + * @time_gran: time granularity in nanoseconds + * @uuid: UUID generated when the file system image was created ++ * @ro_compat_version: UBIFS R/O compatibility version + */ + struct ubifs_sb_node { + struct ubifs_ch ch; +@@ -614,8 +644,9 @@ struct ubifs_sb_node { + __le64 rp_size; + __le32 time_gran; + __u8 uuid[16]; +- __u8 padding2[3972]; +-} __attribute__ ((packed)); ++ __le32 ro_compat_version; ++ __u8 padding2[3968]; ++} __packed; + + /** + * struct ubifs_mst_node - master node. +@@ -682,7 +713,7 @@ struct ubifs_mst_node { + __le32 idx_lebs; + __le32 leb_cnt; + __u8 padding[344]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_ref_node - logical eraseblock reference node. +@@ -698,7 +729,7 @@ struct ubifs_ref_node { + __le32 offs; + __le32 jhead; + __u8 padding[28]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_branch - key/reference/length branch +@@ -712,7 +743,7 @@ struct ubifs_branch { + __le32 offs; + __le32 len; + __u8 key[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_idx_node - indexing node. +@@ -726,7 +757,7 @@ struct ubifs_idx_node { + __le16 child_cnt; + __le16 level; + __u8 branches[]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_cs_node - commit start node. +@@ -736,7 +767,7 @@ struct ubifs_idx_node { + struct ubifs_cs_node { + struct ubifs_ch ch; + __le64 cmt_no; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubifs_orph_node - orphan node. 
+@@ -748,6 +779,6 @@ struct ubifs_orph_node { + struct ubifs_ch ch; + __le64 cmt_no; + __le64 inos[]; +-} __attribute__ ((packed)); ++} __packed; + + #endif /* __UBIFS_MEDIA_H__ */ +diff -uprN linux-2.6.28/fs/ubifs/xattr.c ubifs-v2.6.28/fs/ubifs/xattr.c +--- linux-2.6.28/fs/ubifs/xattr.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/fs/ubifs/xattr.c 2011-06-15 14:22:09.000000000 -0400 +@@ -78,9 +78,9 @@ enum { + SECURITY_XATTR, + }; + +-static struct inode_operations none_inode_operations; +-static struct address_space_operations none_address_operations; +-static struct file_operations none_file_operations; ++static const struct inode_operations empty_iops; ++static const struct file_operations empty_fops; ++static struct address_space_operations empty_aops; + + /** + * create_xattr - create an extended attribute. +@@ -129,9 +129,9 @@ static int create_xattr(struct ubifs_inf + } + + /* Re-define all operations to be "nothing" */ +- inode->i_mapping->a_ops = &none_address_operations; +- inode->i_op = &none_inode_operations; +- inode->i_fop = &none_file_operations; ++ inode->i_mapping->a_ops = &empty_aops; ++ inode->i_op = &empty_iops; ++ inode->i_fop = &empty_fops; + + inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; + ui = ubifs_inode(inode); +diff -uprN linux-2.6.28/include/linux/kernel.h ubifs-v2.6.28/include/linux/kernel.h +--- linux-2.6.28/include/linux/kernel.h 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/include/linux/kernel.h 2011-06-15 14:22:09.000000000 -0400 +@@ -45,6 +45,16 @@ extern const char linux_proc_banner[]; + + #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + ++/* ++ * This looks more complex than it should be. But we need to ++ * get the type for the ~ right in round_down (it needs to be ++ * as wide as the result!), and we want to evaluate the macro ++ * arguments just once each. 
++ */ ++#define __round_mask(x, y) ((__typeof__(x))((y)-1)) ++#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) ++#define round_down(x, y) ((x) & ~__round_mask(x, y)) ++ + #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) + #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) +diff -uprN linux-2.6.28/include/linux/list_sort.h ubifs-v2.6.28/include/linux/list_sort.h +--- linux-2.6.28/include/linux/list_sort.h 1969-12-31 19:00:00.000000000 -0500 ++++ ubifs-v2.6.28/include/linux/list_sort.h 2011-06-15 14:22:09.000000000 -0400 +@@ -0,0 +1,11 @@ ++#ifndef _LINUX_LIST_SORT_H ++#define _LINUX_LIST_SORT_H ++ ++#include ++ ++struct list_head; ++ ++void list_sort(void *priv, struct list_head *head, ++ int (*cmp)(void *priv, struct list_head *a, ++ struct list_head *b)); ++#endif +diff -uprN linux-2.6.28/include/linux/mtd/mtd.h ubifs-v2.6.28/include/linux/mtd/mtd.h +--- linux-2.6.28/include/linux/mtd/mtd.h 2011-06-15 15:12:27.000000000 -0400 ++++ ubifs-v2.6.28/include/linux/mtd/mtd.h 2011-06-15 15:16:03.000000000 -0400 +@@ -117,6 +117,17 @@ struct mtd_info { + */ + u_int32_t writesize; + ++ /* ++ * Size of the write buffer used by the MTD. MTD devices having a write ++ * buffer can write multiple writesize chunks at a time. E.g. while ++ * writing 4 * writesize bytes to a device with 2 * writesize bytes ++ * buffer the MTD driver can (but doesn't have to) do 2 writesize ++ * operations, but not 4. Currently, all NANDs have writebufsize ++ * equivalent to writesize (NAND page size). Some NOR flashes do have ++ * writebufsize greater than writesize. ++ */ ++ uint32_t writebufsize; ++ + u_int32_t oobsize; // Amount of OOB data per block (e.g. 
16) + u_int32_t oobavail; // Available OOB bytes per block + +diff -uprN linux-2.6.28/include/linux/mtd/ubi.h ubifs-v2.6.28/include/linux/mtd/ubi.h +--- linux-2.6.28/include/linux/mtd/ubi.h 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/include/linux/mtd/ubi.h 2011-06-15 14:22:09.000000000 -0400 +@@ -21,7 +21,7 @@ + #ifndef __LINUX_UBI_H__ + #define __LINUX_UBI_H__ + +-#include ++#include + #include + #include + +@@ -87,7 +87,7 @@ enum { + * physical eraseblock size and on how much bytes UBI headers consume. But + * because of the volume alignment (@alignment), the usable size of logical + * eraseblocks if a volume may be less. The following equation is true: +- * @usable_leb_size = LEB size - (LEB size mod @alignment), ++ * @usable_leb_size = LEB size - (LEB size mod @alignment), + * where LEB size is the logical eraseblock size defined by the UBI device. + * + * The alignment is multiple to the minimal flash input/output unit size or %1 +@@ -116,22 +116,77 @@ struct ubi_volume_info { + * struct ubi_device_info - UBI device description data structure. + * @ubi_num: ubi device number + * @leb_size: logical eraseblock size on this UBI device ++ * @leb_start: starting offset of logical eraseblocks within physical ++ * eraseblocks + * @min_io_size: minimal I/O unit size ++ * @max_write_size: maximum amount of bytes the underlying flash can write at a ++ * time (MTD write buffer size) + * @ro_mode: if this device is in read-only mode + * @cdev: UBI character device major and minor numbers + * + * Note, @leb_size is the logical eraseblock size offered by the UBI device. + * Volumes of this UBI device may have smaller logical eraseblock size if their + * alignment is not equivalent to %1. ++ * ++ * The @max_write_size field describes flash write maximum write unit. For ++ * example, NOR flash allows for changing individual bytes, so @min_io_size is ++ * %1. However, it does not mean than NOR flash has to write data byte-by-byte. 
++ * Instead, CFI NOR flashes have a write-buffer of, e.g., 64 bytes, and when ++ * writing large chunks of data, they write 64-bytes at a time. Obviously, this ++ * improves write throughput. ++ * ++ * Also, the MTD device may have N interleaved (striped) flash chips ++ * underneath, in which case @min_io_size can be physical min. I/O size of ++ * single flash chip, while @max_write_size can be N * @min_io_size. ++ * ++ * The @max_write_size field is always greater or equivalent to @min_io_size. ++ * E.g., some NOR flashes may have (@min_io_size = 1, @max_write_size = 64). In ++ * contrast, NAND flashes usually have @min_io_size = @max_write_size = NAND ++ * page size. + */ + struct ubi_device_info { + int ubi_num; + int leb_size; ++ int leb_start; + int min_io_size; ++ int max_write_size; + int ro_mode; + dev_t cdev; + }; + ++/* ++ * enum - volume notification types. ++ * @UBI_VOLUME_ADDED: volume has been added ++ * @UBI_VOLUME_REMOVED: start volume volume ++ * @UBI_VOLUME_RESIZED: volume size has been re-sized ++ * @UBI_VOLUME_RENAMED: volume name has been re-named ++ * @UBI_VOLUME_UPDATED: volume name has been updated ++ * ++ * These constants define which type of event has happened when a volume ++ * notification function is invoked. ++ */ ++enum { ++ UBI_VOLUME_ADDED, ++ UBI_VOLUME_REMOVED, ++ UBI_VOLUME_RESIZED, ++ UBI_VOLUME_RENAMED, ++ UBI_VOLUME_UPDATED, ++}; ++ ++/* ++ * struct ubi_notification - UBI notification description structure. ++ * @di: UBI device description object ++ * @vi: UBI volume description object ++ * ++ * UBI notifiers are called with a pointer to an object of this type. The ++ * object describes the notification. Namely, it provides a description of the ++ * UBI device and UBI volume the notification informs about. 
++ */ ++struct ubi_notification { ++ struct ubi_device_info di; ++ struct ubi_volume_info vi; ++}; ++ + /* UBI descriptor given to users when they open UBI volumes */ + struct ubi_volume_desc; + +@@ -141,6 +196,12 @@ void ubi_get_volume_info(struct ubi_volu + struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode); + struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name, + int mode); ++struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode); ++ ++int ubi_register_volume_notifier(struct notifier_block *nb, ++ int ignore_existing); ++int ubi_unregister_volume_notifier(struct notifier_block *nb); ++ + void ubi_close_volume(struct ubi_volume_desc *desc); + int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, + int len, int check); +diff -uprN linux-2.6.28/include/mtd/ubi-user.h ubifs-v2.6.28/include/mtd/ubi-user.h +--- linux-2.6.28/include/mtd/ubi-user.h 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/include/mtd/ubi-user.h 2011-06-15 14:22:09.000000000 -0400 +@@ -40,37 +40,37 @@ + * UBI volume creation + * ~~~~~~~~~~~~~~~~~~~ + * +- * UBI volumes are created via the %UBI_IOCMKVOL IOCTL command of UBI character ++ * UBI volumes are created via the %UBI_IOCMKVOL ioctl command of UBI character + * device. A &struct ubi_mkvol_req object has to be properly filled and a +- * pointer to it has to be passed to the IOCTL. ++ * pointer to it has to be passed to the ioctl. + * + * UBI volume deletion + * ~~~~~~~~~~~~~~~~~~~ + * +- * To delete a volume, the %UBI_IOCRMVOL IOCTL command of the UBI character ++ * To delete a volume, the %UBI_IOCRMVOL ioctl command of the UBI character + * device should be used. A pointer to the 32-bit volume ID hast to be passed +- * to the IOCTL. ++ * to the ioctl. 
+ * + * UBI volume re-size + * ~~~~~~~~~~~~~~~~~~ + * +- * To re-size a volume, the %UBI_IOCRSVOL IOCTL command of the UBI character ++ * To re-size a volume, the %UBI_IOCRSVOL ioctl command of the UBI character + * device should be used. A &struct ubi_rsvol_req object has to be properly +- * filled and a pointer to it has to be passed to the IOCTL. ++ * filled and a pointer to it has to be passed to the ioctl. + * + * UBI volumes re-name + * ~~~~~~~~~~~~~~~~~~~ + * + * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command + * of the UBI character device should be used. A &struct ubi_rnvol_req object +- * has to be properly filled and a pointer to it has to be passed to the IOCTL. ++ * has to be properly filled and a pointer to it has to be passed to the ioctl. + * + * UBI volume update + * ~~~~~~~~~~~~~~~~~ + * +- * Volume update should be done via the %UBI_IOCVOLUP IOCTL command of the ++ * Volume update should be done via the %UBI_IOCVOLUP ioctl command of the + * corresponding UBI volume character device. A pointer to a 64-bit update +- * size should be passed to the IOCTL. After this, UBI expects user to write ++ * size should be passed to the ioctl. After this, UBI expects user to write + * this number of bytes to the volume character device. The update is finished + * when the claimed number of bytes is passed. So, the volume update sequence + * is something like: +@@ -80,14 +80,58 @@ + * write(fd, buf, image_size); + * close(fd); + * +- * Atomic eraseblock change ++ * Logical eraseblock erase + * ~~~~~~~~~~~~~~~~~~~~~~~~ + * +- * Atomic eraseblock change operation is done via the %UBI_IOCEBCH IOCTL +- * command of the corresponding UBI volume character device. A pointer to +- * &struct ubi_leb_change_req has to be passed to the IOCTL. Then the user is +- * expected to write the requested amount of bytes. This is similar to the +- * "volume update" IOCTL. 
++ * To erase a logical eraseblock, the %UBI_IOCEBER ioctl command of the ++ * corresponding UBI volume character device should be used. This command ++ * unmaps the requested logical eraseblock, makes sure the corresponding ++ * physical eraseblock is successfully erased, and returns. ++ * ++ * Atomic logical eraseblock change ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * Atomic logical eraseblock change operation is called using the %UBI_IOCEBCH ++ * ioctl command of the corresponding UBI volume character device. A pointer to ++ * a &struct ubi_leb_change_req object has to be passed to the ioctl. Then the ++ * user is expected to write the requested amount of bytes (similarly to what ++ * should be done in case of the "volume update" ioctl). ++ * ++ * Logical eraseblock map ++ * ~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To map a logical eraseblock to a physical eraseblock, the %UBI_IOCEBMAP ++ * ioctl command should be used. A pointer to a &struct ubi_map_req object is ++ * expected to be passed. The ioctl maps the requested logical eraseblock to ++ * a physical eraseblock and returns. Only non-mapped logical eraseblocks can ++ * be mapped. If the logical eraseblock specified in the request is already ++ * mapped to a physical eraseblock, the ioctl fails and returns error. ++ * ++ * Logical eraseblock unmap ++ * ~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To unmap a logical eraseblock to a physical eraseblock, the %UBI_IOCEBUNMAP ++ * ioctl command should be used. The ioctl unmaps the logical eraseblocks, ++ * schedules corresponding physical eraseblock for erasure, and returns. Unlike ++ * the "LEB erase" command, it does not wait for the physical eraseblock being ++ * erased. Note, the side effect of this is that if an unclean reboot happens ++ * after the unmap ioctl returns, you may find the LEB mapped again to the same ++ * physical eraseblock after the UBI is run again. 
++ * ++ * Check if logical eraseblock is mapped ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To check if a logical eraseblock is mapped to a physical eraseblock, the ++ * %UBI_IOCEBISMAP ioctl command should be used. It returns %0 if the LEB is ++ * not mapped, and %1 if it is mapped. ++ * ++ * Set an UBI volume property ++ * ~~~~~~~~~~~~~~~~~~~~~~~~~ ++ * ++ * To set an UBI volume property the %UBI_IOCSETPROP ioctl command should be ++ * used. A pointer to a &struct ubi_set_vol_prop_req object is expected to be ++ * passed. The object describes which property should be set, and to which value ++ * it should be set. + */ + + /* +@@ -101,7 +145,7 @@ + /* Maximum volume name length */ + #define UBI_MAX_VOLUME_NAME 127 + +-/* IOCTL commands of UBI character devices */ ++/* ioctl commands of UBI character devices */ + + #define UBI_IOC_MAGIC 'o' + +@@ -114,7 +158,7 @@ + /* Re-name volumes */ + #define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req) + +-/* IOCTL commands of the UBI control character device */ ++/* ioctl commands of the UBI control character device */ + + #define UBI_CTRL_IOC_MAGIC 'o' + +@@ -123,16 +167,25 @@ + /* Detach an MTD device */ + #define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t) + +-/* IOCTL commands of UBI volume character devices */ ++/* ioctl commands of UBI volume character devices */ + + #define UBI_VOL_IOC_MAGIC 'O' + + /* Start UBI volume update */ + #define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t) +-/* An eraseblock erasure command, used for debugging, disabled by default */ ++/* LEB erasure command, used for debugging, disabled by default */ + #define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t) +-/* An atomic eraseblock change command */ ++/* Atomic LEB change command */ + #define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t) ++/* Map LEB command */ ++#define UBI_IOCEBMAP _IOW(UBI_VOL_IOC_MAGIC, 3, struct ubi_map_req) ++/* Unmap LEB command */ ++#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, 
int32_t) ++/* Check if LEB is mapped command */ ++#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, int32_t) ++/* Set an UBI volume property */ ++#define UBI_IOCSETVOLPROP _IOW(UBI_VOL_IOC_MAGIC, 6, \ ++ struct ubi_set_vol_prop_req) + + /* Maximum MTD device name length supported by UBI */ + #define MAX_UBI_MTD_NAME_LEN 127 +@@ -168,6 +221,17 @@ enum { + UBI_STATIC_VOLUME = 4, + }; + ++/* ++ * UBI set volume property ioctl constants. ++ * ++ * @UBI_VOL_PROP_DIRECT_WRITE: allow (any non-zero value) or disallow (value 0) ++ * user to directly write and erase individual ++ * eraseblocks on dynamic volumes ++ */ ++enum { ++ UBI_VOL_PROP_DIRECT_WRITE = 1, ++}; ++ + /** + * struct ubi_attach_req - attach MTD device request. + * @ubi_num: UBI device number to create +@@ -244,7 +308,7 @@ struct ubi_mkvol_req { + int16_t name_len; + int8_t padding2[4]; + char name[UBI_MAX_VOLUME_NAME + 1]; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubi_rsvol_req - a data structure used in volume re-size requests. +@@ -260,7 +324,7 @@ struct ubi_mkvol_req { + struct ubi_rsvol_req { + int64_t bytes; + int32_t vol_id; +-} __attribute__ ((packed)); ++} __packed; + + /** + * struct ubi_rnvol_req - volumes re-name request. +@@ -302,11 +366,11 @@ struct ubi_rnvol_req { + int8_t padding2[2]; + char name[UBI_MAX_VOLUME_NAME + 1]; + } ents[UBI_MAX_RNVOL]; +-} __attribute__ ((packed)); ++} __packed; + + /** +- * struct ubi_leb_change_req - a data structure used in atomic logical +- * eraseblock change requests. ++ * struct ubi_leb_change_req - a data structure used in atomic LEB change ++ * requests. 
+ * @lnum: logical eraseblock number to change + * @bytes: how many bytes will be written to the logical eraseblock + * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) +@@ -317,6 +381,32 @@ struct ubi_leb_change_req { + int32_t bytes; + int8_t dtype; + int8_t padding[7]; +-} __attribute__ ((packed)); ++} __packed; ++ ++/** ++ * struct ubi_map_req - a data structure used in map LEB requests. ++ * @lnum: logical eraseblock number to unmap ++ * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN) ++ * @padding: reserved for future, not used, has to be zeroed ++ */ ++struct ubi_map_req { ++ int32_t lnum; ++ int8_t dtype; ++ int8_t padding[3]; ++} __packed; ++ ++ ++/** ++ * struct ubi_set_vol_prop_req - a data structure used to set an UBI volume ++ * property. ++ * @property: property to set (%UBI_VOL_PROP_DIRECT_WRITE) ++ * @padding: reserved for future, not used, has to be zeroed ++ * @value: value to set ++ */ ++struct ubi_set_vol_prop_req { ++ uint8_t property; ++ uint8_t padding[7]; ++ uint64_t value; ++} __packed; + + #endif /* __UBI_USER_H__ */ +diff -uprN linux-2.6.28/lib/list_sort.c ubifs-v2.6.28/lib/list_sort.c +--- linux-2.6.28/lib/list_sort.c 1969-12-31 19:00:00.000000000 -0500 ++++ ubifs-v2.6.28/lib/list_sort.c 2011-06-15 14:22:09.000000000 -0400 +@@ -0,0 +1,291 @@ ++#include ++#include ++#include ++#include ++#include ++ ++#define MAX_LIST_LENGTH_BITS 20 ++ ++/* ++ * Returns a list organized in an intermediate format suited ++ * to chaining of merge() calls: null-terminated, no reserved or ++ * sentinel head node, "prev" links not maintained. 
++ */ ++static struct list_head *merge(void *priv, ++ int (*cmp)(void *priv, struct list_head *a, ++ struct list_head *b), ++ struct list_head *a, struct list_head *b) ++{ ++ struct list_head head, *tail = &head; ++ ++ while (a && b) { ++ /* if equal, take 'a' -- important for sort stability */ ++ if ((*cmp)(priv, a, b) <= 0) { ++ tail->next = a; ++ a = a->next; ++ } else { ++ tail->next = b; ++ b = b->next; ++ } ++ tail = tail->next; ++ } ++ tail->next = a?:b; ++ return head.next; ++} ++ ++/* ++ * Combine final list merge with restoration of standard doubly-linked ++ * list structure. This approach duplicates code from merge(), but ++ * runs faster than the tidier alternatives of either a separate final ++ * prev-link restoration pass, or maintaining the prev links ++ * throughout. ++ */ ++static void merge_and_restore_back_links(void *priv, ++ int (*cmp)(void *priv, struct list_head *a, ++ struct list_head *b), ++ struct list_head *head, ++ struct list_head *a, struct list_head *b) ++{ ++ struct list_head *tail = head; ++ ++ while (a && b) { ++ /* if equal, take 'a' -- important for sort stability */ ++ if ((*cmp)(priv, a, b) <= 0) { ++ tail->next = a; ++ a->prev = tail; ++ a = a->next; ++ } else { ++ tail->next = b; ++ b->prev = tail; ++ b = b->next; ++ } ++ tail = tail->next; ++ } ++ tail->next = a ? : b; ++ ++ do { ++ /* ++ * In worst cases this loop may run many iterations. ++ * Continue callbacks to the client even though no ++ * element comparison is needed, so the client's cmp() ++ * routine can invoke cond_resched() periodically. 
++ */ ++ (*cmp)(priv, tail->next, tail->next); ++ ++ tail->next->prev = tail; ++ tail = tail->next; ++ } while (tail->next); ++ ++ tail->next = head; ++ head->prev = tail; ++} ++ ++/** ++ * list_sort - sort a list ++ * @priv: private data, opaque to list_sort(), passed to @cmp ++ * @head: the list to sort ++ * @cmp: the elements comparison function ++ * ++ * This function implements "merge sort", which has O(nlog(n)) ++ * complexity. ++ * ++ * The comparison function @cmp must return a negative value if @a ++ * should sort before @b, and a positive value if @a should sort after ++ * @b. If @a and @b are equivalent, and their original relative ++ * ordering is to be preserved, @cmp must return 0. ++ */ ++void list_sort(void *priv, struct list_head *head, ++ int (*cmp)(void *priv, struct list_head *a, ++ struct list_head *b)) ++{ ++ struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists ++ -- last slot is a sentinel */ ++ int lev; /* index into part[] */ ++ int max_lev = 0; ++ struct list_head *list; ++ ++ if (list_empty(head)) ++ return; ++ ++ memset(part, 0, sizeof(part)); ++ ++ head->prev->next = NULL; ++ list = head->next; ++ ++ while (list) { ++ struct list_head *cur = list; ++ list = list->next; ++ cur->next = NULL; ++ ++ for (lev = 0; part[lev]; lev++) { ++ cur = merge(priv, cmp, part[lev], cur); ++ part[lev] = NULL; ++ } ++ if (lev > max_lev) { ++ if (unlikely(lev >= ARRAY_SIZE(part)-1)) { ++ printk(KERN_DEBUG "list passed to" ++ " list_sort() too long for" ++ " efficiency\n"); ++ lev--; ++ } ++ max_lev = lev; ++ } ++ part[lev] = cur; ++ } ++ ++ for (lev = 0; lev < max_lev; lev++) ++ if (part[lev]) ++ list = merge(priv, cmp, part[lev], list); ++ ++ merge_and_restore_back_links(priv, cmp, head, part[max_lev], list); ++} ++EXPORT_SYMBOL(list_sort); ++ ++#ifdef CONFIG_TEST_LIST_SORT ++ ++#include ++ ++/* ++ * The pattern of set bits in the list length determines which cases ++ * are hit in list_sort(). 
++ */ ++#define TEST_LIST_LEN (512+128+2) /* not including head */ ++ ++#define TEST_POISON1 0xDEADBEEF ++#define TEST_POISON2 0xA324354C ++ ++struct debug_el { ++ unsigned int poison1; ++ struct list_head list; ++ unsigned int poison2; ++ int value; ++ unsigned serial; ++}; ++ ++/* Array, containing pointers to all elements in the test list */ ++static struct debug_el **elts __initdata; ++ ++static int __init check(struct debug_el *ela, struct debug_el *elb) ++{ ++ if (ela->serial >= TEST_LIST_LEN) { ++ printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", ++ ela->serial); ++ return -EINVAL; ++ } ++ if (elb->serial >= TEST_LIST_LEN) { ++ printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n", ++ elb->serial); ++ return -EINVAL; ++ } ++ if (elts[ela->serial] != ela || elts[elb->serial] != elb) { ++ printk(KERN_ERR "list_sort_test: error: phantom element\n"); ++ return -EINVAL; ++ } ++ if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) { ++ printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", ++ ela->poison1, ela->poison2); ++ return -EINVAL; ++ } ++ if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) { ++ printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n", ++ elb->poison1, elb->poison2); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++static int __init cmp(void *priv, struct list_head *a, struct list_head *b) ++{ ++ struct debug_el *ela, *elb; ++ ++ ela = container_of(a, struct debug_el, list); ++ elb = container_of(b, struct debug_el, list); ++ ++ check(ela, elb); ++ return ela->value - elb->value; ++} ++ ++static int __init list_sort_test(void) ++{ ++ int i, count = 1, err = -EINVAL; ++ struct debug_el *el; ++ struct list_head *cur, *tmp; ++ LIST_HEAD(head); ++ ++ printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n"); ++ ++ elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL); ++ if (!elts) { ++ printk(KERN_ERR "list_sort_test: error: cannot allocate " ++ "memory\n"); ++ goto 
exit; ++ } ++ ++ for (i = 0; i < TEST_LIST_LEN; i++) { ++ el = kmalloc(sizeof(*el), GFP_KERNEL); ++ if (!el) { ++ printk(KERN_ERR "list_sort_test: error: cannot " ++ "allocate memory\n"); ++ goto exit; ++ } ++ /* force some equivalencies */ ++ el->value = random32() % (TEST_LIST_LEN/3); ++ el->serial = i; ++ el->poison1 = TEST_POISON1; ++ el->poison2 = TEST_POISON2; ++ elts[i] = el; ++ list_add_tail(&el->list, &head); ++ } ++ ++ list_sort(NULL, &head, cmp); ++ ++ for (cur = head.next; cur->next != &head; cur = cur->next) { ++ struct debug_el *el1; ++ int cmp_result; ++ ++ if (cur->next->prev != cur) { ++ printk(KERN_ERR "list_sort_test: error: list is " ++ "corrupted\n"); ++ goto exit; ++ } ++ ++ cmp_result = cmp(NULL, cur, cur->next); ++ if (cmp_result > 0) { ++ printk(KERN_ERR "list_sort_test: error: list is not " ++ "sorted\n"); ++ goto exit; ++ } ++ ++ el = container_of(cur, struct debug_el, list); ++ el1 = container_of(cur->next, struct debug_el, list); ++ if (cmp_result == 0 && el->serial >= el1->serial) { ++ printk(KERN_ERR "list_sort_test: error: order of " ++ "equivalent elements not preserved\n"); ++ goto exit; ++ } ++ ++ if (check(el, el1)) { ++ printk(KERN_ERR "list_sort_test: error: element check " ++ "failed\n"); ++ goto exit; ++ } ++ count++; ++ } ++ ++ if (count != TEST_LIST_LEN) { ++ printk(KERN_ERR "list_sort_test: error: bad list length %d", ++ count); ++ goto exit; ++ } ++ ++ err = 0; ++exit: ++ kfree(elts); ++ list_for_each_safe(cur, tmp, &head) { ++ list_del(cur); ++ kfree(container_of(cur, struct debug_el, list)); ++ } ++ return err; ++} ++module_init(list_sort_test); ++#endif /* CONFIG_TEST_LIST_SORT */ +diff -uprN linux-2.6.28/lib/Makefile ubifs-v2.6.28/lib/Makefile +--- linux-2.6.28/lib/Makefile 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/lib/Makefile 2011-06-15 14:22:09.000000000 -0400 +@@ -20,7 +20,7 @@ lib-y += kobject.o kref.o klist.o + + obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ + 
bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ +- string_helpers.o ++ string_helpers.o list_sort.o + + ifeq ($(CONFIG_DEBUG_KOBJECT),y) + CFLAGS_kobject.o += -DDEBUG +diff -uprN linux-2.6.28/MAINTAINERS ubifs-v2.6.28/MAINTAINERS +--- linux-2.6.28/MAINTAINERS 2011-06-15 15:12:26.000000000 -0400 ++++ ubifs-v2.6.28/MAINTAINERS 2011-06-15 15:16:03.000000000 -0400 +@@ -4242,9 +4242,9 @@ S: Maintained + + UBI FILE SYSTEM (UBIFS) + P: Artem Bityutskiy +-M: dedekind@infradead.org ++M: dedekind1@gmail.com + P: Adrian Hunter +-M: ext-adrian.hunter@nokia.com ++M: adrian.hunter@nokia.com + L: linux-mtd@lists.infradead.org + T: git git://git.infradead.org/ubifs-2.6.git + W: http://www.linux-mtd.infradead.org/doc/ubifs.html +@@ -4297,7 +4297,7 @@ S: Maintained + + UNSORTED BLOCK IMAGES (UBI) + P: Artem Bityutskiy +-M: dedekind@infradead.org ++M: dedekind1@gmail.com + W: http://www.linux-mtd.infradead.org/ + L: linux-mtd@lists.infradead.org + T: git git://git.infradead.org/ubi-2.6.git +diff -uprN linux-2.6.28/scripts/unifdef.c ubifs-v2.6.28/scripts/unifdef.c +--- linux-2.6.28/scripts/unifdef.c 2008-12-24 18:26:37.000000000 -0500 ++++ ubifs-v2.6.28/scripts/unifdef.c 2011-06-15 14:22:10.000000000 -0400 +@@ -206,7 +206,7 @@ static void done(void); + static void error(const char *); + static int findsym(const char *); + static void flushline(bool); +-static Linetype getline(void); ++static Linetype get_line(void); + static Linetype ifeval(const char **); + static void ignoreoff(void); + static void ignoreon(void); +@@ -512,7 +512,7 @@ process(void) + + for (;;) { + linenum++; +- lineval = getline(); ++ lineval = get_line(); + trans_table[ifstate[depth]][lineval](); + debug("process %s -> %s depth %d", + linetype_name[lineval], +@@ -526,7 +526,7 @@ process(void) + * help from skipcomment(). 
+ */ + static Linetype +-getline(void) ++get_line(void) + { + const char *cp; + int cursym; diff --git a/kernel-bfs-2.6.28/debian/patches/series b/kernel-bfs-2.6.28/debian/patches/series index 7106bd0..20b669e 100644 --- a/kernel-bfs-2.6.28/debian/patches/series +++ b/kernel-bfs-2.6.28/debian/patches/series @@ -101,6 +101,7 @@ extra/voltage_scaling_1.diff extra/voltage_scaling_0.diff extra/reiser4-for-2.6.28.patch extra/reiser4-2.6.28.1-fix.patch +extra/ubifs.diff ################################# # BFQ patches