1 diff -uprN linux-2.6.28/arch/x86/include/asm/proto.h ubifs-v2.6.28/arch/x86/include/asm/proto.h
2 --- linux-2.6.28/arch/x86/include/asm/proto.h 2008-12-24 18:26:37.000000000 -0500
3 +++ ubifs-v2.6.28/arch/x86/include/asm/proto.h 2011-06-15 14:22:06.000000000 -0400
4 @@ -26,7 +26,4 @@ static const int reboot_force = 0;
6 long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
8 -#define round_up(x, y) (((x) + (y) - 1) & ~((y) - 1))
9 -#define round_down(x, y) ((x) & ~((y) - 1))
11 #endif /* _ASM_X86_PROTO_H */
12 diff -uprN linux-2.6.28/Documentation/filesystems/ubifs.txt ubifs-v2.6.28/Documentation/filesystems/ubifs.txt
13 --- linux-2.6.28/Documentation/filesystems/ubifs.txt 2011-06-15 15:12:26.000000000 -0400
14 +++ ubifs-v2.6.28/Documentation/filesystems/ubifs.txt 2011-06-15 14:22:04.000000000 -0400
15 @@ -82,12 +82,12 @@ Mount options
16 bulk_read read more in one go to take advantage of flash
17 media that read faster sequentially
18 no_bulk_read (*) do not bulk-read
19 -no_chk_data_crc skip checking of CRCs on data nodes in order to
20 +no_chk_data_crc (*) skip checking of CRCs on data nodes in order to
21 improve read performance. Use this option only
22 if the flash media is highly reliable. The effect
23 of this option is that corruption of the contents
24 of a file can go unnoticed.
25 -chk_data_crc (*) do not skip checking CRCs on data nodes
26 +chk_data_crc do not skip checking CRCs on data nodes
27 compr=none override default compressor and set it to "none"
28 compr=lzo override default compressor and set it to "lzo"
29 compr=zlib override default compressor and set it to "zlib"
30 @@ -115,28 +115,8 @@ ubi.mtd=0 root=ubi0:rootfs rootfstype=ub
31 Module Parameters for Debugging
32 ===============================
34 -When UBIFS has been compiled with debugging enabled, there are 3 module
35 +When UBIFS has been compiled with debugging enabled, there are 2 module
36 parameters that are available to control aspects of testing and debugging.
37 -The parameters are unsigned integers where each bit controls an option.
40 -debug_msgs Selects which debug messages to display, as follows:
42 - Message Type Flag value
48 - LEB search messages 16
49 - Budgeting messages 32
50 - Garbage collection messages 64
51 - Tree Node Cache (TNC) messages 128
52 - LEB properties (lprops) messages 256
53 - Input/output messages 512
56 - Recovery messages 4096
58 debug_chks Selects extra checks that UBIFS can do while running:
60 @@ -154,11 +134,9 @@ debug_tsts Selects a mode of testing, as
64 - Force in-the-gaps method 2
65 Failure mode for recovery testing 4
67 -For example, set debug_msgs to 5 to display General messages and Mount
69 +For example, set debug_chks to 3 to enable general and TNC checks.
73 diff -uprN linux-2.6.28/drivers/char/random.c ubifs-v2.6.28/drivers/char/random.c
74 --- linux-2.6.28/drivers/char/random.c 2011-06-15 15:12:27.000000000 -0400
75 +++ ubifs-v2.6.28/drivers/char/random.c 2011-06-15 15:16:03.000000000 -0400
76 @@ -1018,12 +1018,6 @@ random_read(struct file *file, char __us
77 /* like a named pipe */
81 - * If we gave the user some bytes, update the access time.
84 - file_accessed(file);
86 return (count ? count : retval);
89 @@ -1074,7 +1068,6 @@ static ssize_t random_write(struct file
90 size_t count, loff_t *ppos)
93 - struct inode *inode = file->f_path.dentry->d_inode;
95 ret = write_pool(&blocking_pool, buffer, count);
97 @@ -1083,8 +1076,6 @@ static ssize_t random_write(struct file
101 - inode->i_mtime = current_fs_time(inode->i_sb);
102 - mark_inode_dirty(inode);
103 return (ssize_t)count;
106 diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c
107 --- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c 2008-12-24 18:26:37.000000000 -0500
108 +++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0001.c 2011-06-15 14:22:07.000000000 -0400
109 @@ -421,6 +421,7 @@ struct mtd_info *cfi_cmdset_0001(struct
110 mtd->flags = MTD_CAP_NORFLASH;
111 mtd->name = map->name;
113 + mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
115 mtd->reboot_notifier.notifier_call = cfi_intelext_reboot;
117 diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c
118 --- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c 2008-12-24 18:26:37.000000000 -0500
119 +++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0002.c 2011-06-15 14:22:07.000000000 -0400
120 @@ -346,6 +346,10 @@ struct mtd_info *cfi_cmdset_0002(struct
121 mtd->flags = MTD_CAP_NORFLASH;
122 mtd->name = map->name;
124 + mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
126 + DEBUG(MTD_DEBUG_LEVEL3, "MTD %s(): write buffer size %d\n",
127 + __func__, mtd->writebufsize);
129 if (cfi->cfi_mode==CFI_MODE_CFI){
130 unsigned char bootloc;
131 diff -uprN linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c
132 --- linux-2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c 2008-12-24 18:26:37.000000000 -0500
133 +++ ubifs-v2.6.28/drivers/mtd/chips/cfi_cmdset_0020.c 2011-06-15 14:22:07.000000000 -0400
134 @@ -239,6 +239,7 @@ static struct mtd_info *cfi_staa_setup(s
135 mtd->resume = cfi_staa_resume;
136 mtd->flags = MTD_CAP_NORFLASH & ~MTD_BIT_WRITEABLE;
137 mtd->writesize = 8; /* FIXME: Should be 0 for STMicro flashes w/out ECC */
138 + mtd->writebufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
139 map->fldrv = &cfi_staa_chipdrv;
140 __module_get(THIS_MODULE);
141 mtd->name = map->name;
142 diff -uprN linux-2.6.28/drivers/mtd/devices/mtdram.c ubifs-v2.6.28/drivers/mtd/devices/mtdram.c
143 --- linux-2.6.28/drivers/mtd/devices/mtdram.c 2008-12-24 18:26:37.000000000 -0500
144 +++ ubifs-v2.6.28/drivers/mtd/devices/mtdram.c 2011-06-15 14:22:07.000000000 -0400
145 @@ -109,6 +109,7 @@ int mtdram_init_device(struct mtd_info *
146 mtd->flags = MTD_CAP_RAM;
149 + mtd->writebufsize = 64; /* Mimic CFI NOR flashes */
150 mtd->erasesize = MTDRAM_ERASE_SIZE;
151 mtd->priv = mapped_address;
153 diff -uprN linux-2.6.28/drivers/mtd/mtd_blkdevs.c ubifs-v2.6.28/drivers/mtd/mtd_blkdevs.c
154 --- linux-2.6.28/drivers/mtd/mtd_blkdevs.c 2008-12-24 18:26:37.000000000 -0500
155 +++ ubifs-v2.6.28/drivers/mtd/mtd_blkdevs.c 2011-06-15 14:22:07.000000000 -0400
156 @@ -139,7 +139,7 @@ static int blktrans_open(struct block_de
157 struct mtd_blktrans_ops *tr = dev->tr;
160 - if (!try_module_get(dev->mtd->owner))
161 + if (!get_mtd_device(NULL, dev->mtd->index))
164 if (!try_module_get(tr->owner))
165 @@ -153,7 +153,7 @@ static int blktrans_open(struct block_de
167 if (tr->open && (ret = tr->open(dev))) {
168 dev->mtd->usecount--;
169 - module_put(dev->mtd->owner);
170 + put_mtd_device(dev->mtd);
172 module_put(tr->owner);
174 @@ -172,7 +172,7 @@ static int blktrans_release(struct gendi
177 dev->mtd->usecount--;
178 - module_put(dev->mtd->owner);
179 + put_mtd_device(dev->mtd);
180 module_put(tr->owner);
183 diff -uprN linux-2.6.28/drivers/mtd/mtdconcat.c ubifs-v2.6.28/drivers/mtd/mtdconcat.c
184 --- linux-2.6.28/drivers/mtd/mtdconcat.c 2008-12-24 18:26:37.000000000 -0500
185 +++ ubifs-v2.6.28/drivers/mtd/mtdconcat.c 2011-06-15 14:22:07.000000000 -0400
186 @@ -698,6 +698,7 @@ struct mtd_info *mtd_concat_create(struc
187 struct mtd_concat *concat;
188 u_int32_t max_erasesize, curr_erasesize;
189 int num_erase_region;
190 + int max_writebufsize = 0;
192 printk(KERN_NOTICE "Concatenating MTD devices:\n");
193 for (i = 0; i < num_devs; i++)
194 @@ -724,6 +725,12 @@ struct mtd_info *mtd_concat_create(struc
195 concat->mtd.size = subdev[0]->size;
196 concat->mtd.erasesize = subdev[0]->erasesize;
197 concat->mtd.writesize = subdev[0]->writesize;
199 + for (i = 0; i < num_devs; i++)
200 + if (max_writebufsize < subdev[i]->writebufsize)
201 + max_writebufsize = subdev[i]->writebufsize;
202 + concat->mtd.writebufsize = max_writebufsize;
204 concat->mtd.subpage_sft = subdev[0]->subpage_sft;
205 concat->mtd.oobsize = subdev[0]->oobsize;
206 concat->mtd.oobavail = subdev[0]->oobavail;
207 diff -uprN linux-2.6.28/drivers/mtd/mtdpart.c ubifs-v2.6.28/drivers/mtd/mtdpart.c
208 --- linux-2.6.28/drivers/mtd/mtdpart.c 2011-06-15 15:12:27.000000000 -0400
209 +++ ubifs-v2.6.28/drivers/mtd/mtdpart.c 2011-06-15 15:16:03.000000000 -0400
210 @@ -363,6 +363,7 @@ static struct mtd_part *add_one_partitio
211 slave->mtd.flags = master->flags & ~part->mask_flags;
212 slave->mtd.size = part->size;
213 slave->mtd.writesize = master->writesize;
214 + slave->mtd.writebufsize = master->writebufsize;
215 slave->mtd.oobsize = master->oobsize;
216 slave->mtd.oobavail = master->oobavail;
217 slave->mtd.subpage_sft = master->subpage_sft;
218 diff -uprN linux-2.6.28/drivers/mtd/nand/nand_base.c ubifs-v2.6.28/drivers/mtd/nand/nand_base.c
219 --- linux-2.6.28/drivers/mtd/nand/nand_base.c 2008-12-24 18:26:37.000000000 -0500
220 +++ ubifs-v2.6.28/drivers/mtd/nand/nand_base.c 2011-06-15 14:22:07.000000000 -0400
221 @@ -1084,7 +1084,8 @@ static int nand_do_read_ops(struct mtd_i
223 /* Transfer not aligned data */
225 - if (!NAND_SUBPAGE_READ(chip) && !oob)
226 + if (!NAND_SUBPAGE_READ(chip) && !oob &&
227 + !(mtd->ecc_stats.failed - stats.failed))
228 chip->pagebuf = realpage;
229 memcpy(buf, chip->buffers->databuf + col, bytes);
231 @@ -2703,6 +2704,7 @@ int nand_scan_tail(struct mtd_info *mtd)
232 mtd->resume = nand_resume;
233 mtd->block_isbad = nand_block_isbad;
234 mtd->block_markbad = nand_block_markbad;
235 + mtd->writebufsize = mtd->writesize;
237 /* propagate ecc.layout to mtd_info */
238 mtd->ecclayout = chip->ecc.layout;
239 diff -uprN linux-2.6.28/drivers/mtd/nand/nandsim.c ubifs-v2.6.28/drivers/mtd/nand/nandsim.c
240 --- linux-2.6.28/drivers/mtd/nand/nandsim.c 2008-12-24 18:26:37.000000000 -0500
241 +++ ubifs-v2.6.28/drivers/mtd/nand/nandsim.c 2011-06-15 14:22:07.000000000 -0400
242 @@ -1736,13 +1736,17 @@ static void ns_nand_write_byte(struct mt
244 /* Check if chip is expecting command */
245 if (NS_STATE(ns->nxstate) != STATE_UNKNOWN && !(ns->nxstate & STATE_CMD_MASK)) {
247 - * We are in situation when something else (not command)
248 - * was expected but command was input. In this case ignore
249 - * previous command(s)/state(s) and accept the last one.
251 - NS_WARN("write_byte: command (%#x) wasn't expected, expected state is %s, "
252 - "ignore previous states\n", (uint)byte, get_state_name(ns->nxstate));
253 + /* Do not warn if only 2 id bytes are read */
254 + if (!(ns->regs.command == NAND_CMD_READID &&
255 + NS_STATE(ns->state) == STATE_DATAOUT_ID && ns->regs.count == 2)) {
257 + * We are in situation when something else (not command)
258 + * was expected but command was input. In this case ignore
259 + * previous command(s)/state(s) and accept the last one.
261 + NS_WARN("write_byte: command (%#x) wasn't expected, expected state is %s, "
262 + "ignore previous states\n", (uint)byte, get_state_name(ns->nxstate));
264 switch_to_ready_state(ns, NS_STATUS_FAILED(ns));
267 diff -uprN linux-2.6.28/drivers/mtd/onenand/onenand_base.c ubifs-v2.6.28/drivers/mtd/onenand/onenand_base.c
268 --- linux-2.6.28/drivers/mtd/onenand/onenand_base.c 2011-06-15 15:12:27.000000000 -0400
269 +++ ubifs-v2.6.28/drivers/mtd/onenand/onenand_base.c 2011-06-15 15:16:03.000000000 -0400
270 @@ -2858,6 +2858,7 @@ int onenand_scan(struct mtd_info *mtd, i
271 mtd->block_isbad = onenand_block_isbad;
272 mtd->block_markbad = onenand_block_markbad;
273 mtd->owner = THIS_MODULE;
274 + mtd->writebufsize = mtd->writesize;
276 /* Unlock whole block */
277 onenand_unlock_all(mtd);
278 diff -uprN linux-2.6.28/drivers/mtd/ubi/build.c ubifs-v2.6.28/drivers/mtd/ubi/build.c
279 --- linux-2.6.28/drivers/mtd/ubi/build.c 2011-06-15 15:12:27.000000000 -0400
280 +++ ubifs-v2.6.28/drivers/mtd/ubi/build.c 2011-06-15 14:22:07.000000000 -0400
282 #include <linux/module.h>
283 #include <linux/moduleparam.h>
284 #include <linux/stringify.h>
285 +#include <linux/namei.h>
286 #include <linux/stat.h>
287 #include <linux/miscdevice.h>
288 #include <linux/log2.h>
290 /* Maximum length of the 'mtd=' parameter */
291 #define MTD_PARAM_LEN_MAX 64
293 +#ifdef CONFIG_MTD_UBI_MODULE
294 +#define ubi_is_module() 1
296 +#define ubi_is_module() 0
300 * struct mtd_dev_param - MTD device parameter description data structure.
301 - * @name: MTD device name or number string
302 + * @name: MTD character device node path, MTD device name, or MTD device number
304 * @vid_hdr_offs: VID header offset
306 struct mtd_dev_param {
307 @@ -57,10 +65,10 @@ struct mtd_dev_param {
310 /* Numbers of elements set in the @mtd_dev_param array */
311 -static int mtd_devs;
312 +static int __initdata mtd_devs;
314 /* MTD devices specification parameters */
315 -static struct mtd_dev_param mtd_dev_param[UBI_MAX_DEVICES];
316 +static struct mtd_dev_param __initdata mtd_dev_param[UBI_MAX_DEVICES];
318 /* Root UBI "class" object (corresponds to '/<sysfs>/class/ubi/') */
319 struct class *ubi_class;
320 @@ -122,6 +130,94 @@ static struct device_attribute dev_mtd_n
321 __ATTR(mtd_num, S_IRUGO, dev_attribute_show, NULL);
324 + * ubi_volume_notify - send a volume change notification.
325 + * @ubi: UBI device description object
326 + * @vol: volume description object of the changed volume
327 + * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc)
329 + * This is a helper function which notifies all subscribers about a volume
330 + * change event (creation, removal, re-sizing, re-naming, updating). Returns
331 + * zero in case of success and a negative error code in case of failure.
333 +int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol, int ntype)
335 + struct ubi_notification nt;
337 + ubi_do_get_device_info(ubi, &nt.di);
338 + ubi_do_get_volume_info(ubi, vol, &nt.vi);
339 + return blocking_notifier_call_chain(&ubi_notifiers, ntype, &nt);
343 + * ubi_notify_all - send a notification to all volumes.
344 + * @ubi: UBI device description object
345 + * @ntype: notification type to send (%UBI_VOLUME_ADDED, etc)
346 + * @nb: the notifier to call
348 + * This function walks all volumes of UBI device @ubi and sends the @ntype
349 + * notification for each volume. If @nb is %NULL, then all registered notifiers
350 + * are called, otherwise only the @nb notifier is called. Returns the number of
351 + * sent notifications.
353 +int ubi_notify_all(struct ubi_device *ubi, int ntype, struct notifier_block *nb)
355 + struct ubi_notification nt;
358 + ubi_do_get_device_info(ubi, &nt.di);
360 + mutex_lock(&ubi->device_mutex);
361 + for (i = 0; i < ubi->vtbl_slots; i++) {
363 + * Since the @ubi->device is locked, and we are not going to
364 + * change @ubi->volumes, we do not have to lock
365 + * @ubi->volumes_lock.
367 + if (!ubi->volumes[i])
370 + ubi_do_get_volume_info(ubi, ubi->volumes[i], &nt.vi);
372 + nb->notifier_call(nb, ntype, &nt);
374 + blocking_notifier_call_chain(&ubi_notifiers, ntype,
378 + mutex_unlock(&ubi->device_mutex);
384 + * ubi_enumerate_volumes - send "add" notification for all existing volumes.
385 + * @nb: the notifier to call
387 + * This function walks all UBI devices and volumes and sends the
388 + * %UBI_VOLUME_ADDED notification for each volume. If @nb is %NULL, then all
389 + * registered notifiers are called, otherwise only the @nb notifier is called.
390 + * Returns the number of sent notifications.
392 +int ubi_enumerate_volumes(struct notifier_block *nb)
397 + * Since the @ubi_devices_mutex is locked, and we are not going to
398 + * change @ubi_devices, we do not have to lock @ubi_devices_lock.
400 + for (i = 0; i < UBI_MAX_DEVICES; i++) {
401 + struct ubi_device *ubi = ubi_devices[i];
405 + count += ubi_notify_all(ubi, UBI_VOLUME_ADDED, nb);
412 * ubi_get_device - get UBI device.
413 * @ubi_num: UBI device number
415 @@ -263,17 +359,23 @@ static ssize_t dev_attribute_show(struct
419 -/* Fake "release" method for UBI devices */
420 -static void dev_release(struct device *dev) { }
421 +static void dev_release(struct device *dev)
423 + struct ubi_device *ubi = container_of(dev, struct ubi_device, dev);
429 * ubi_sysfs_init - initialize sysfs for an UBI device.
430 * @ubi: UBI device description object
431 + * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was
434 * This function returns zero in case of success and a negative error code in
437 -static int ubi_sysfs_init(struct ubi_device *ubi)
438 +static int ubi_sysfs_init(struct ubi_device *ubi, int *ref)
442 @@ -285,6 +387,7 @@ static int ubi_sysfs_init(struct ubi_dev
447 err = device_create_file(&ubi->dev, &dev_eraseblock_size);
450 @@ -340,7 +443,7 @@ static void ubi_sysfs_close(struct ubi_d
454 - * kill_volumes - destroy all volumes.
455 + * kill_volumes - destroy all user volumes.
456 * @ubi: UBI device description object
458 static void kill_volumes(struct ubi_device *ubi)
459 @@ -353,36 +456,29 @@ static void kill_volumes(struct ubi_devi
463 - * free_user_volumes - free all user volumes.
464 - * @ubi: UBI device description object
466 - * Normally the volumes are freed at the release function of the volume device
467 - * objects. However, on error paths the volumes have to be freed before the
468 - * device objects have been initialized.
470 -static void free_user_volumes(struct ubi_device *ubi)
474 - for (i = 0; i < ubi->vtbl_slots; i++)
475 - if (ubi->volumes[i]) {
476 - kfree(ubi->volumes[i]->eba_tbl);
477 - kfree(ubi->volumes[i]);
482 * uif_init - initialize user interfaces for an UBI device.
483 * @ubi: UBI device description object
484 + * @ref: set to %1 on exit in case of failure if a reference to @ubi->dev was
485 + * taken, otherwise set to %0
487 + * This function initializes various user interfaces for an UBI device. If the
488 + * initialization fails at an early stage, this function frees all the
489 + * resources it allocated, returns an error, and @ref is set to %0. However,
490 + * if the initialization fails after the UBI device was registered in the
491 + * driver core subsystem, this function takes a reference to @ubi->dev, because
492 + * otherwise the release function ('dev_release()') would free whole @ubi
493 + * object. The @ref argument is set to %1 in this case. The caller has to put
496 * This function returns zero in case of success and a negative error code in
497 - * case of failure. Note, this function destroys all volumes if it failes.
500 -static int uif_init(struct ubi_device *ubi)
501 +static int uif_init(struct ubi_device *ubi, int *ref)
503 - int i, err, do_free = 0;
508 sprintf(ubi->ubi_name, UBI_NAME_STR "%d", ubi->ubi_num);
511 @@ -410,7 +506,7 @@ static int uif_init(struct ubi_device *u
515 - err = ubi_sysfs_init(ubi);
516 + err = ubi_sysfs_init(ubi, ref);
520 @@ -427,13 +523,12 @@ static int uif_init(struct ubi_device *u
527 + get_device(&ubi->dev);
528 ubi_sysfs_close(ubi);
529 cdev_del(&ubi->cdev);
532 - free_user_volumes(ubi);
533 unregister_chrdev_region(ubi->cdev.dev, ubi->vtbl_slots + 1);
534 ubi_err("cannot initialize UBI %s, error %d", ubi->ubi_name, err);
536 @@ -493,8 +588,10 @@ static int attach_by_scanning(struct ubi
538 ubi->bad_peb_count = si->bad_peb_count;
539 ubi->good_peb_count = ubi->peb_count - ubi->bad_peb_count;
540 + ubi->corr_peb_count = si->corr_peb_count;
541 ubi->max_ec = si->max_ec;
542 ubi->mean_ec = si->mean_ec;
543 + ubi_msg("max. sequence number: %llu", si->max_sqnum);
545 err = ubi_read_volume_table(ubi, si);
547 @@ -567,6 +664,11 @@ static int io_init(struct ubi_device *ub
548 if (ubi->mtd->block_isbad && ubi->mtd->block_markbad)
549 ubi->bad_allowed = 1;
551 + if (ubi->mtd->type == MTD_NORFLASH) {
552 + ubi_assert(ubi->mtd->writesize == 1);
553 + ubi->nor_flash = 1;
556 ubi->min_io_size = ubi->mtd->writesize;
557 ubi->hdrs_min_io_size = ubi->mtd->writesize >> ubi->mtd->subpage_sft;
559 @@ -585,11 +687,25 @@ static int io_init(struct ubi_device *ub
560 ubi_assert(ubi->hdrs_min_io_size <= ubi->min_io_size);
561 ubi_assert(ubi->min_io_size % ubi->hdrs_min_io_size == 0);
563 + ubi->max_write_size = ubi->mtd->writebufsize;
565 + * Maximum write size has to be greater or equivalent to min. I/O
566 + * size, and be multiple of min. I/O size.
568 + if (ubi->max_write_size < ubi->min_io_size ||
569 + ubi->max_write_size % ubi->min_io_size ||
570 + !is_power_of_2(ubi->max_write_size)) {
571 + ubi_err("bad write buffer size %d for %d min. I/O unit",
572 + ubi->max_write_size, ubi->min_io_size);
576 /* Calculate default aligned sizes of EC and VID headers */
577 ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);
578 ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);
580 dbg_msg("min_io_size %d", ubi->min_io_size);
581 + dbg_msg("max_write_size %d", ubi->max_write_size);
582 dbg_msg("hdrs_min_io_size %d", ubi->hdrs_min_io_size);
583 dbg_msg("ec_hdr_alsize %d", ubi->ec_hdr_alsize);
584 dbg_msg("vid_hdr_alsize %d", ubi->vid_hdr_alsize);
585 @@ -606,7 +722,7 @@ static int io_init(struct ubi_device *ub
588 /* Similar for the data offset */
589 - ubi->leb_start = ubi->vid_hdr_offset + UBI_EC_HDR_SIZE;
590 + ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE;
591 ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size);
593 dbg_msg("vid_hdr_offset %d", ubi->vid_hdr_offset);
594 @@ -751,7 +867,7 @@ static int autoresize(struct ubi_device
595 int ubi_attach_mtd_dev(struct mtd_info *mtd, int ubi_num, int vid_hdr_offset)
597 struct ubi_device *ubi;
598 - int i, err, do_free = 1;
599 + int i, err, ref = 0;
602 * Check if we already have the same MTD device attached.
603 @@ -814,11 +930,12 @@ int ubi_attach_mtd_dev(struct mtd_info *
605 mutex_init(&ubi->buf_mutex);
606 mutex_init(&ubi->ckvol_mutex);
607 - mutex_init(&ubi->mult_mutex);
608 - mutex_init(&ubi->volumes_mutex);
609 + mutex_init(&ubi->device_mutex);
610 spin_lock_init(&ubi->volumes_lock);
612 ubi_msg("attaching mtd%d to ubi%d", mtd->index, ubi_num);
613 + dbg_msg("sizeof(struct ubi_scan_leb) %zu", sizeof(struct ubi_scan_leb));
614 + dbg_msg("sizeof(struct ubi_wl_entry) %zu", sizeof(struct ubi_wl_entry));
618 @@ -833,13 +950,6 @@ int ubi_attach_mtd_dev(struct mtd_info *
622 -#ifdef CONFIG_MTD_UBI_DEBUG
623 - mutex_init(&ubi->dbg_buf_mutex);
624 - ubi->dbg_peb_buf = vmalloc(ubi->peb_size);
625 - if (!ubi->dbg_peb_buf)
629 err = attach_by_scanning(ubi);
631 dbg_err("failed to attach by scanning, error %d", err);
632 @@ -852,9 +962,9 @@ int ubi_attach_mtd_dev(struct mtd_info *
636 - err = uif_init(ubi);
637 + err = uif_init(ubi, &ref);
642 ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name);
643 if (IS_ERR(ubi->bgt_thread)) {
644 @@ -869,6 +979,7 @@ int ubi_attach_mtd_dev(struct mtd_info *
645 ubi_msg("MTD device size: %llu MiB", ubi->flash_size >> 20);
646 ubi_msg("number of good PEBs: %d", ubi->good_peb_count);
647 ubi_msg("number of bad PEBs: %d", ubi->bad_peb_count);
648 + ubi_msg("number of corrupted PEBs: %d", ubi->corr_peb_count);
649 ubi_msg("max. allowed volumes: %d", ubi->vtbl_slots);
650 ubi_msg("wear-leveling threshold: %d", CONFIG_MTD_UBI_WL_THRESHOLD);
651 ubi_msg("number of internal volumes: %d", UBI_INT_VOL_COUNT);
652 @@ -879,32 +990,34 @@ int ubi_attach_mtd_dev(struct mtd_info *
653 ubi_msg("number of PEBs reserved for bad PEB handling: %d",
655 ubi_msg("max/mean erase counter: %d/%d", ubi->max_ec, ubi->mean_ec);
656 - ubi_msg("image sequence number: %d", ubi->image_seq);
657 + ubi_msg("image sequence number: %d", ubi->image_seq);
659 - if (!DBG_DISABLE_BGT)
660 - ubi->thread_enabled = 1;
662 + * The below lock makes sure we do not race with 'ubi_thread()' which
663 + * checks @ubi->thread_enabled. Otherwise we may fail to wake it up.
665 + spin_lock(&ubi->wl_lock);
666 + ubi->thread_enabled = 1;
667 wake_up_process(ubi->bgt_thread);
668 + spin_unlock(&ubi->wl_lock);
670 ubi_devices[ubi_num] = ubi;
671 + ubi_notify_all(ubi, UBI_VOLUME_ADDED, NULL);
681 - free_user_volumes(ubi);
682 free_internal_volumes(ubi);
685 vfree(ubi->peb_buf1);
686 vfree(ubi->peb_buf2);
687 -#ifdef CONFIG_MTD_UBI_DEBUG
688 - vfree(ubi->dbg_peb_buf);
692 + put_device(&ubi->dev);
698 @@ -928,13 +1041,13 @@ int ubi_detach_mtd_dev(int ubi_num, int
699 if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
702 - spin_lock(&ubi_devices_lock);
703 - ubi = ubi_devices[ubi_num];
705 - spin_unlock(&ubi_devices_lock);
706 + ubi = ubi_get_device(ubi_num);
711 + spin_lock(&ubi_devices_lock);
712 + put_device(&ubi->dev);
713 + ubi->ref_count -= 1;
714 if (ubi->ref_count) {
716 spin_unlock(&ubi_devices_lock);
717 @@ -948,6 +1061,7 @@ int ubi_detach_mtd_dev(int ubi_num, int
718 spin_unlock(&ubi_devices_lock);
720 ubi_assert(ubi_num == ubi->ubi_num);
721 + ubi_notify_all(ubi, UBI_VOLUME_REMOVED, NULL);
722 dbg_msg("detaching mtd%d from ubi%d", ubi->mtd->index, ubi_num);
725 @@ -957,6 +1071,12 @@ int ubi_detach_mtd_dev(int ubi_num, int
727 kthread_stop(ubi->bgt_thread);
730 + * Get a reference to the device in order to prevent 'dev_release()'
731 + * from freeing the @ubi object.
733 + get_device(&ubi->dev);
737 free_internal_volumes(ubi);
738 @@ -964,22 +1084,56 @@ int ubi_detach_mtd_dev(int ubi_num, int
739 put_mtd_device(ubi->mtd);
740 vfree(ubi->peb_buf1);
741 vfree(ubi->peb_buf2);
742 -#ifdef CONFIG_MTD_UBI_DEBUG
743 - vfree(ubi->dbg_peb_buf);
745 ubi_msg("mtd%d is detached from ubi%d", ubi->mtd->index, ubi->ubi_num);
747 + put_device(&ubi->dev);
752 - * find_mtd_device - open an MTD device by its name or number.
753 - * @mtd_dev: name or number of the device
754 + * open_mtd_by_chdev - open an MTD device by its character device node path.
755 + * @mtd_dev: MTD character device node path
757 + * This helper function opens an MTD device by its character node device path.
758 + * Returns MTD device description object in case of success and a negative
759 + * error code in case of failure.
761 +static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
763 + int err, major, minor, mode;
766 + /* Probably this is an MTD character device node path */
767 + err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path);
769 + return ERR_PTR(err);
771 + /* MTD device number is defined by the major / minor numbers */
772 + major = imajor(path.dentry->d_inode);
773 + minor = iminor(path.dentry->d_inode);
774 + mode = path.dentry->d_inode->i_mode;
776 + if (major != MTD_CHAR_MAJOR || !S_ISCHR(mode))
777 + return ERR_PTR(-EINVAL);
781 + * Just do not think the "/dev/mtdrX" devices support is need,
782 + * so do not support them to avoid doing extra work.
784 + return ERR_PTR(-EINVAL);
786 + return get_mtd_device(NULL, minor / 2);
790 + * open_mtd_device - open MTD device by name, character device path, or number.
791 + * @mtd_dev: name, character device node path, or MTD device device number
793 * This function tries to open and MTD device described by @mtd_dev string,
794 - * which is first treated as an ASCII number, and if it is not true, it is
795 - * treated as MTD device name. Returns MTD device description object in case of
796 - * success and a negative error code in case of failure.
797 + * which is first treated as ASCII MTD device number, and if it is not true, it
798 + * is treated as MTD device name, and if that is also not true, it is treated
799 + * as MTD character device node path. Returns MTD device description object in
800 + * case of success and a negative error code in case of failure.
802 static struct mtd_info * __init open_mtd_device(const char *mtd_dev)
804 @@ -994,6 +1148,9 @@ static struct mtd_info * __init open_mtd
807 mtd = get_mtd_device_nm(mtd_dev);
808 + if (IS_ERR(mtd) && PTR_ERR(mtd) == -ENODEV)
809 + /* Probably this is an MTD character device node path */
810 + mtd = open_mtd_by_chdev(mtd_dev);
812 mtd = get_mtd_device(NULL, mtd_num);
814 @@ -1057,9 +1214,24 @@ static int __init ubi_init(void)
816 mutex_unlock(&ubi_devices_mutex);
818 - put_mtd_device(mtd);
819 ubi_err("cannot attach mtd%d", mtd->index);
821 + put_mtd_device(mtd);
824 + * Originally UBI stopped initializing on any error.
825 + * However, later on it was found out that this
826 + * behavior is not very good when UBI is compiled into
827 + * the kernel and the MTD devices to attach are passed
828 + * through the command line. Indeed, UBI failure
829 + * stopped whole boot sequence.
831 + * To fix this, we changed the behavior for the
832 + * non-module case, but preserved the old behavior for
833 + * the module case, just for compatibility. This is a
834 + * little inconsistent, though.
836 + if (ubi_is_module())
841 @@ -1209,13 +1381,15 @@ static int __init ubi_mtd_param_parse(co
843 module_param_call(mtd, ubi_mtd_param_parse, NULL, NULL, 000);
844 MODULE_PARM_DESC(mtd, "MTD devices to attach. Parameter format: "
845 - "mtd=<name|num>[,<vid_hdr_offs>].\n"
846 + "mtd=<name|num|path>[,<vid_hdr_offs>].\n"
847 "Multiple \"mtd\" parameters may be specified.\n"
848 - "MTD devices may be specified by their number or name.\n"
849 + "MTD devices may be specified by their number, name, or "
850 + "path to the MTD character device node.\n"
851 "Optional \"vid_hdr_offs\" parameter specifies UBI VID "
852 - "header position and data starting position to be used "
854 - "Example: mtd=content,1984 mtd=4 - attach MTD device"
855 + "header position to be used by UBI.\n"
856 + "Example 1: mtd=/dev/mtd0 - attach MTD device "
858 + "Example 2: mtd=content,1984 mtd=4 - attach MTD device "
859 "with name \"content\" using VID header offset 1984, and "
860 "MTD device number 4 with default VID header offset.");
862 diff -uprN linux-2.6.28/drivers/mtd/ubi/cdev.c ubifs-v2.6.28/drivers/mtd/ubi/cdev.c
863 --- linux-2.6.28/drivers/mtd/ubi/cdev.c 2011-06-15 15:12:27.000000000 -0400
864 +++ ubifs-v2.6.28/drivers/mtd/ubi/cdev.c 2011-06-15 14:22:07.000000000 -0400
866 #include <linux/ioctl.h>
867 #include <linux/capability.h>
868 #include <linux/uaccess.h>
869 -#include <linux/smp_lock.h>
870 +#include <linux/compat.h>
871 +#include <linux/math64.h>
872 #include <mtd/ubi-user.h>
873 -#include <asm/div64.h>
877 @@ -113,7 +113,8 @@ static int vol_cdev_open(struct inode *i
881 - dbg_gen("open volume %d, mode %d", vol_id, mode);
882 + dbg_gen("open device %d, volume %d, mode %d",
883 + ubi_num, vol_id, mode);
885 desc = ubi_open_volume(ubi_num, vol_id, mode);
887 @@ -128,7 +129,8 @@ static int vol_cdev_release(struct inode
888 struct ubi_volume_desc *desc = file->private_data;
889 struct ubi_volume *vol = desc->vol;
891 - dbg_gen("release volume %d, mode %d", vol->vol_id, desc->mode);
892 + dbg_gen("release device %d, volume %d, mode %d",
893 + vol->ubi->ubi_num, vol->vol_id, desc->mode);
896 ubi_warn("update of volume %d not finished, volume is damaged",
897 @@ -155,7 +157,7 @@ static loff_t vol_cdev_llseek(struct fil
901 - /* Update is in progress, seeking is prohibited */
902 + /* Update is in progress, seeking is prohibited */
906 @@ -186,6 +188,16 @@ static loff_t vol_cdev_llseek(struct fil
910 +static int vol_cdev_fsync(struct file *file, struct dentry *dentry,
913 + struct ubi_volume_desc *desc = file->private_data;
914 + struct ubi_device *ubi = desc->vol->ubi;
916 + return ubi_sync(ubi->ubi_num);
920 static ssize_t vol_cdev_read(struct file *file, __user char *buf, size_t count,
923 @@ -195,7 +207,6 @@ static ssize_t vol_cdev_read(struct file
924 int err, lnum, off, len, tbuf_size;
925 size_t count_save = count;
929 dbg_gen("read %zd bytes from offset %lld of volume %d",
930 count, *offp, vol->vol_id);
931 @@ -225,10 +236,7 @@ static ssize_t vol_cdev_read(struct file
934 len = count > tbuf_size ? tbuf_size : count;
937 - off = do_div(tmp, vol->usable_leb_size);
939 + lnum = div_u64_rem(*offp, vol->usable_leb_size, &off);
943 @@ -263,12 +271,9 @@ static ssize_t vol_cdev_read(struct file
944 return err ? err : count_save - count;
947 -#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO
950 * This function allows to directly write to dynamic UBI volumes, without
951 - * issuing the volume update operation. Available only as a debugging feature.
952 - * Very useful for testing UBI.
953 + * issuing the volume update operation.
955 static ssize_t vol_cdev_direct_write(struct file *file, const char __user *buf,
956 size_t count, loff_t *offp)
957 @@ -279,7 +284,9 @@ static ssize_t vol_cdev_direct_write(str
958 int lnum, off, len, tbuf_size, err = 0;
959 size_t count_save = count;
963 + if (!vol->direct_writes)
966 dbg_gen("requested: write %zd bytes to offset %lld of volume %u",
967 count, *offp, vol->vol_id);
968 @@ -287,10 +294,7 @@ static ssize_t vol_cdev_direct_write(str
969 if (vol->vol_type == UBI_STATIC_VOLUME)
973 - off = do_div(tmp, vol->usable_leb_size);
976 + lnum = div_u64_rem(*offp, vol->usable_leb_size, &off);
977 if (off & (ubi->min_io_size - 1)) {
978 dbg_err("unaligned position");
980 @@ -347,10 +351,6 @@ static ssize_t vol_cdev_direct_write(str
981 return err ? err : count_save - count;
985 -#define vol_cdev_direct_write(file, buf, count, offp) (-EPERM)
986 -#endif /* CONFIG_MTD_UBI_DEBUG_USERSPACE_IO */
988 static ssize_t vol_cdev_write(struct file *file, const char __user *buf,
989 size_t count, loff_t *offp)
991 @@ -395,15 +395,15 @@ static ssize_t vol_cdev_write(struct fil
995 - ubi_gluebi_updated(vol);
996 + ubi_volume_notify(ubi, vol, UBI_VOLUME_UPDATED);
997 revoke_exclusive(desc, UBI_READWRITE);
1003 -static int vol_cdev_ioctl(struct inode *inode, struct file *file,
1004 - unsigned int cmd, unsigned long arg)
1005 +static long vol_cdev_ioctl(struct file *file, unsigned int cmd,
1006 + unsigned long arg)
1009 struct ubi_volume_desc *desc = file->private_data;
1010 @@ -487,7 +487,6 @@ static int vol_cdev_ioctl(struct inode *
1014 -#ifdef CONFIG_MTD_UBI_DEBUG_USERSPACE_IO
1015 /* Logical eraseblock erasure command */
1018 @@ -518,13 +517,77 @@ static int vol_cdev_ioctl(struct inode *
1019 err = ubi_wl_flush(ubi);
1024 + /* Logical eraseblock map command */
1025 + case UBI_IOCEBMAP:
1027 + struct ubi_map_req req;
1029 + err = copy_from_user(&req, argp, sizeof(struct ubi_map_req));
1034 + err = ubi_leb_map(desc, req.lnum, req.dtype);
1038 + /* Logical eraseblock un-map command */
1039 + case UBI_IOCEBUNMAP:
1043 + err = get_user(lnum, (__user int32_t *)argp);
1048 + err = ubi_leb_unmap(desc, lnum);
1052 + /* Check if logical eraseblock is mapped command */
1053 + case UBI_IOCEBISMAP:
1057 + err = get_user(lnum, (__user int32_t *)argp);
1062 + err = ubi_is_mapped(desc, lnum);
1066 + /* Set volume property command */
1067 + case UBI_IOCSETVOLPROP:
1069 + struct ubi_set_vol_prop_req req;
1071 + err = copy_from_user(&req, argp,
1072 + sizeof(struct ubi_set_vol_prop_req));
1077 + switch (req.property) {
1078 + case UBI_VOL_PROP_DIRECT_WRITE:
1079 + mutex_lock(&ubi->device_mutex);
1080 + desc->vol->direct_writes = !!req.value;
1081 + mutex_unlock(&ubi->device_mutex);
1098 @@ -735,23 +798,23 @@ static int rename_volumes(struct ubi_dev
1102 - re = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
1104 + re1 = kzalloc(sizeof(struct ubi_rename_entry), GFP_KERNEL);
1107 ubi_close_volume(desc);
1113 - list_add(&re->list, &rename_list);
1116 + list_add(&re1->list, &rename_list);
1117 dbg_msg("will remove volume %d, name \"%s\"",
1118 - re->desc->vol->vol_id, re->desc->vol->name);
1119 + re1->desc->vol->vol_id, re1->desc->vol->name);
1122 - mutex_lock(&ubi->volumes_mutex);
1123 + mutex_lock(&ubi->device_mutex);
1124 err = ubi_rename_volumes(ubi, &rename_list);
1125 - mutex_unlock(&ubi->volumes_mutex);
1126 + mutex_unlock(&ubi->device_mutex);
1129 list_for_each_entry_safe(re, re1, &rename_list, list) {
1130 @@ -762,8 +825,8 @@ out_free:
1134 -static int ubi_cdev_ioctl(struct inode *inode, struct file *file,
1135 - unsigned int cmd, unsigned long arg)
1136 +static long ubi_cdev_ioctl(struct file *file, unsigned int cmd,
1137 + unsigned long arg)
1140 struct ubi_device *ubi;
1141 @@ -773,7 +836,7 @@ static int ubi_cdev_ioctl(struct inode *
1142 if (!capable(CAP_SYS_RESOURCE))
1145 - ubi = ubi_get_by_major(imajor(inode));
1146 + ubi = ubi_get_by_major(imajor(file->f_mapping->host));
1150 @@ -794,9 +857,9 @@ static int ubi_cdev_ioctl(struct inode *
1154 - mutex_lock(&ubi->volumes_mutex);
1155 + mutex_lock(&ubi->device_mutex);
1156 err = ubi_create_volume(ubi, &req);
1157 - mutex_unlock(&ubi->volumes_mutex);
1158 + mutex_unlock(&ubi->device_mutex);
1162 @@ -825,9 +887,9 @@ static int ubi_cdev_ioctl(struct inode *
1166 - mutex_lock(&ubi->volumes_mutex);
1167 + mutex_lock(&ubi->device_mutex);
1168 err = ubi_remove_volume(desc, 0);
1169 - mutex_unlock(&ubi->volumes_mutex);
1170 + mutex_unlock(&ubi->device_mutex);
1173 * The volume is deleted (unless an error occurred), and the
1174 @@ -842,7 +904,6 @@ static int ubi_cdev_ioctl(struct inode *
1179 struct ubi_rsvol_req req;
1181 dbg_gen("re-size volume");
1182 @@ -862,13 +923,12 @@ static int ubi_cdev_ioctl(struct inode *
1187 - pebs = !!do_div(tmp, desc->vol->usable_leb_size);
1189 + pebs = div_u64(req.bytes + desc->vol->usable_leb_size - 1,
1190 + desc->vol->usable_leb_size);
1192 - mutex_lock(&ubi->volumes_mutex);
1193 + mutex_lock(&ubi->device_mutex);
1194 err = ubi_resize_volume(desc, pebs);
1195 - mutex_unlock(&ubi->volumes_mutex);
1196 + mutex_unlock(&ubi->device_mutex);
1197 ubi_close_volume(desc);
1200 @@ -892,9 +952,7 @@ static int ubi_cdev_ioctl(struct inode *
1204 - mutex_lock(&ubi->mult_mutex);
1205 err = rename_volumes(ubi, req);
1206 - mutex_unlock(&ubi->mult_mutex);
1210 @@ -908,8 +966,8 @@ static int ubi_cdev_ioctl(struct inode *
1214 -static int ctrl_cdev_ioctl(struct inode *inode, struct file *file,
1215 - unsigned int cmd, unsigned long arg)
1216 +static long ctrl_cdev_ioctl(struct file *file, unsigned int cmd,
1217 + unsigned long arg)
1220 void __user *argp = (void __user *)arg;
1221 @@ -985,26 +1043,61 @@ static int ctrl_cdev_ioctl(struct inode
1225 -/* UBI control character device operations */
1226 -struct file_operations ubi_ctrl_cdev_operations = {
1227 - .ioctl = ctrl_cdev_ioctl,
1228 - .owner = THIS_MODULE,
1229 +#ifdef CONFIG_COMPAT
1230 +static long vol_cdev_compat_ioctl(struct file *file, unsigned int cmd,
1231 + unsigned long arg)
1233 + unsigned long translated_arg = (unsigned long)compat_ptr(arg);
1235 + return vol_cdev_ioctl(file, cmd, translated_arg);
1238 +static long ubi_cdev_compat_ioctl(struct file *file, unsigned int cmd,
1239 + unsigned long arg)
1241 + unsigned long translated_arg = (unsigned long)compat_ptr(arg);
1243 + return ubi_cdev_ioctl(file, cmd, translated_arg);
1246 +static long ctrl_cdev_compat_ioctl(struct file *file, unsigned int cmd,
1247 + unsigned long arg)
1249 + unsigned long translated_arg = (unsigned long)compat_ptr(arg);
1251 + return ctrl_cdev_ioctl(file, cmd, translated_arg);
1254 +#define vol_cdev_compat_ioctl NULL
1255 +#define ubi_cdev_compat_ioctl NULL
1256 +#define ctrl_cdev_compat_ioctl NULL
1259 +/* UBI volume character device operations */
1260 +const struct file_operations ubi_vol_cdev_operations = {
1261 + .owner = THIS_MODULE,
1262 + .open = vol_cdev_open,
1263 + .release = vol_cdev_release,
1264 + .llseek = vol_cdev_llseek,
1265 + .read = vol_cdev_read,
1266 + .write = vol_cdev_write,
1267 + .fsync = vol_cdev_fsync,
1268 + .unlocked_ioctl = vol_cdev_ioctl,
1269 + .compat_ioctl = vol_cdev_compat_ioctl,
1272 /* UBI character device operations */
1273 -struct file_operations ubi_cdev_operations = {
1274 - .owner = THIS_MODULE,
1275 - .ioctl = ubi_cdev_ioctl,
1276 - .llseek = no_llseek,
1277 +const struct file_operations ubi_cdev_operations = {
1278 + .owner = THIS_MODULE,
1279 + .llseek = no_llseek,
1280 + .unlocked_ioctl = ubi_cdev_ioctl,
1281 + .compat_ioctl = ubi_cdev_compat_ioctl,
1284 -/* UBI volume character device operations */
1285 -struct file_operations ubi_vol_cdev_operations = {
1286 - .owner = THIS_MODULE,
1287 - .open = vol_cdev_open,
1288 - .release = vol_cdev_release,
1289 - .llseek = vol_cdev_llseek,
1290 - .read = vol_cdev_read,
1291 - .write = vol_cdev_write,
1292 - .ioctl = vol_cdev_ioctl,
1293 +/* UBI control character device operations */
1294 +const struct file_operations ubi_ctrl_cdev_operations = {
1295 + .owner = THIS_MODULE,
1296 + .unlocked_ioctl = ctrl_cdev_ioctl,
1297 + .compat_ioctl = ctrl_cdev_compat_ioctl,
1298 + .llseek = no_llseek,
1300 diff -uprN linux-2.6.28/drivers/mtd/ubi/debug.c ubifs-v2.6.28/drivers/mtd/ubi/debug.c
1301 --- linux-2.6.28/drivers/mtd/ubi/debug.c 2011-06-15 15:12:27.000000000 -0400
1302 +++ ubifs-v2.6.28/drivers/mtd/ubi/debug.c 2011-06-15 14:22:07.000000000 -0400
1304 #ifdef CONFIG_MTD_UBI_DEBUG
1307 +#include <linux/module.h>
1308 +#include <linux/moduleparam.h>
1310 +unsigned int ubi_chk_flags;
1311 +unsigned int ubi_tst_flags;
1313 +module_param_named(debug_chks, ubi_chk_flags, uint, S_IRUGO | S_IWUSR);
1314 +module_param_named(debug_tsts, ubi_tst_flags, uint, S_IRUGO | S_IWUSR);
1316 +MODULE_PARM_DESC(debug_chks, "Debug check flags");
1317 +MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
1320 * ubi_dbg_dump_ec_hdr - dump an erase counter header.
1321 @@ -61,15 +72,15 @@ void ubi_dbg_dump_vid_hdr(const struct u
1323 printk(KERN_DEBUG "Volume identifier header dump:\n");
1324 printk(KERN_DEBUG "\tmagic %08x\n", be32_to_cpu(vid_hdr->magic));
1325 - printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version);
1326 - printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type);
1327 - printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag);
1328 - printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat);
1329 - printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id));
1330 - printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum));
1331 - printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size));
1332 - printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs));
1333 - printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad));
1334 + printk(KERN_DEBUG "\tversion %d\n", (int)vid_hdr->version);
1335 + printk(KERN_DEBUG "\tvol_type %d\n", (int)vid_hdr->vol_type);
1336 + printk(KERN_DEBUG "\tcopy_flag %d\n", (int)vid_hdr->copy_flag);
1337 + printk(KERN_DEBUG "\tcompat %d\n", (int)vid_hdr->compat);
1338 + printk(KERN_DEBUG "\tvol_id %d\n", be32_to_cpu(vid_hdr->vol_id));
1339 + printk(KERN_DEBUG "\tlnum %d\n", be32_to_cpu(vid_hdr->lnum));
1340 + printk(KERN_DEBUG "\tdata_size %d\n", be32_to_cpu(vid_hdr->data_size));
1341 + printk(KERN_DEBUG "\tused_ebs %d\n", be32_to_cpu(vid_hdr->used_ebs));
1342 + printk(KERN_DEBUG "\tdata_pad %d\n", be32_to_cpu(vid_hdr->data_pad));
1343 printk(KERN_DEBUG "\tsqnum %llu\n",
1344 (unsigned long long)be64_to_cpu(vid_hdr->sqnum));
1345 printk(KERN_DEBUG "\thdr_crc %08x\n", be32_to_cpu(vid_hdr->hdr_crc));
1346 @@ -196,4 +207,36 @@ void ubi_dbg_dump_mkvol_req(const struct
1347 printk(KERN_DEBUG "\t1st 16 characters of name: %s\n", nm);
1351 + * ubi_dbg_dump_flash - dump a region of flash.
1352 + * @ubi: UBI device description object
1353 + * @pnum: the physical eraseblock number to dump
1354 + * @offset: the starting offset within the physical eraseblock to dump
1355 + * @len: the length of the region to dump
1357 +void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len)
1362 + loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
1364 + buf = vmalloc(len);
1367 + err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
1368 + if (err && err != -EUCLEAN) {
1369 + ubi_err("error %d while reading %d bytes from PEB %d:%d, "
1370 + "read %zd bytes", err, len, pnum, offset, read);
1374 + dbg_msg("dumping %d bytes of data from PEB %d, offset %d",
1375 + len, pnum, offset);
1376 + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1);
1382 #endif /* CONFIG_MTD_UBI_DEBUG */
1383 diff -uprN linux-2.6.28/drivers/mtd/ubi/debug.h ubifs-v2.6.28/drivers/mtd/ubi/debug.h
1384 --- linux-2.6.28/drivers/mtd/ubi/debug.h 2011-06-15 15:12:27.000000000 -0400
1385 +++ ubifs-v2.6.28/drivers/mtd/ubi/debug.h 2011-06-15 14:22:07.000000000 -0400
1387 #ifndef __UBI_DEBUG_H__
1388 #define __UBI_DEBUG_H__
1391 +struct ubi_vid_hdr;
1393 +struct ubi_vtbl_record;
1394 +struct ubi_scan_volume;
1395 +struct ubi_scan_leb;
1396 +struct ubi_mkvol_req;
1398 #ifdef CONFIG_MTD_UBI_DEBUG
1399 #include <linux/random.h>
1401 -#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
1403 #define ubi_assert(expr) do { \
1404 if (unlikely(!(expr))) { \
1405 printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
1410 -#define dbg_msg(fmt, ...) \
1411 - printk(KERN_DEBUG "UBI DBG (pid %d): %s: " fmt "\n", \
1412 - current->pid, __func__, ##__VA_ARGS__)
1413 +#define dbg_err(fmt, ...) ubi_err(fmt, ##__VA_ARGS__)
1415 #define ubi_dbg_dump_stack() dump_stack()
1418 -struct ubi_vid_hdr;
1420 -struct ubi_vtbl_record;
1421 -struct ubi_scan_volume;
1422 -struct ubi_scan_leb;
1423 -struct ubi_mkvol_req;
1424 +#define ubi_dbg_print_hex_dump(l, ps, pt, r, g, b, len, a) \
1425 + print_hex_dump(l, ps, pt, r, g, b, len, a)
1427 +#define ubi_dbg_msg(type, fmt, ...) \
1428 + pr_debug("UBI DBG " type ": " fmt "\n", ##__VA_ARGS__)
1430 +/* Just a debugging message not related to any specific UBI sub-system */
1431 +#define dbg_msg(fmt, ...) ubi_dbg_msg("msg", fmt, ##__VA_ARGS__)
1432 +/* General debugging messages */
1433 +#define dbg_gen(fmt, ...) ubi_dbg_msg("gen", fmt, ##__VA_ARGS__)
1434 +/* Messages from the eraseblock association sub-system */
1435 +#define dbg_eba(fmt, ...) ubi_dbg_msg("eba", fmt, ##__VA_ARGS__)
1436 +/* Messages from the wear-leveling sub-system */
1437 +#define dbg_wl(fmt, ...) ubi_dbg_msg("wl", fmt, ##__VA_ARGS__)
1438 +/* Messages from the input/output sub-system */
1439 +#define dbg_io(fmt, ...) ubi_dbg_msg("io", fmt, ##__VA_ARGS__)
1440 +/* Initialization and build messages */
1441 +#define dbg_bld(fmt, ...) ubi_dbg_msg("bld", fmt, ##__VA_ARGS__)
1443 void ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr);
1444 void ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr);
1445 @@ -55,51 +70,53 @@ void ubi_dbg_dump_vtbl_record(const stru
1446 void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv);
1447 void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb, int type);
1448 void ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req);
1449 +void ubi_dbg_dump_flash(struct ubi_device *ubi, int pnum, int offset, int len);
1451 -#ifdef CONFIG_MTD_UBI_DEBUG_MSG
1452 -/* General debugging messages */
1453 -#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
1455 -#define dbg_gen(fmt, ...) ({})
1458 -#ifdef CONFIG_MTD_UBI_DEBUG_MSG_EBA
1459 -/* Messages from the eraseblock association sub-system */
1460 -#define dbg_eba(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
1462 -#define dbg_eba(fmt, ...) ({})
1464 +extern unsigned int ubi_chk_flags;
1466 -#ifdef CONFIG_MTD_UBI_DEBUG_MSG_WL
1467 -/* Messages from the wear-leveling sub-system */
1468 -#define dbg_wl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
1470 -#define dbg_wl(fmt, ...) ({})
1473 + * Debugging check flags.
1475 + * UBI_CHK_GEN: general checks
1476 + * UBI_CHK_IO: check writes and erases
1479 + UBI_CHK_GEN = 0x1,
1483 +int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len);
1484 +int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum,
1485 + int offset, int len);
1487 -#ifdef CONFIG_MTD_UBI_DEBUG_MSG_IO
1488 -/* Messages from the input/output sub-system */
1489 -#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
1491 -#define dbg_io(fmt, ...) ({})
1493 +extern unsigned int ubi_tst_flags;
1495 -#ifdef CONFIG_MTD_UBI_DEBUG_MSG_BLD
1496 -/* Initialization and build messages */
1497 -#define dbg_bld(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
1498 -#define UBI_IO_DEBUG 1
1500 -#define dbg_bld(fmt, ...) ({})
1501 -#define UBI_IO_DEBUG 0
1504 + * Special testing flags.
1506 + * UBI_TST_DISABLE_BGT: disable the background thread
1507 + * UBI_TST_EMULATE_BITFLIPS: emulate bit-flips
1508 + * UBI_TST_EMULATE_WRITE_FAILURES: emulate write failures
1509 + * UBI_TST_EMULATE_ERASE_FAILURES: emulate erase failures
1512 + UBI_TST_DISABLE_BGT = 0x1,
1513 + UBI_TST_EMULATE_BITFLIPS = 0x2,
1514 + UBI_TST_EMULATE_WRITE_FAILURES = 0x4,
1515 + UBI_TST_EMULATE_ERASE_FAILURES = 0x8,
1518 -#ifdef CONFIG_MTD_UBI_DEBUG_DISABLE_BGT
1519 -#define DBG_DISABLE_BGT 1
1521 -#define DBG_DISABLE_BGT 0
1524 + * ubi_dbg_is_bgt_disabled - if the background thread is disabled.
1526 + * Returns non-zero if the UBI background thread is disabled for testing
1529 +static inline int ubi_dbg_is_bgt_disabled(void)
1531 + return ubi_tst_flags & UBI_TST_DISABLE_BGT;
1534 -#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_BITFLIPS
1536 * ubi_dbg_is_bitflip - if it is time to emulate a bit-flip.
1538 @@ -107,13 +124,11 @@ void ubi_dbg_dump_mkvol_req(const struct
1540 static inline int ubi_dbg_is_bitflip(void)
1542 - return !(random32() % 200);
1543 + if (ubi_tst_flags & UBI_TST_EMULATE_BITFLIPS)
1544 + return !(random32() % 200);
1548 -#define ubi_dbg_is_bitflip() 0
1551 -#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES
1553 * ubi_dbg_is_write_failure - if it is time to emulate a write failure.
1555 @@ -122,13 +137,11 @@ static inline int ubi_dbg_is_bitflip(voi
1557 static inline int ubi_dbg_is_write_failure(void)
1559 - return !(random32() % 500);
1560 + if (ubi_tst_flags & UBI_TST_EMULATE_WRITE_FAILURES)
1561 + return !(random32() % 500);
1565 -#define ubi_dbg_is_write_failure() 0
1568 -#ifdef CONFIG_MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES
1570 * ubi_dbg_is_erase_failure - if its time to emulate an erase failure.
1572 @@ -137,36 +150,68 @@ static inline int ubi_dbg_is_write_failu
1574 static inline int ubi_dbg_is_erase_failure(void)
1576 + if (ubi_tst_flags & UBI_TST_EMULATE_ERASE_FAILURES)
1577 return !(random32() % 400);
1581 -#define ubi_dbg_is_erase_failure() 0
1586 -#define ubi_assert(expr) ({})
1587 -#define dbg_err(fmt, ...) ({})
1588 -#define dbg_msg(fmt, ...) ({})
1589 -#define dbg_gen(fmt, ...) ({})
1590 -#define dbg_eba(fmt, ...) ({})
1591 -#define dbg_wl(fmt, ...) ({})
1592 -#define dbg_io(fmt, ...) ({})
1593 -#define dbg_bld(fmt, ...) ({})
1594 -#define ubi_dbg_dump_stack() ({})
1595 -#define ubi_dbg_dump_ec_hdr(ec_hdr) ({})
1596 -#define ubi_dbg_dump_vid_hdr(vid_hdr) ({})
1597 -#define ubi_dbg_dump_vol_info(vol) ({})
1598 -#define ubi_dbg_dump_vtbl_record(r, idx) ({})
1599 -#define ubi_dbg_dump_sv(sv) ({})
1600 -#define ubi_dbg_dump_seb(seb, type) ({})
1601 -#define ubi_dbg_dump_mkvol_req(req) ({})
1603 -#define UBI_IO_DEBUG 0
1604 -#define DBG_DISABLE_BGT 0
1605 -#define ubi_dbg_is_bitflip() 0
1606 -#define ubi_dbg_is_write_failure() 0
1607 -#define ubi_dbg_is_erase_failure() 0
1608 +/* Use "if (0)" to make compiler check arguments even if debugging is off */
1609 +#define ubi_assert(expr) do { \
1611 + printk(KERN_CRIT "UBI assert failed in %s at %u (pid %d)\n", \
1612 + __func__, __LINE__, current->pid); \
1616 +#define dbg_err(fmt, ...) do { \
1618 + ubi_err(fmt, ##__VA_ARGS__); \
1621 +#define ubi_dbg_msg(fmt, ...) do { \
1623 + pr_debug(fmt "\n", ##__VA_ARGS__); \
1626 +#define dbg_msg(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
1627 +#define dbg_gen(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
1628 +#define dbg_eba(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
1629 +#define dbg_wl(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
1630 +#define dbg_io(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
1631 +#define dbg_bld(fmt, ...) ubi_dbg_msg(fmt, ##__VA_ARGS__)
1633 +static inline void ubi_dbg_dump_stack(void) { return; }
1635 +ubi_dbg_dump_ec_hdr(const struct ubi_ec_hdr *ec_hdr) { return; }
1637 +ubi_dbg_dump_vid_hdr(const struct ubi_vid_hdr *vid_hdr) { return; }
1639 +ubi_dbg_dump_vol_info(const struct ubi_volume *vol) { return; }
1641 +ubi_dbg_dump_vtbl_record(const struct ubi_vtbl_record *r, int idx) { return; }
1642 +static inline void ubi_dbg_dump_sv(const struct ubi_scan_volume *sv) { return; }
1643 +static inline void ubi_dbg_dump_seb(const struct ubi_scan_leb *seb,
1644 + int type) { return; }
1646 +ubi_dbg_dump_mkvol_req(const struct ubi_mkvol_req *req) { return; }
1647 +static inline void ubi_dbg_dump_flash(struct ubi_device *ubi,
1648 + int pnum, int offset, int len) { return; }
1650 +ubi_dbg_print_hex_dump(const char *l, const char *ps, int pt, int r,
1651 + int g, const void *b, size_t len, bool a) { return; }
1653 +static inline int ubi_dbg_is_bgt_disabled(void) { return 0; }
1654 +static inline int ubi_dbg_is_bitflip(void) { return 0; }
1655 +static inline int ubi_dbg_is_write_failure(void) { return 0; }
1656 +static inline int ubi_dbg_is_erase_failure(void) { return 0; }
1657 +static inline int ubi_dbg_check_all_ff(struct ubi_device *ubi,
1658 + int pnum, int offset,
1659 + int len) { return 0; }
1660 +static inline int ubi_dbg_check_write(struct ubi_device *ubi,
1661 + const void *buf, int pnum,
1662 + int offset, int len) { return 0; }
1664 #endif /* !CONFIG_MTD_UBI_DEBUG */
1665 #endif /* !__UBI_DEBUG_H__ */
1666 diff -uprN linux-2.6.28/drivers/mtd/ubi/eba.c ubifs-v2.6.28/drivers/mtd/ubi/eba.c
1667 --- linux-2.6.28/drivers/mtd/ubi/eba.c 2011-06-15 15:12:27.000000000 -0400
1668 +++ ubifs-v2.6.28/drivers/mtd/ubi/eba.c 2011-06-15 14:22:07.000000000 -0400
1669 @@ -418,7 +418,8 @@ retry:
1670 * may try to recover data. FIXME: but this is
1673 - if (err == UBI_IO_BAD_VID_HDR) {
1674 + if (err == UBI_IO_BAD_HDR_EBADMSG ||
1675 + err == UBI_IO_BAD_HDR) {
1676 ubi_warn("corrupted VID header at PEB "
1677 "%d, LEB %d:%d", pnum, vol_id,
1679 @@ -718,7 +719,7 @@ write_error:
1680 * to the real data size, although the @buf buffer has to contain the
1681 * alignment. In all other cases, @len has to be aligned.
1683 - * It is prohibited to write more then once to logical eraseblocks of static
1684 + * It is prohibited to write more than once to logical eraseblocks of static
1685 * volumes. This function returns zero in case of success and a negative error
1686 * code in case of failure.
1688 @@ -961,8 +962,8 @@ write_error:
1690 static int is_error_sane(int err)
1692 - if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_VID_HDR ||
1693 - err == -ETIMEDOUT)
1694 + if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR ||
1695 + err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT)
1699 @@ -1165,6 +1166,47 @@ out_unlock_leb:
1703 + * print_rsvd_warning - warn about not having enough reserved PEBs.
1704 + * @ubi: UBI device description object
1706 + * This is a helper function for 'ubi_eba_init_scan()' which is called when UBI
1707 + * cannot reserve enough PEBs for bad block handling. This function makes a
1708 + * decision whether we have to print a warning or not. The algorithm is as
1710 + * o if this is a new UBI image, then just print the warning
1711 + * o if this is an UBI image which has already been used for some time, print
1712 + * a warning only if we can reserve less than 10% of the expected amount of
1713 + * the reserved PEB.
1715 + * The idea is that when UBI is used, PEBs become bad, and the reserved pool
1716 + * of PEBs becomes smaller, which is normal and we do not want to scare users
1717 + * with a warning every time they attach the MTD device. This was an issue
1718 + * reported by real users.
1720 +static void print_rsvd_warning(struct ubi_device *ubi,
1721 + struct ubi_scan_info *si)
1724 + * The 1 << 18 (256KiB) number is picked randomly, just a reasonably
1725 + * large number to distinguish between newly flashed and used images.
1727 + if (si->max_sqnum > (1 << 18)) {
1728 + int min = ubi->beb_rsvd_level / 10;
1732 + if (ubi->beb_rsvd_pebs > min)
1736 + ubi_warn("cannot reserve enough PEBs for bad PEB handling, reserved %d,"
1737 + " need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
1738 + if (ubi->corr_peb_count)
1739 + ubi_warn("%d PEBs are corrupted and not used",
1740 + ubi->corr_peb_count);
1744 * ubi_eba_init_scan - initialize the EBA sub-system using scanning information.
1745 * @ubi: UBI device description object
1746 * @si: scanning information
1747 @@ -1224,6 +1266,9 @@ int ubi_eba_init_scan(struct ubi_device
1748 if (ubi->avail_pebs < EBA_RESERVED_PEBS) {
1749 ubi_err("no enough physical eraseblocks (%d, need %d)",
1750 ubi->avail_pebs, EBA_RESERVED_PEBS);
1751 + if (ubi->corr_peb_count)
1752 + ubi_err("%d PEBs are corrupted and not used",
1753 + ubi->corr_peb_count);
1757 @@ -1236,9 +1281,7 @@ int ubi_eba_init_scan(struct ubi_device
1758 if (ubi->avail_pebs < ubi->beb_rsvd_level) {
1759 /* No enough free physical eraseblocks */
1760 ubi->beb_rsvd_pebs = ubi->avail_pebs;
1761 - ubi_warn("cannot reserve enough PEBs for bad PEB "
1762 - "handling, reserved %d, need %d",
1763 - ubi->beb_rsvd_pebs, ubi->beb_rsvd_level);
1764 + print_rsvd_warning(ubi, si);
1766 ubi->beb_rsvd_pebs = ubi->beb_rsvd_level;
1768 @@ -1254,6 +1297,7 @@ out_free:
1769 if (!ubi->volumes[i])
1771 kfree(ubi->volumes[i]->eba_tbl);
1772 + ubi->volumes[i]->eba_tbl = NULL;
1776 diff -uprN linux-2.6.28/drivers/mtd/ubi/gluebi.c ubifs-v2.6.28/drivers/mtd/ubi/gluebi.c
1777 --- linux-2.6.28/drivers/mtd/ubi/gluebi.c 2008-12-24 18:26:37.000000000 -0500
1778 +++ ubifs-v2.6.28/drivers/mtd/ubi/gluebi.c 2011-06-15 14:22:07.000000000 -0400
1783 - * This file includes implementation of fake MTD devices for each UBI volume.
1784 - * This sounds strange, but it is in fact quite useful to make MTD-oriented
1785 - * software (including all the legacy software) to work on top of UBI.
1786 + * This is a small driver which implements fake MTD devices on top of UBI
1787 + * volumes. This sounds strange, but it is in fact quite useful to make
1788 + * MTD-oriented software (including all the legacy software) work on top of
1791 * Gluebi emulates MTD devices of "MTD_UBIVOLUME" type. Their minimal I/O unit
1792 - * size (mtd->writesize) is equivalent to the UBI minimal I/O unit. The
1793 + * size (@mtd->writesize) is equivalent to the UBI minimal I/O unit. The
1794 * eraseblock size is equivalent to the logical eraseblock size of the volume.
1797 +#include <linux/err.h>
1798 +#include <linux/list.h>
1799 +#include <linux/sched.h>
1800 +#include <linux/module.h>
1801 +#include <linux/mutex.h>
1802 +#include <linux/math64.h>
1803 +#include <linux/mtd/ubi.h>
1804 +#include <linux/mtd/mtd.h>
1805 #include <asm/div64.h>
1807 +#include "ubi-media.h"
1809 +#define err_msg(fmt, ...) \
1810 + printk(KERN_DEBUG "gluebi (pid %d): %s: " fmt "\n", \
1811 + current->pid, __func__, ##__VA_ARGS__)
1813 +static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd)
1815 + do_div(sz, mtd->erasesize);
1820 + * struct gluebi_device - a gluebi device description data structure.
1821 + * @mtd: emulated MTD device description object
1822 + * @refcnt: gluebi device reference count
1823 + * @desc: UBI volume descriptor
1824 + * @ubi_num: UBI device number this gluebi device works on
1825 + * @vol_id: ID of UBI volume this gluebi device works on
1826 + * @list: link in a list of gluebi devices
1828 +struct gluebi_device {
1829 + struct mtd_info mtd;
1831 + struct ubi_volume_desc *desc;
1834 + struct list_head list;
1837 +/* List of all gluebi devices */
1838 +static LIST_HEAD(gluebi_devices);
1839 +static DEFINE_MUTEX(devices_mutex);
1842 + * find_gluebi_nolock - find a gluebi device.
1843 + * @ubi_num: UBI device number
1844 + * @vol_id: volume ID
1846 + * This function searches for gluebi device corresponding to UBI device
1847 + * @ubi_num and UBI volume @vol_id. Returns the gluebi device description
1848 + * object in case of success and %NULL in case of failure. The caller has to
1849 + * have the &devices_mutex locked.
1851 +static struct gluebi_device *find_gluebi_nolock(int ubi_num, int vol_id)
1853 + struct gluebi_device *gluebi;
1855 + list_for_each_entry(gluebi, &gluebi_devices, list)
1856 + if (gluebi->ubi_num == ubi_num && gluebi->vol_id == vol_id)
1862 * gluebi_get_device - get MTD device reference.
1863 @@ -41,15 +102,18 @@
1865 static int gluebi_get_device(struct mtd_info *mtd)
1867 - struct ubi_volume *vol;
1868 + struct gluebi_device *gluebi;
1869 + int ubi_mode = UBI_READONLY;
1871 - vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
1872 + if (!try_module_get(THIS_MODULE))
1876 - * We do not introduce locks for gluebi reference count because the
1877 - * get_device()/put_device() calls are already serialized at MTD.
1879 - if (vol->gluebi_refcount > 0) {
1880 + if (mtd->flags & MTD_WRITEABLE)
1881 + ubi_mode = UBI_READWRITE;
1883 + gluebi = container_of(mtd, struct gluebi_device, mtd);
1884 + mutex_lock(&devices_mutex);
1885 + if (gluebi->refcnt > 0) {
1887 * The MTD device is already referenced and this is just one
1888 * more reference. MTD allows many users to open the same
1889 @@ -58,7 +122,8 @@ static int gluebi_get_device(struct mtd_
1890 * open the UBI volume again - just increase the reference
1891 * counter and return.
1893 - vol->gluebi_refcount += 1;
1894 + gluebi->refcnt += 1;
1895 + mutex_unlock(&devices_mutex);
1899 @@ -66,11 +131,15 @@ static int gluebi_get_device(struct mtd_
1900 * This is the first reference to this UBI volume via the MTD device
1901 * interface. Open the corresponding volume in read-write mode.
1903 - vol->gluebi_desc = ubi_open_volume(vol->ubi->ubi_num, vol->vol_id,
1905 - if (IS_ERR(vol->gluebi_desc))
1906 - return PTR_ERR(vol->gluebi_desc);
1907 - vol->gluebi_refcount += 1;
1908 + gluebi->desc = ubi_open_volume(gluebi->ubi_num, gluebi->vol_id,
1910 + if (IS_ERR(gluebi->desc)) {
1911 + mutex_unlock(&devices_mutex);
1912 + module_put(THIS_MODULE);
1913 + return PTR_ERR(gluebi->desc);
1915 + gluebi->refcnt += 1;
1916 + mutex_unlock(&devices_mutex);
1920 @@ -83,13 +152,15 @@ static int gluebi_get_device(struct mtd_
1922 static void gluebi_put_device(struct mtd_info *mtd)
1924 - struct ubi_volume *vol;
1925 + struct gluebi_device *gluebi;
1927 - vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
1928 - vol->gluebi_refcount -= 1;
1929 - ubi_assert(vol->gluebi_refcount >= 0);
1930 - if (vol->gluebi_refcount == 0)
1931 - ubi_close_volume(vol->gluebi_desc);
1932 + gluebi = container_of(mtd, struct gluebi_device, mtd);
1933 + mutex_lock(&devices_mutex);
1934 + gluebi->refcnt -= 1;
1935 + if (gluebi->refcnt == 0)
1936 + ubi_close_volume(gluebi->desc);
1937 + module_put(THIS_MODULE);
1938 + mutex_unlock(&devices_mutex);
1942 @@ -107,21 +178,14 @@ static int gluebi_read(struct mtd_info *
1943 size_t *retlen, unsigned char *buf)
1945 int err = 0, lnum, offs, total_read;
1946 - struct ubi_volume *vol;
1947 - struct ubi_device *ubi;
1948 - uint64_t tmp = from;
1950 - dbg_gen("read %zd bytes from offset %lld", len, from);
1951 + struct gluebi_device *gluebi;
1953 if (len < 0 || from < 0 || from + len > mtd->size)
1956 - vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
1959 - offs = do_div(tmp, mtd->erasesize);
1961 + gluebi = container_of(mtd, struct gluebi_device, mtd);
1963 + lnum = div_u64_rem(from, mtd->erasesize, &offs);
1965 while (total_read) {
1966 size_t to_read = mtd->erasesize - offs;
1967 @@ -129,7 +193,7 @@ static int gluebi_read(struct mtd_info *
1968 if (to_read > total_read)
1969 to_read = total_read;
1971 - err = ubi_eba_read_leb(ubi, vol, lnum, buf, offs, to_read, 0);
1972 + err = ubi_read(gluebi->desc, lnum, buf, offs, to_read);
1976 @@ -155,26 +219,20 @@ static int gluebi_read(struct mtd_info *
1979 static int gluebi_write(struct mtd_info *mtd, loff_t to, size_t len,
1980 - size_t *retlen, const u_char *buf)
1981 + size_t *retlen, const u_char *buf)
1983 int err = 0, lnum, offs, total_written;
1984 - struct ubi_volume *vol;
1985 - struct ubi_device *ubi;
1986 - uint64_t tmp = to;
1988 - dbg_gen("write %zd bytes to offset %lld", len, to);
1989 + struct gluebi_device *gluebi;
1991 if (len < 0 || to < 0 || len + to > mtd->size)
1994 - vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
1996 + gluebi = container_of(mtd, struct gluebi_device, mtd);
1999 + if (!(mtd->flags & MTD_WRITEABLE))
2002 - offs = do_div(tmp, mtd->erasesize);
2004 + lnum = div_u64_rem(to, mtd->erasesize, &offs);
2006 if (len % mtd->writesize || offs % mtd->writesize)
2008 @@ -186,8 +244,7 @@ static int gluebi_write(struct mtd_info
2009 if (to_write > total_written)
2010 to_write = total_written;
2012 - err = ubi_eba_write_leb(ubi, vol, lnum, buf, offs, to_write,
2014 + err = ubi_write(gluebi->desc, lnum, buf, offs, to_write);
2018 @@ -212,40 +269,36 @@ static int gluebi_write(struct mtd_info
2019 static int gluebi_erase(struct mtd_info *mtd, struct erase_info *instr)
2021 int err, i, lnum, count;
2022 - struct ubi_volume *vol;
2023 - struct ubi_device *ubi;
2025 - dbg_gen("erase %u bytes at offset %u", instr->len, instr->addr);
2026 + struct gluebi_device *gluebi;
2028 if (instr->addr < 0 || instr->addr > mtd->size - mtd->erasesize)
2031 if (instr->len < 0 || instr->addr + instr->len > mtd->size)
2034 if (instr->addr % mtd->writesize || instr->len % mtd->writesize)
2037 - lnum = instr->addr / mtd->erasesize;
2038 - count = instr->len / mtd->erasesize;
2039 + lnum = mtd_div_by_eb(instr->addr, mtd);
2040 + count = mtd_div_by_eb(instr->len, mtd);
2042 - vol = container_of(mtd, struct ubi_volume, gluebi_mtd);
2044 + gluebi = container_of(mtd, struct gluebi_device, mtd);
2047 + if (!(mtd->flags & MTD_WRITEABLE))
2050 - for (i = 0; i < count; i++) {
2051 - err = ubi_eba_unmap_leb(ubi, vol, lnum + i);
2052 + for (i = 0; i < count - 1; i++) {
2053 + err = ubi_leb_unmap(gluebi->desc, lnum + i);
2059 * MTD erase operations are synchronous, so we have to make sure the
2060 * physical eraseblock is wiped out.
2062 + * Thus, perform leb_erase instead of leb_unmap operation - leb_erase
2063 + * will wait for the end of operations
2065 - err = ubi_wl_flush(ubi);
2066 + err = ubi_leb_erase(gluebi->desc, lnum + i);
2070 @@ -255,33 +308,44 @@ static int gluebi_erase(struct mtd_info
2073 instr->state = MTD_ERASE_FAILED;
2074 - instr->fail_addr = lnum * mtd->erasesize;
2075 + instr->fail_addr = (long long)lnum * mtd->erasesize;
2080 - * ubi_create_gluebi - initialize gluebi for an UBI volume.
2081 - * @ubi: UBI device description object
2082 - * @vol: volume description object
2083 + * gluebi_create - create a gluebi device for an UBI volume.
2084 + * @di: UBI device description object
2085 + * @vi: UBI volume description object
2087 - * This function is called when an UBI volume is created in order to create
2088 + * This function is called when a new UBI volume is created in order to create
2089 * corresponding fake MTD device. Returns zero in case of success and a
2090 * negative error code in case of failure.
2092 -int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol)
2093 +static int gluebi_create(struct ubi_device_info *di,
2094 + struct ubi_volume_info *vi)
2096 - struct mtd_info *mtd = &vol->gluebi_mtd;
2097 + struct gluebi_device *gluebi, *g;
2098 + struct mtd_info *mtd;
2100 - mtd->name = kmemdup(vol->name, vol->name_len + 1, GFP_KERNEL);
2102 + gluebi = kzalloc(sizeof(struct gluebi_device), GFP_KERNEL);
2106 + mtd = &gluebi->mtd;
2107 + mtd->name = kmemdup(vi->name, vi->name_len + 1, GFP_KERNEL);
2113 + gluebi->vol_id = vi->vol_id;
2114 + gluebi->ubi_num = vi->ubi_num;
2115 mtd->type = MTD_UBIVOLUME;
2116 - if (!ubi->ro_mode)
2118 mtd->flags = MTD_WRITEABLE;
2119 - mtd->writesize = ubi->min_io_size;
2120 mtd->owner = THIS_MODULE;
2121 - mtd->erasesize = vol->usable_leb_size;
2122 + mtd->writesize = di->min_io_size;
2123 + mtd->erasesize = vi->usable_leb_size;
2124 mtd->read = gluebi_read;
2125 mtd->write = gluebi_write;
2126 mtd->erase = gluebi_erase;
2127 @@ -289,60 +353,196 @@ int ubi_create_gluebi(struct ubi_device
2128 mtd->put_device = gluebi_put_device;
2131 - * In case of dynamic volume, MTD device size is just volume size. In
2132 + * In case of a dynamic volume, MTD device size is just volume size. In
2133 * case of a static volume the size is equivalent to the amount of data
2136 - if (vol->vol_type == UBI_DYNAMIC_VOLUME)
2137 - mtd->size = vol->usable_leb_size * vol->reserved_pebs;
2138 + if (vi->vol_type == UBI_DYNAMIC_VOLUME)
2139 + mtd->size = (unsigned long long)vi->usable_leb_size * vi->size;
2141 - mtd->size = vol->used_bytes;
2142 + mtd->size = vi->used_bytes;
2144 + /* Just a sanity check - make sure this gluebi device does not exist */
2145 + mutex_lock(&devices_mutex);
2146 + g = find_gluebi_nolock(vi->ubi_num, vi->vol_id);
2148 + err_msg("gluebi MTD device %d form UBI device %d volume %d "
2149 + "already exists", g->mtd.index, vi->ubi_num,
2151 + mutex_unlock(&devices_mutex);
2153 if (add_mtd_device(mtd)) {
2154 - ubi_err("cannot not add MTD device");
2155 + err_msg("cannot add MTD device");
2161 - dbg_gen("added mtd%d (\"%s\"), size %u, EB size %u",
2162 - mtd->index, mtd->name, mtd->size, mtd->erasesize);
2163 + mutex_lock(&devices_mutex);
2164 + list_add_tail(&gluebi->list, &gluebi_devices);
2165 + mutex_unlock(&devices_mutex);
2170 - * ubi_destroy_gluebi - close gluebi for an UBI volume.
2171 - * @vol: volume description object
2172 + * gluebi_remove - remove a gluebi device.
2173 + * @vi: UBI volume description object
2175 - * This function is called when an UBI volume is removed in order to remove
2176 + * This function is called when an UBI volume is removed and it removes
2177 * corresponding fake MTD device. Returns zero in case of success and a
2178 * negative error code in case of failure.
2180 -int ubi_destroy_gluebi(struct ubi_volume *vol)
2181 +static int gluebi_remove(struct ubi_volume_info *vi)
2184 - struct mtd_info *mtd = &vol->gluebi_mtd;
2186 + struct mtd_info *mtd;
2187 + struct gluebi_device *gluebi;
2189 + mutex_lock(&devices_mutex);
2190 + gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id);
2192 + err_msg("got remove notification for unknown UBI device %d "
2193 + "volume %d", vi->ubi_num, vi->vol_id);
2195 + } else if (gluebi->refcnt)
2198 + list_del(&gluebi->list);
2199 + mutex_unlock(&devices_mutex);
2203 - dbg_gen("remove mtd%d", mtd->index);
2204 + mtd = &gluebi->mtd;
2205 err = del_mtd_device(mtd);
2208 + err_msg("cannot remove fake MTD device %d, UBI device %d, "
2209 + "volume %d, error %d", mtd->index, gluebi->ubi_num,
2210 + gluebi->vol_id, err);
2211 + mutex_lock(&devices_mutex);
2212 + list_add_tail(&gluebi->list, &gluebi_devices);
2213 + mutex_unlock(&devices_mutex);
2223 - * ubi_gluebi_updated - UBI volume was updated notifier.
2224 - * @vol: volume description object
2225 + * gluebi_updated - UBI volume was updated notifier.
2226 + * @vi: volume info structure
2228 - * This function is called every time an UBI volume is updated. This function
2229 - * does nothing if volume @vol is dynamic, and changes MTD device size if the
2230 + * This function is called every time an UBI volume is updated. It does nothing
2231 + * if the volume @vol is dynamic, and changes MTD device size if the
2232 * volume is static. This is needed because static volumes cannot be read past
2233 - * data they contain.
2234 + * data they contain. This function returns zero in case of success and a
2235 + * negative error code in case of error.
2237 -void ubi_gluebi_updated(struct ubi_volume *vol)
2238 +static int gluebi_updated(struct ubi_volume_info *vi)
2240 - struct mtd_info *mtd = &vol->gluebi_mtd;
2241 + struct gluebi_device *gluebi;
2243 + mutex_lock(&devices_mutex);
2244 + gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id);
2246 + mutex_unlock(&devices_mutex);
2247 + err_msg("got update notification for unknown UBI device %d "
2248 + "volume %d", vi->ubi_num, vi->vol_id);
2252 - if (vol->vol_type == UBI_STATIC_VOLUME)
2253 - mtd->size = vol->used_bytes;
2254 + if (vi->vol_type == UBI_STATIC_VOLUME)
2255 + gluebi->mtd.size = vi->used_bytes;
2256 + mutex_unlock(&devices_mutex);
2261 + * gluebi_resized - UBI volume was re-sized notifier.
2262 + * @vi: volume info structure
2264 + * This function is called every time an UBI volume is re-sized. It changes the
2265 + * corresponding fake MTD device size. This function returns zero in case of
2266 + * success and a negative error code in case of error.
2268 +static int gluebi_resized(struct ubi_volume_info *vi)
2270 + struct gluebi_device *gluebi;
2272 + mutex_lock(&devices_mutex);
2273 + gluebi = find_gluebi_nolock(vi->ubi_num, vi->vol_id);
2275 + mutex_unlock(&devices_mutex);
2276 + err_msg("got update notification for unknown UBI device %d "
2277 + "volume %d", vi->ubi_num, vi->vol_id);
2280 + gluebi->mtd.size = vi->used_bytes;
2281 + mutex_unlock(&devices_mutex);
2286 + * gluebi_notify - UBI notification handler.
2287 + * @nb: registered notifier block
2288 + * @l: notification type
2289 + * @ptr: pointer to the &struct ubi_notification object
2291 +static int gluebi_notify(struct notifier_block *nb, unsigned long l,
2294 + struct ubi_notification *nt = ns_ptr;
2297 + case UBI_VOLUME_ADDED:
2298 + gluebi_create(&nt->di, &nt->vi);
2300 + case UBI_VOLUME_REMOVED:
2301 + gluebi_remove(&nt->vi);
2303 + case UBI_VOLUME_RESIZED:
2304 + gluebi_resized(&nt->vi);
2306 + case UBI_VOLUME_UPDATED:
2307 + gluebi_updated(&nt->vi);
2315 +static struct notifier_block gluebi_notifier = {
2316 + .notifier_call = gluebi_notify,
2319 +static int __init ubi_gluebi_init(void)
2321 + return ubi_register_volume_notifier(&gluebi_notifier, 0);
2324 +static void __exit ubi_gluebi_exit(void)
2326 + struct gluebi_device *gluebi, *g;
2328 + list_for_each_entry_safe(gluebi, g, &gluebi_devices, list) {
2330 + struct mtd_info *mtd = &gluebi->mtd;
2332 + err = del_mtd_device(mtd);
2334 + err_msg("error %d while removing gluebi MTD device %d, "
2335 + "UBI device %d, volume %d - ignoring", err,
2336 + mtd->index, gluebi->ubi_num, gluebi->vol_id);
2340 + ubi_unregister_volume_notifier(&gluebi_notifier);
2343 +module_init(ubi_gluebi_init);
2344 +module_exit(ubi_gluebi_exit);
2345 +MODULE_DESCRIPTION("MTD emulation layer over UBI volumes");
2346 +MODULE_AUTHOR("Artem Bityutskiy, Joern Engel");
2347 +MODULE_LICENSE("GPL");
2348 diff -uprN linux-2.6.28/drivers/mtd/ubi/io.c ubifs-v2.6.28/drivers/mtd/ubi/io.c
2349 --- linux-2.6.28/drivers/mtd/ubi/io.c 2011-06-15 15:12:27.000000000 -0400
2350 +++ ubifs-v2.6.28/drivers/mtd/ubi/io.c 2011-06-15 14:22:07.000000000 -0400
2352 * device, e.g., make @ubi->min_io_size = 512 in the example above?
2354 * A: because when writing a sub-page, MTD still writes a full 2K page but the
2355 - * bytes which are no relevant to the sub-page are 0xFF. So, basically, writing
2356 - * 4x512 sub-pages is 4 times slower then writing one 2KiB NAND page. Thus, we
2357 - * prefer to use sub-pages only for EV and VID headers.
2358 + * bytes which are not relevant to the sub-page are 0xFF. So, basically,
2359 + * writing 4x512 sub-pages is 4 times slower than writing one 2KiB NAND page.
2360 + * Thus, we prefer to use sub-pages only for EC and VID headers.
2362 * As it was noted above, the VID header may start at a non-aligned offset.
2363 * For example, in case of a 2KiB page NAND flash with a 512 bytes sub-page,
2365 #include <linux/err.h>
2368 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
2369 +#ifdef CONFIG_MTD_UBI_DEBUG
2370 static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum);
2371 static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum);
2372 static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
2373 @@ -98,15 +98,12 @@ static int paranoid_check_ec_hdr(const s
2374 static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum);
2375 static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
2376 const struct ubi_vid_hdr *vid_hdr);
2377 -static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
2380 #define paranoid_check_not_bad(ubi, pnum) 0
2381 #define paranoid_check_peb_ec_hdr(ubi, pnum) 0
2382 #define paranoid_check_ec_hdr(ubi, pnum, ec_hdr) 0
2383 #define paranoid_check_peb_vid_hdr(ubi, pnum) 0
2384 #define paranoid_check_vid_hdr(ubi, pnum, vid_hdr) 0
2385 -#define paranoid_check_all_ff(ubi, pnum, offset, len) 0
2389 @@ -146,12 +143,36 @@ int ubi_io_read(const struct ubi_device
2391 err = paranoid_check_not_bad(ubi, pnum);
2393 - return err > 0 ? -EINVAL : err;
2397 + * Deliberately corrupt the buffer to improve robustness. Indeed, if we
2398 + * do not do this, the following may happen:
2399 + * 1. The buffer contains data from previous operation, e.g., read from
2400 + * another PEB previously. The data looks like expected, e.g., if we
2401 + * just do not read anything and return - the caller would not
2402 + * notice this. E.g., if we are reading a VID header, the buffer may
2403 + * contain a valid VID header from another PEB.
2404 + * 2. The driver is buggy and returns us success or -EBADMSG or
2405 + * -EUCLEAN, but it does not actually put any data to the buffer.
2407 + * This may confuse UBI or upper layers - they may think the buffer
2408 + * contains valid data while in fact it is just old data. This is
2409 + * especially possible because UBI (and UBIFS) relies on CRC, and
2410 + * treats data as correct even in case of ECC errors if the CRC is
2413 + * Try to prevent this situation by changing the first byte of the
2416 + *((uint8_t *)buf) ^= 0xFF;
2418 addr = (loff_t)pnum * ubi->peb_size + offset;
2420 err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
2422 + const char *errstr = (err == -EBADMSG) ? " (ECC error)" : "";
2424 if (err == -EUCLEAN) {
2426 * -EUCLEAN is reported if there was a bit-flip which
2427 @@ -166,16 +187,16 @@ retry:
2428 return UBI_IO_BITFLIPS;
2431 - if (read != len && retries++ < UBI_IO_RETRIES) {
2432 - dbg_io("error %d while reading %d bytes from PEB %d:%d,"
2433 - " read only %zd bytes, retry",
2434 - err, len, pnum, offset, read);
2435 + if (retries++ < UBI_IO_RETRIES) {
2436 + dbg_io("error %d%s while reading %d bytes from PEB "
2437 + "%d:%d, read only %zd bytes, retry",
2438 + err, errstr, len, pnum, offset, read);
2443 - ubi_err("error %d while reading %d bytes from PEB %d:%d, "
2444 - "read %zd bytes", err, len, pnum, offset, read);
2445 + ubi_err("error %d%s while reading %d bytes from PEB %d:%d, "
2446 + "read %zd bytes", err, errstr, len, pnum, offset, read);
2447 ubi_dbg_dump_stack();
2450 @@ -239,12 +260,12 @@ int ubi_io_write(struct ubi_device *ubi,
2452 err = paranoid_check_not_bad(ubi, pnum);
2454 - return err > 0 ? -EINVAL : err;
2457 /* The area we are writing to has to contain all 0xFF bytes */
2458 - err = paranoid_check_all_ff(ubi, pnum, offset, len);
2459 + err = ubi_dbg_check_all_ff(ubi, pnum, offset, len);
2461 - return err > 0 ? -EINVAL : err;
2464 if (offset >= ubi->leb_start) {
2466 @@ -253,10 +274,10 @@ int ubi_io_write(struct ubi_device *ubi,
2468 err = paranoid_check_peb_ec_hdr(ubi, pnum);
2470 - return err > 0 ? -EINVAL : err;
2472 err = paranoid_check_peb_vid_hdr(ubi, pnum);
2474 - return err > 0 ? -EINVAL : err;
2478 if (ubi_dbg_is_write_failure()) {
2479 @@ -269,12 +290,28 @@ int ubi_io_write(struct ubi_device *ubi,
2480 addr = (loff_t)pnum * ubi->peb_size + offset;
2481 err = ubi->mtd->write(ubi->mtd, addr, len, &written, buf);
2483 - ubi_err("error %d while writing %d bytes to PEB %d:%d, written"
2484 - " %zd bytes", err, len, pnum, offset, written);
2485 + ubi_err("error %d while writing %d bytes to PEB %d:%d, written "
2486 + "%zd bytes", err, len, pnum, offset, written);
2487 ubi_dbg_dump_stack();
2488 + ubi_dbg_dump_flash(ubi, pnum, offset, len);
2490 ubi_assert(written == len);
2493 + err = ubi_dbg_check_write(ubi, buf, pnum, offset, len);
2498 + * Since we always write sequentially, the rest of the PEB has
2499 + * to contain only 0xFF bytes.
2502 + len = ubi->peb_size - offset;
2504 + err = ubi_dbg_check_all_ff(ubi, pnum, offset, len);
2510 @@ -306,6 +343,12 @@ static int do_sync_erase(struct ubi_devi
2511 wait_queue_head_t wq;
2513 dbg_io("erase PEB %d", pnum);
2514 + ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
2516 + if (ubi->ro_mode) {
2517 + ubi_err("read-only mode");
2522 init_waitqueue_head(&wq);
2523 @@ -348,11 +391,11 @@ retry:
2527 - err = paranoid_check_all_ff(ubi, pnum, 0, ubi->peb_size);
2528 + err = ubi_dbg_check_all_ff(ubi, pnum, 0, ubi->peb_size);
2530 - return err > 0 ? -EINVAL : err;
2533 - if (ubi_dbg_is_erase_failure() && !err) {
2534 + if (ubi_dbg_is_erase_failure()) {
2535 dbg_err("cannot erase PEB %d (emulated)", pnum);
2538 @@ -360,25 +403,6 @@ retry:
2543 - * check_pattern - check if buffer contains only a certain byte pattern.
2544 - * @buf: buffer to check
2545 - * @patt: the pattern to check
2546 - * @size: buffer size in bytes
2548 - * This function returns %1 in there are only @patt bytes in @buf, and %0 if
2549 - * something else was also found.
2551 -static int check_pattern(const void *buf, uint8_t patt, int size)
2555 - for (i = 0; i < size; i++)
2556 - if (((const uint8_t *)buf)[i] != patt)
2561 /* Patterns to write to a physical eraseblock when torturing it */
2562 static uint8_t patterns[] = {0xa5, 0x5a, 0x0};
2564 @@ -410,7 +434,7 @@ static int torture_peb(struct ubi_device
2568 - err = check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size);
2569 + err = ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->peb_size);
2571 ubi_err("erased PEB %d, but a non-0xFF byte found",
2573 @@ -429,7 +453,8 @@ static int torture_peb(struct ubi_device
2577 - err = check_pattern(ubi->peb_buf1, patterns[i], ubi->peb_size);
2578 + err = ubi_check_pattern(ubi->peb_buf1, patterns[i],
2581 ubi_err("pattern %x checking failed for PEB %d",
2583 @@ -439,7 +464,7 @@ static int torture_peb(struct ubi_device
2587 - ubi_msg("PEB %d passed torture test, do not mark it a bad", pnum);
2588 + ubi_msg("PEB %d passed torture test, do not mark it as bad", pnum);
2591 mutex_unlock(&ubi->buf_mutex);
2592 @@ -457,6 +482,92 @@ out:
2596 + * nor_erase_prepare - prepare a NOR flash PEB for erasure.
2597 + * @ubi: UBI device description object
2598 + * @pnum: physical eraseblock number to prepare
2600 + * NOR flash, or at least some of them, have peculiar embedded PEB erasure
2601 + * algorithm: the PEB is first filled with zeroes, then it is erased. And
2602 + * filling with zeroes starts from the end of the PEB. This was observed with
2603 + * Spansion S29GL512N NOR flash.
2605 + * This means that in case of a power cut we may end up with intact data at the
2606 + * beginning of the PEB, and all zeroes at the end of PEB. In other words, the
2607 + * EC and VID headers are OK, but a large chunk of data at the end of PEB is
2608 + * zeroed. This makes UBI mistakenly treat this PEB as used and associate it
2609 + * with an LEB, which leads to subsequent failures (e.g., UBIFS fails).
2611 + * This function is called before erasing NOR PEBs and it zeroes out EC and VID
2612 + * magic numbers in order to invalidate them and prevent the failures. Returns
2613 + * zero in case of success and a negative error code in case of failure.
2615 +static int nor_erase_prepare(struct ubi_device *ubi, int pnum)
2620 + uint32_t data = 0;
2622 + * Note, we cannot generally define VID header buffers on stack,
2623 + * because of the way we deal with these buffers (see the header
2624 + * comment in this file). But we know this is a NOR-specific piece of
2625 + * code, so we can do this. But yes, this is error-prone and we should
2626 + * (pre-)allocate VID header buffer instead.
2628 + struct ubi_vid_hdr vid_hdr;
2631 + * It is important to first invalidate the EC header, and then the VID
2632 + * header. Otherwise a power cut may lead to valid EC header and
2633 + * invalid VID header, in which case UBI will treat this PEB as
2634 + * corrupted and will try to preserve it, and print scary warnings (see
2635 + * the header comment in scan.c for more information).
2637 + addr = (loff_t)pnum * ubi->peb_size;
2638 + err = ubi->mtd->write(ubi->mtd, addr, 4, &written, (void *)&data);
2640 + addr += ubi->vid_hdr_aloffset;
2641 + err = ubi->mtd->write(ubi->mtd, addr, 4, &written,
2648 + * We failed to write to the media. This was observed with Spansion
2649 + * S29GL512N NOR flash. Most probably the previous eraseblock erasure
2650 + * was interrupted at a very inappropriate moment, so it became
2651 + * unwritable. In this case we probably anyway have garbage in this
2654 + err1 = ubi_io_read_vid_hdr(ubi, pnum, &vid_hdr, 0);
2655 + if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR ||
2656 + err1 == UBI_IO_FF) {
2657 + struct ubi_ec_hdr ec_hdr;
2659 + err1 = ubi_io_read_ec_hdr(ubi, pnum, &ec_hdr, 0);
2660 + if (err1 == UBI_IO_BAD_HDR_EBADMSG || err1 == UBI_IO_BAD_HDR ||
2661 + err1 == UBI_IO_FF)
2663 + * Both VID and EC headers are corrupted, so we can
2664 + * safely erase this PEB and not be afraid that it will be
2665 + * treated as a valid PEB in case of an unclean reboot.
2671 + * The PEB contains a valid VID header, but we cannot invalidate it.
2672 + * Supposedly the flash media or the driver is screwed up, so return an
2675 + ubi_err("cannot invalidate PEB %d, write returned %d read returned %d",
2677 + ubi_dbg_dump_flash(ubi, pnum, 0, ubi->peb_size);
2682 * ubi_io_sync_erase - synchronously erase a physical eraseblock.
2683 * @ubi: UBI device description object
2684 * @pnum: physical eraseblock number to erase
2685 @@ -465,7 +576,7 @@ out:
2686 * This function synchronously erases physical eraseblock @pnum. If @torture
2687 * flag is not zero, the physical eraseblock is checked by means of writing
2688 * different patterns to it and reading them back. If the torturing is enabled,
2689 - * the physical eraseblock is erased more then once.
2690 + * the physical eraseblock is erased more than once.
2692 * This function returns the number of erasures made in case of success, %-EIO
2693 * if the erasure failed or the torturing test failed, and other negative error
2694 @@ -480,13 +591,19 @@ int ubi_io_sync_erase(struct ubi_device
2696 err = paranoid_check_not_bad(ubi, pnum);
2698 - return err > 0 ? -EINVAL : err;
2702 ubi_err("read-only mode");
2706 + if (ubi->nor_flash) {
2707 + err = nor_erase_prepare(ubi, pnum);
2713 ret = torture_peb(ubi, pnum);
2715 @@ -566,16 +683,15 @@ int ubi_io_mark_bad(const struct ubi_dev
2716 * This function returns zero if the erase counter header is OK, and %1 if
2719 -static int validate_ec_hdr(struct ubi_device *ubi,
2720 +static int validate_ec_hdr(const struct ubi_device *ubi,
2721 const struct ubi_ec_hdr *ec_hdr)
2724 - int vid_hdr_offset, leb_start, image_seq;
2725 + int vid_hdr_offset, leb_start;
2727 ec = be64_to_cpu(ec_hdr->ec);
2728 vid_hdr_offset = be32_to_cpu(ec_hdr->vid_hdr_offset);
2729 leb_start = be32_to_cpu(ec_hdr->data_offset);
2730 - image_seq = be32_to_cpu(ec_hdr->image_seq);
2732 if (ec_hdr->version != UBI_VERSION) {
2733 ubi_err("node with incompatible UBI version found: "
2734 @@ -601,15 +717,6 @@ static int validate_ec_hdr(struct ubi_de
2738 - if (!ubi->image_seq_set) {
2739 - ubi->image_seq = image_seq;
2740 - ubi->image_seq_set = 1;
2741 - } else if (ubi->image_seq && image_seq && ubi->image_seq != image_seq) {
2742 - ubi_err("bad image sequence number %d, expected %d",
2743 - image_seq, ubi->image_seq);
2750 @@ -635,68 +742,58 @@ bad:
2751 * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
2752 * and corrected by the flash driver; this is harmless but may indicate that
2753 * this eraseblock may become bad soon (but may be not);
2754 - * o %UBI_IO_BAD_EC_HDR if the erase counter header is corrupted (a CRC error);
2755 - * o %UBI_IO_PEB_EMPTY if the physical eraseblock is empty;
2756 + * o %UBI_IO_BAD_HDR if the erase counter header is corrupted (a CRC error);
2757 + * o %UBI_IO_BAD_HDR_EBADMSG is the same as %UBI_IO_BAD_HDR, but there also was
2758 + * a data integrity error (uncorrectable ECC error in case of NAND);
2759 + * o %UBI_IO_FF if only 0xFF bytes were read (the PEB is supposedly empty)
2760 * o a negative error code in case of failure.
2762 int ubi_io_read_ec_hdr(struct ubi_device *ubi, int pnum,
2763 struct ubi_ec_hdr *ec_hdr, int verbose)
2765 - int err, read_err = 0;
2766 + int err, read_err;
2767 uint32_t crc, magic, hdr_crc;
2769 dbg_io("read EC header from PEB %d", pnum);
2770 ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
2772 - err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
2774 - if (err != UBI_IO_BITFLIPS && err != -EBADMSG)
2776 + read_err = ubi_io_read(ubi, ec_hdr, pnum, 0, UBI_EC_HDR_SIZE);
2778 + if (read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG)
2782 * We read all the data, but either a correctable bit-flip
2783 - * occurred, or MTD reported about some data integrity error,
2784 - * like an ECC error in case of NAND. The former is harmless,
2785 - * the later may mean that the read data is corrupted. But we
2786 - * have a CRC check-sum and we will detect this. If the EC
2787 - * header is still OK, we just report this as there was a
2789 + * occurred, or MTD reported a data integrity error
2790 + * (uncorrectable ECC error in case of NAND). The former is
2791 + * harmless, the latter may mean that the read data is
2792 + * corrupted. But we have a CRC check-sum and we will detect
2793 + * this. If the EC header is still OK, we just report this as
2794 + * there was a bit-flip, to force scrubbing.
2799 magic = be32_to_cpu(ec_hdr->magic);
2800 if (magic != UBI_EC_HDR_MAGIC) {
2801 + if (read_err == -EBADMSG)
2802 + return UBI_IO_BAD_HDR_EBADMSG;
2805 * The magic field is wrong. Let's check if we have read all
2806 * 0xFF. If yes, this physical eraseblock is assumed to be
2809 - * But if there was a read error, we do not test it for all
2810 - * 0xFFs. Even if it does contain all 0xFFs, this error
2811 - * indicates that something is still wrong with this physical
2812 - * eraseblock and we anyway cannot treat it as empty.
2814 - if (read_err != -EBADMSG &&
2815 - check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) {
2816 + if (ubi_check_pattern(ec_hdr, 0xFF, UBI_EC_HDR_SIZE)) {
2817 /* The physical eraseblock is supposedly empty */
2820 - * The below is just a paranoid check, it has to be
2821 - * compiled out if paranoid checks are disabled.
2823 - err = paranoid_check_all_ff(ubi, pnum, 0,
2826 - return err > 0 ? UBI_IO_BAD_EC_HDR : err;
2829 ubi_warn("no EC header found at PEB %d, "
2830 "only 0xFF bytes", pnum);
2831 - else if (UBI_IO_DEBUG)
2832 - dbg_msg("no EC header found at PEB %d, "
2833 - "only 0xFF bytes", pnum);
2834 - return UBI_IO_PEB_EMPTY;
2835 + dbg_bld("no EC header found at PEB %d, "
2836 + "only 0xFF bytes", pnum);
2840 + return UBI_IO_FF_BITFLIPS;
2844 @@ -707,10 +804,10 @@ int ubi_io_read_ec_hdr(struct ubi_device
2845 ubi_warn("bad magic number at PEB %d: %08x instead of "
2846 "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
2847 ubi_dbg_dump_ec_hdr(ec_hdr);
2848 - } else if (UBI_IO_DEBUG)
2849 - dbg_msg("bad magic number at PEB %d: %08x instead of "
2850 - "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
2851 - return UBI_IO_BAD_EC_HDR;
2853 + dbg_bld("bad magic number at PEB %d: %08x instead of "
2854 + "%08x", pnum, magic, UBI_EC_HDR_MAGIC);
2855 + return UBI_IO_BAD_HDR;
2858 crc = crc32(UBI_CRC32_INIT, ec_hdr, UBI_EC_HDR_SIZE_CRC);
2859 @@ -721,10 +818,14 @@ int ubi_io_read_ec_hdr(struct ubi_device
2860 ubi_warn("bad EC header CRC at PEB %d, calculated "
2861 "%#08x, read %#08x", pnum, crc, hdr_crc);
2862 ubi_dbg_dump_ec_hdr(ec_hdr);
2863 - } else if (UBI_IO_DEBUG)
2864 - dbg_msg("bad EC header CRC at PEB %d, calculated "
2865 - "%#08x, read %#08x", pnum, crc, hdr_crc);
2866 - return UBI_IO_BAD_EC_HDR;
2868 + dbg_bld("bad EC header CRC at PEB %d, calculated "
2869 + "%#08x, read %#08x", pnum, crc, hdr_crc);
2872 + return UBI_IO_BAD_HDR;
2874 + return UBI_IO_BAD_HDR_EBADMSG;
2877 /* And of course validate what has just been read from the media */
2878 @@ -734,6 +835,10 @@ int ubi_io_read_ec_hdr(struct ubi_device
2883 + * If there was %-EBADMSG, but the header CRC is still OK, report about
2884 + * a bit-flip to force scrubbing on this PEB.
2886 return read_err ? UBI_IO_BITFLIPS : 0;
2889 @@ -771,7 +876,7 @@ int ubi_io_write_ec_hdr(struct ubi_devic
2891 err = paranoid_check_ec_hdr(ubi, pnum, ec_hdr);
2896 err = ubi_io_write(ubi, ec_hdr, pnum, 0, ubi->ec_hdr_alsize);
2898 @@ -907,22 +1012,16 @@ bad:
2900 * This function reads the volume identifier header from physical eraseblock
2901 * @pnum and stores it in @vid_hdr. It also checks CRC checksum of the read
2902 - * volume identifier header. The following codes may be returned:
2903 + * volume identifier header. The error codes are the same as in
2904 + * 'ubi_io_read_ec_hdr()'.
2906 - * o %0 if the CRC checksum is correct and the header was successfully read;
2907 - * o %UBI_IO_BITFLIPS if the CRC is correct, but bit-flips were detected
2908 - * and corrected by the flash driver; this is harmless but may indicate that
2909 - * this eraseblock may become bad soon;
2910 - * o %UBI_IO_BAD_VID_HDR if the volume identifier header is corrupted (a CRC
2911 - * error detected);
2912 - * o %UBI_IO_PEB_FREE if the physical eraseblock is free (i.e., there is no VID
2914 - * o a negative error code in case of failure.
2915 + * Note, the implementation of this function is also very similar to
2916 + * 'ubi_io_read_ec_hdr()', so refer to the commentaries in 'ubi_io_read_ec_hdr()'.
2918 int ubi_io_read_vid_hdr(struct ubi_device *ubi, int pnum,
2919 struct ubi_vid_hdr *vid_hdr, int verbose)
2921 - int err, read_err = 0;
2922 + int err, read_err;
2923 uint32_t crc, magic, hdr_crc;
2926 @@ -930,68 +1029,36 @@ int ubi_io_read_vid_hdr(struct ubi_devic
2927 ubi_assert(pnum >= 0 && pnum < ubi->peb_count);
2929 p = (char *)vid_hdr - ubi->vid_hdr_shift;
2930 - err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
2931 + read_err = ubi_io_read(ubi, p, pnum, ubi->vid_hdr_aloffset,
2932 ubi->vid_hdr_alsize);
2934 - if (err != UBI_IO_BITFLIPS && err != -EBADMSG)
2938 - * We read all the data, but either a correctable bit-flip
2939 - * occurred, or MTD reported about some data integrity error,
2940 - * like an ECC error in case of NAND. The former is harmless,
2941 - * the later may mean the read data is corrupted. But we have a
2942 - * CRC check-sum and we will identify this. If the VID header is
2943 - * still OK, we just report this as there was a bit-flip.
2947 + if (read_err && read_err != UBI_IO_BITFLIPS && read_err != -EBADMSG)
2950 magic = be32_to_cpu(vid_hdr->magic);
2951 if (magic != UBI_VID_HDR_MAGIC) {
2953 - * If we have read all 0xFF bytes, the VID header probably does
2954 - * not exist and the physical eraseblock is assumed to be free.
2956 - * But if there was a read error, we do not test the data for
2957 - * 0xFFs. Even if it does contain all 0xFFs, this error
2958 - * indicates that something is still wrong with this physical
2959 - * eraseblock and it cannot be regarded as free.
2961 - if (read_err != -EBADMSG &&
2962 - check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) {
2963 - /* The physical eraseblock is supposedly free */
2966 - * The below is just a paranoid check, it has to be
2967 - * compiled out if paranoid checks are disabled.
2969 - err = paranoid_check_all_ff(ubi, pnum, ubi->leb_start,
2972 - return err > 0 ? UBI_IO_BAD_VID_HDR : err;
2973 + if (read_err == -EBADMSG)
2974 + return UBI_IO_BAD_HDR_EBADMSG;
2976 + if (ubi_check_pattern(vid_hdr, 0xFF, UBI_VID_HDR_SIZE)) {
2978 ubi_warn("no VID header found at PEB %d, "
2979 "only 0xFF bytes", pnum);
2980 - else if (UBI_IO_DEBUG)
2981 - dbg_msg("no VID header found at PEB %d, "
2982 - "only 0xFF bytes", pnum);
2983 - return UBI_IO_PEB_FREE;
2984 + dbg_bld("no VID header found at PEB %d, "
2985 + "only 0xFF bytes", pnum);
2989 + return UBI_IO_FF_BITFLIPS;
2993 - * This is not a valid VID header, and these are not 0xFF
2994 - * bytes. Report that the header is corrupted.
2997 ubi_warn("bad magic number at PEB %d: %08x instead of "
2998 "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
2999 ubi_dbg_dump_vid_hdr(vid_hdr);
3000 - } else if (UBI_IO_DEBUG)
3001 - dbg_msg("bad magic number at PEB %d: %08x instead of "
3002 - "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
3003 - return UBI_IO_BAD_VID_HDR;
3005 + dbg_bld("bad magic number at PEB %d: %08x instead of "
3006 + "%08x", pnum, magic, UBI_VID_HDR_MAGIC);
3007 + return UBI_IO_BAD_HDR;
3010 crc = crc32(UBI_CRC32_INIT, vid_hdr, UBI_VID_HDR_SIZE_CRC);
3011 @@ -1002,13 +1069,15 @@ int ubi_io_read_vid_hdr(struct ubi_devic
3012 ubi_warn("bad CRC at PEB %d, calculated %#08x, "
3013 "read %#08x", pnum, crc, hdr_crc);
3014 ubi_dbg_dump_vid_hdr(vid_hdr);
3015 - } else if (UBI_IO_DEBUG)
3016 - dbg_msg("bad CRC at PEB %d, calculated %#08x, "
3017 - "read %#08x", pnum, crc, hdr_crc);
3018 - return UBI_IO_BAD_VID_HDR;
3020 + dbg_bld("bad CRC at PEB %d, calculated %#08x, "
3021 + "read %#08x", pnum, crc, hdr_crc);
3023 + return UBI_IO_BAD_HDR;
3025 + return UBI_IO_BAD_HDR_EBADMSG;
3028 - /* Validate the VID header that we have just read */
3029 err = validate_vid_hdr(ubi, vid_hdr);
3031 ubi_err("validation failed for PEB %d", pnum);
3032 @@ -1045,7 +1114,7 @@ int ubi_io_write_vid_hdr(struct ubi_devi
3034 err = paranoid_check_peb_ec_hdr(ubi, pnum);
3036 - return err > 0 ? -EINVAL : err;
3039 vid_hdr->magic = cpu_to_be32(UBI_VID_HDR_MAGIC);
3040 vid_hdr->version = UBI_VERSION;
3041 @@ -1054,7 +1123,7 @@ int ubi_io_write_vid_hdr(struct ubi_devi
3043 err = paranoid_check_vid_hdr(ubi, pnum, vid_hdr);
3048 p = (char *)vid_hdr - ubi->vid_hdr_shift;
3049 err = ubi_io_write(ubi, p, pnum, ubi->vid_hdr_aloffset,
3050 @@ -1062,27 +1131,30 @@ int ubi_io_write_vid_hdr(struct ubi_devi
3054 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
3055 +#ifdef CONFIG_MTD_UBI_DEBUG
3058 * paranoid_check_not_bad - ensure that a physical eraseblock is not bad.
3059 * @ubi: UBI device description object
3060 * @pnum: physical eraseblock number to check
3062 - * This function returns zero if the physical eraseblock is good, a positive
3063 - * number if it is bad and a negative error code if an error occurred.
3064 + * This function returns zero if the physical eraseblock is good, %-EINVAL if
3065 + * it is bad and a negative error code if an error occurred.
3067 static int paranoid_check_not_bad(const struct ubi_device *ubi, int pnum)
3071 + if (!(ubi_chk_flags & UBI_CHK_IO))
3074 err = ubi_io_is_bad(ubi, pnum);
3078 ubi_err("paranoid check failed for PEB %d", pnum);
3079 ubi_dbg_dump_stack();
3081 + return err > 0 ? -EINVAL : err;
3085 @@ -1092,7 +1164,7 @@ static int paranoid_check_not_bad(const
3086 * @ec_hdr: the erase counter header to check
3088 * This function returns zero if the erase counter header contains valid
3089 - * values, and %1 if not.
3090 + * values, and %-EINVAL if not.
3092 static int paranoid_check_ec_hdr(const struct ubi_device *ubi, int pnum,
3093 const struct ubi_ec_hdr *ec_hdr)
3094 @@ -1100,6 +1172,9 @@ static int paranoid_check_ec_hdr(const s
3098 + if (!(ubi_chk_flags & UBI_CHK_IO))
3101 magic = be32_to_cpu(ec_hdr->magic);
3102 if (magic != UBI_EC_HDR_MAGIC) {
3103 ubi_err("bad magic %#08x, must be %#08x",
3104 @@ -1118,7 +1193,7 @@ static int paranoid_check_ec_hdr(const s
3106 ubi_dbg_dump_ec_hdr(ec_hdr);
3107 ubi_dbg_dump_stack();
3113 @@ -1126,8 +1201,8 @@ fail:
3114 * @ubi: UBI device description object
3115 * @pnum: the physical eraseblock number to check
3117 - * This function returns zero if the erase counter header is all right, %1 if
3118 - * not, and a negative error code if an error occurred.
3119 + * This function returns zero if the erase counter header is all right and
3120 + * a negative error code if not or if an error occurred.
3122 static int paranoid_check_peb_ec_hdr(const struct ubi_device *ubi, int pnum)
3124 @@ -1135,6 +1210,9 @@ static int paranoid_check_peb_ec_hdr(con
3125 uint32_t crc, hdr_crc;
3126 struct ubi_ec_hdr *ec_hdr;
3128 + if (!(ubi_chk_flags & UBI_CHK_IO))
3131 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
3134 @@ -1150,7 +1228,7 @@ static int paranoid_check_peb_ec_hdr(con
3135 ubi_err("paranoid check failed for PEB %d", pnum);
3136 ubi_dbg_dump_ec_hdr(ec_hdr);
3137 ubi_dbg_dump_stack();
3143 @@ -1168,7 +1246,7 @@ exit:
3144 * @vid_hdr: the volume identifier header to check
3146 * This function returns zero if the volume identifier header is all right, and
3148 + * %-EINVAL if not.
3150 static int paranoid_check_vid_hdr(const struct ubi_device *ubi, int pnum,
3151 const struct ubi_vid_hdr *vid_hdr)
3152 @@ -1176,6 +1254,9 @@ static int paranoid_check_vid_hdr(const
3156 + if (!(ubi_chk_flags & UBI_CHK_IO))
3159 magic = be32_to_cpu(vid_hdr->magic);
3160 if (magic != UBI_VID_HDR_MAGIC) {
3161 ubi_err("bad VID header magic %#08x at PEB %d, must be %#08x",
3162 @@ -1195,7 +1276,7 @@ fail:
3163 ubi_err("paranoid check failed for PEB %d", pnum);
3164 ubi_dbg_dump_vid_hdr(vid_hdr);
3165 ubi_dbg_dump_stack();
3171 @@ -1205,7 +1286,7 @@ fail:
3172 * @pnum: the physical eraseblock number to check
3174 * This function returns zero if the volume identifier header is all right,
3175 - * %1 if not, and a negative error code if an error occurred.
3176 + * and a negative error code if not or if an error occurred.
3178 static int paranoid_check_peb_vid_hdr(const struct ubi_device *ubi, int pnum)
3180 @@ -1214,6 +1295,9 @@ static int paranoid_check_peb_vid_hdr(co
3181 struct ubi_vid_hdr *vid_hdr;
3184 + if (!(ubi_chk_flags & UBI_CHK_IO))
3187 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
3190 @@ -1232,7 +1316,7 @@ static int paranoid_check_peb_vid_hdr(co
3191 ubi_err("paranoid check failed for PEB %d", pnum);
3192 ubi_dbg_dump_vid_hdr(vid_hdr);
3193 ubi_dbg_dump_stack();
3199 @@ -1244,51 +1328,124 @@ exit:
3203 - * paranoid_check_all_ff - check that a region of flash is empty.
3204 + * ubi_dbg_check_write - make sure write succeeded.
3205 + * @ubi: UBI device description object
3206 + * @buf: buffer with data which were written
3207 + * @pnum: physical eraseblock number the data were written to
3208 + * @offset: offset within the physical eraseblock the data were written to
3209 + * @len: how many bytes were written
3211 + * This function reads data which were recently written and compares it with
3212 + * the original data buffer - the data have to match. Returns zero if the data
3213 + * match and a negative error code if not or in case of failure.
3215 +int ubi_dbg_check_write(struct ubi_device *ubi, const void *buf, int pnum,
3216 + int offset, int len)
3221 + loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
3223 + if (!(ubi_chk_flags & UBI_CHK_IO))
3226 + buf1 = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
3228 + ubi_err("cannot allocate memory to check writes");
3232 + err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf1);
3233 + if (err && err != -EUCLEAN)
3236 + for (i = 0; i < len; i++) {
3237 + uint8_t c = ((uint8_t *)buf)[i];
3238 + uint8_t c1 = ((uint8_t *)buf1)[i];
3244 + ubi_err("paranoid check failed for PEB %d:%d, len %d",
3245 + pnum, offset, len);
3246 + ubi_msg("data differ at position %d", i);
3247 + dump_len = max_t(int, 128, len - i);
3248 + ubi_msg("hex dump of the original buffer from %d to %d",
3250 + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
3251 + buf + i, dump_len, 1);
3252 + ubi_msg("hex dump of the read buffer from %d to %d",
3254 + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
3255 + buf1 + i, dump_len, 1);
3256 + ubi_dbg_dump_stack();
3270 + * ubi_dbg_check_all_ff - check that a region of flash is empty.
3271 * @ubi: UBI device description object
3272 * @pnum: the physical eraseblock number to check
3273 * @offset: the starting offset within the physical eraseblock to check
3274 * @len: the length of the region to check
3276 * This function returns zero if only 0xFF bytes are present at offset
3277 - * @offset of the physical eraseblock @pnum, %1 if not, and a negative error
3278 - * code if an error occurred.
3279 + * @offset of the physical eraseblock @pnum, and a negative error code if not
3280 + * or if an error occurred.
3282 -static int paranoid_check_all_ff(struct ubi_device *ubi, int pnum, int offset,
3284 +int ubi_dbg_check_all_ff(struct ubi_device *ubi, int pnum, int offset, int len)
3289 loff_t addr = (loff_t)pnum * ubi->peb_size + offset;
3291 - mutex_lock(&ubi->dbg_buf_mutex);
3292 - err = ubi->mtd->read(ubi->mtd, addr, len, &read, ubi->dbg_peb_buf);
3293 + if (!(ubi_chk_flags & UBI_CHK_IO))
3296 + buf = __vmalloc(len, GFP_NOFS, PAGE_KERNEL);
3298 + ubi_err("cannot allocate memory to check for 0xFFs");
3302 + err = ubi->mtd->read(ubi->mtd, addr, len, &read, buf);
3303 if (err && err != -EUCLEAN) {
3304 ubi_err("error %d while reading %d bytes from PEB %d:%d, "
3305 "read %zd bytes", err, len, pnum, offset, read);
3309 - err = check_pattern(ubi->dbg_peb_buf, 0xFF, len);
3310 + err = ubi_check_pattern(buf, 0xFF, len);
3312 ubi_err("flash region at PEB %d:%d, length %d does not "
3313 "contain all 0xFF bytes", pnum, offset, len);
3316 - mutex_unlock(&ubi->dbg_buf_mutex);
3322 ubi_err("paranoid check failed for PEB %d", pnum);
3323 ubi_msg("hex dump of the %d-%d region", offset, offset + len);
3324 - print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
3325 - ubi->dbg_peb_buf, len, 1);
3327 + print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1, buf, len, 1);
3330 ubi_dbg_dump_stack();
3331 - mutex_unlock(&ubi->dbg_buf_mutex);
3336 -#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
3337 +#endif /* CONFIG_MTD_UBI_DEBUG */
3338 diff -uprN linux-2.6.28/drivers/mtd/ubi/kapi.c ubifs-v2.6.28/drivers/mtd/ubi/kapi.c
3339 --- linux-2.6.28/drivers/mtd/ubi/kapi.c 2008-12-24 18:26:37.000000000 -0500
3340 +++ ubifs-v2.6.28/drivers/mtd/ubi/kapi.c 2011-06-15 14:22:07.000000000 -0400
3343 #include <linux/module.h>
3344 #include <linux/err.h>
3345 +#include <linux/namei.h>
3346 +#include <linux/fs.h>
3347 #include <asm/div64.h>
3351 + * ubi_do_get_device_info - get information about UBI device.
3352 + * @ubi: UBI device description object
3353 + * @di: the information is stored here
3355 + * This function is the same as 'ubi_get_device_info()', but it assumes the UBI
3356 + * device is locked and cannot disappear.
3358 +void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di)
3360 + di->ubi_num = ubi->ubi_num;
3361 + di->leb_size = ubi->leb_size;
3362 + di->leb_start = ubi->leb_start;
3363 + di->min_io_size = ubi->min_io_size;
3364 + di->max_write_size = ubi->max_write_size;
3365 + di->ro_mode = ubi->ro_mode;
3366 + di->cdev = ubi->cdev.dev;
3368 +EXPORT_SYMBOL_GPL(ubi_do_get_device_info);
3371 * ubi_get_device_info - get information about UBI device.
3372 * @ubi_num: UBI device number
3373 * @di: the information is stored here
3374 @@ -39,33 +61,24 @@ int ubi_get_device_info(int ubi_num, str
3376 if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
3379 ubi = ubi_get_device(ubi_num);
3383 - di->ubi_num = ubi->ubi_num;
3384 - di->leb_size = ubi->leb_size;
3385 - di->min_io_size = ubi->min_io_size;
3386 - di->ro_mode = ubi->ro_mode;
3387 - di->cdev = ubi->cdev.dev;
3389 + ubi_do_get_device_info(ubi, di);
3390 ubi_put_device(ubi);
3393 EXPORT_SYMBOL_GPL(ubi_get_device_info);
3396 - * ubi_get_volume_info - get information about UBI volume.
3397 - * @desc: volume descriptor
3398 + * ubi_do_get_volume_info - get information about UBI volume.
3399 + * @ubi: UBI device description object
3400 + * @vol: volume description object
3401 * @vi: the information is stored here
3403 -void ubi_get_volume_info(struct ubi_volume_desc *desc,
3404 - struct ubi_volume_info *vi)
3405 +void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
3406 + struct ubi_volume_info *vi)
3408 - const struct ubi_volume *vol = desc->vol;
3409 - const struct ubi_device *ubi = vol->ubi;
3411 vi->vol_id = vol->vol_id;
3412 vi->ubi_num = ubi->ubi_num;
3413 vi->size = vol->reserved_pebs;
3414 @@ -79,6 +92,17 @@ void ubi_get_volume_info(struct ubi_volu
3415 vi->name = vol->name;
3416 vi->cdev = vol->cdev.dev;
3420 + * ubi_get_volume_info - get information about UBI volume.
3421 + * @desc: volume descriptor
3422 + * @vi: the information is stored here
3424 +void ubi_get_volume_info(struct ubi_volume_desc *desc,
3425 + struct ubi_volume_info *vi)
3427 + ubi_do_get_volume_info(desc->vol->ubi, desc->vol, vi);
3429 EXPORT_SYMBOL_GPL(ubi_get_volume_info);
3432 @@ -106,7 +130,7 @@ struct ubi_volume_desc *ubi_open_volume(
3433 struct ubi_device *ubi;
3434 struct ubi_volume *vol;
3436 - dbg_gen("open device %d volume %d, mode %d", ubi_num, vol_id, mode);
3437 + dbg_gen("open device %d, volume %d, mode %d", ubi_num, vol_id, mode);
3439 if (ubi_num < 0 || ubi_num >= UBI_MAX_DEVICES)
3440 return ERR_PTR(-EINVAL);
3441 @@ -196,6 +220,8 @@ out_free:
3444 ubi_put_device(ubi);
3445 + dbg_err("cannot open device %d, volume %d, error %d",
3446 + ubi_num, vol_id, err);
3447 return ERR_PTR(err);
3449 EXPORT_SYMBOL_GPL(ubi_open_volume);
3450 @@ -215,7 +241,7 @@ struct ubi_volume_desc *ubi_open_volume_
3451 struct ubi_device *ubi;
3452 struct ubi_volume_desc *ret;
3454 - dbg_gen("open volume %s, mode %d", name, mode);
3455 + dbg_gen("open device %d, volume %s, mode %d", ubi_num, name, mode);
3458 return ERR_PTR(-EINVAL);
3459 @@ -258,6 +284,43 @@ struct ubi_volume_desc *ubi_open_volume_
3460 EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
3463 + * ubi_open_volume_path - open UBI volume by its character device node path.
3464 + * @pathname: volume character device node path
3465 + * @mode: open mode
3467 + * This function is similar to 'ubi_open_volume()', but opens a volume by the path
3468 + * to its character device node.
3470 +struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
3472 + int error, ubi_num, vol_id, mod;
3473 + struct inode *inode;
3476 + dbg_gen("open volume %s, mode %d", pathname, mode);
3478 + if (!pathname || !*pathname)
3479 + return ERR_PTR(-EINVAL);
3481 + error = kern_path(pathname, LOOKUP_FOLLOW, &path);
3483 + return ERR_PTR(error);
3485 + inode = path.dentry->d_inode;
3486 + mod = inode->i_mode;
3487 + ubi_num = ubi_major2num(imajor(inode));
3488 + vol_id = iminor(inode) - 1;
3491 + if (!S_ISCHR(mod))
3492 + return ERR_PTR(-EINVAL);
3493 + if (vol_id >= 0 && ubi_num >= 0)
3494 + return ubi_open_volume(ubi_num, vol_id, mode);
3495 + return ERR_PTR(-ENODEV);
3497 +EXPORT_SYMBOL_GPL(ubi_open_volume_path);
3500 * ubi_close_volume - close UBI volume.
3501 * @desc: volume descriptor
3503 @@ -266,7 +329,8 @@ void ubi_close_volume(struct ubi_volume_
3504 struct ubi_volume *vol = desc->vol;
3505 struct ubi_device *ubi = vol->ubi;
3507 - dbg_gen("close volume %d, mode %d", vol->vol_id, desc->mode);
3508 + dbg_gen("close device %d, volume %d, mode %d",
3509 + ubi->ubi_num, vol->vol_id, desc->mode);
3511 spin_lock(&ubi->volumes_lock);
3512 switch (desc->mode) {
3513 @@ -425,7 +489,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_write);
3515 * This function changes the contents of a logical eraseblock atomically. @buf
3516 * has to contain new logical eraseblock data, and @len - the length of the
3517 - * data, which has to be aligned. The length may be shorter then the logical
3518 + * data, which has to be aligned. The length may be shorter than the logical
3519 * eraseblock size, ant the logical eraseblock may be appended to more times
3520 * later on. This function guarantees that in case of an unclean reboot the old
3521 * contents is preserved. Returns zero in case of success and a negative error
3522 @@ -508,7 +572,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_erase);
3524 * This function un-maps logical eraseblock @lnum and schedules the
3525 * corresponding physical eraseblock for erasure, so that it will eventually be
3526 - * physically erased in background. This operation is much faster then the
3527 + * physically erased in background. This operation is much faster than the
3530 * Unlike erase, the un-map operation does not guarantee that the logical
3531 @@ -527,7 +591,7 @@ EXPORT_SYMBOL_GPL(ubi_leb_erase);
3533 * The main and obvious use-case of this function is when the contents of a
3534 * logical eraseblock has to be re-written. Then it is much more efficient to
3535 - * first un-map it, then write new data, rather then first erase it, then write
3536 + * first un-map it, then write new data, rather than first erase it, then write
3537 * new data. Note, once new data has been written to the logical eraseblock,
3538 * UBI guarantees that the old contents has gone forever. In other words, if an
3539 * unclean reboot happens after the logical eraseblock has been un-mapped and
3540 @@ -558,13 +622,13 @@ int ubi_leb_unmap(struct ubi_volume_desc
3541 EXPORT_SYMBOL_GPL(ubi_leb_unmap);
3544 - * ubi_leb_map - map logical erasblock to a physical eraseblock.
3545 + * ubi_leb_map - map logical eraseblock to a physical eraseblock.
3546 * @desc: volume descriptor
3547 * @lnum: logical eraseblock number
3548 * @dtype: expected data type
3550 * This function maps an un-mapped logical eraseblock @lnum to a physical
3551 - * eraseblock. This means, that after a successfull invocation of this
3552 + * eraseblock. This means, that after a successful invocation of this
3553 * function the logical eraseblock @lnum will be empty (contain only %0xFF
3554 * bytes) and be mapped to a physical eraseblock, even if an unclean reboot
3556 @@ -656,3 +720,59 @@ int ubi_sync(int ubi_num)
3559 EXPORT_SYMBOL_GPL(ubi_sync);
3561 +BLOCKING_NOTIFIER_HEAD(ubi_notifiers);
3564 + * ubi_register_volume_notifier - register a volume notifier.
3565 + * @nb: the notifier description object
3566 + * @ignore_existing: if non-zero, do not send "added" notification for all
3567 + * already existing volumes
3569 + * This function registers a volume notifier, which means that
3570 + * 'nb->notifier_call()' will be invoked when an UBI volume is created,
3571 + * removed, re-sized, re-named, or updated. The first argument of the function
3572 + * is the notification type. The second argument is pointer to a
3573 + * &struct ubi_notification object which describes the notification event.
3574 + * Using UBI API from the volume notifier is prohibited.
3576 + * This function returns zero in case of success and a negative error code
3577 + * in case of failure.
3579 +int ubi_register_volume_notifier(struct notifier_block *nb,
3580 + int ignore_existing)
3584 + err = blocking_notifier_chain_register(&ubi_notifiers, nb);
3587 + if (ignore_existing)
3591 + * We are going to walk all UBI devices and all volumes, and
3592 + * notify the user about existing volumes by the %UBI_VOLUME_ADDED
3593 + * event. We have to lock the @ubi_devices_mutex to make sure UBI
3594 + * devices do not disappear.
3596 + mutex_lock(&ubi_devices_mutex);
3597 + ubi_enumerate_volumes(nb);
3598 + mutex_unlock(&ubi_devices_mutex);
3602 +EXPORT_SYMBOL_GPL(ubi_register_volume_notifier);
3605 + * ubi_unregister_volume_notifier - unregister the volume notifier.
3606 + * @nb: the notifier description object
3608 + * This function unregisters volume notifier @nb and returns zero in case of
3609 + * success and a negative error code in case of failure.
3611 +int ubi_unregister_volume_notifier(struct notifier_block *nb)
3613 + return blocking_notifier_chain_unregister(&ubi_notifiers, nb);
3615 +EXPORT_SYMBOL_GPL(ubi_unregister_volume_notifier);
3616 diff -uprN linux-2.6.28/drivers/mtd/ubi/Kconfig ubifs-v2.6.28/drivers/mtd/ubi/Kconfig
3617 --- linux-2.6.28/drivers/mtd/ubi/Kconfig 2008-12-24 18:26:37.000000000 -0500
3618 +++ ubifs-v2.6.28/drivers/mtd/ubi/Kconfig 2011-06-15 14:22:07.000000000 -0400
3620 # drivers/mtd/ubi/Kconfig
3622 -menu "UBI - Unsorted block images"
3626 - tristate "Enable UBI"
3629 + tristate "Enable UBI - Unsorted block images"
3632 UBI is a software layer above MTD layer which admits of LVM-like
3633 @@ -14,11 +10,12 @@ config MTD_UBI
3634 capabilities. Please, consult the MTD web site for more details
3635 (www.linux-mtd.infradead.org).
3639 config MTD_UBI_WL_THRESHOLD
3640 int "UBI wear-leveling threshold"
3643 - depends on MTD_UBI
3645 This parameter defines the maximum difference between the highest
3646 erase counter value and the lowest erase counter value of eraseblocks
3647 @@ -29,14 +26,13 @@ config MTD_UBI_WL_THRESHOLD
3648 The default value should be OK for SLC NAND flashes, NOR flashes and
3649 other flashes which have eraseblock life-cycle 100000 or more.
3650 However, in case of MLC NAND flashes which typically have eraseblock
3651 - life-cycle less then 10000, the threshold should be lessened (e.g.,
3652 + life-cycle less than 10000, the threshold should be lessened (e.g.,
3653 to 128 or 256, although it does not have to be power of 2).
3655 config MTD_UBI_BEB_RESERVE
3656 int "Percentage of reserved eraseblocks for bad eraseblocks handling"
3659 - depends on MTD_UBI
3661 If the MTD device admits of bad eraseblocks (e.g. NAND flash), UBI
3662 reserves some amount of physical eraseblocks to handle new bad
3663 @@ -49,15 +45,21 @@ config MTD_UBI_BEB_RESERVE
3664 reserved. Leave the default value if unsure.
3666 config MTD_UBI_GLUEBI
3667 - bool "Emulate MTD devices"
3669 - depends on MTD_UBI
3670 + tristate "MTD devices emulation driver (gluebi)"
3672 + This option enables gluebi - an additional driver which emulates MTD
3673 + devices on top of UBI volumes: for each UBI volume an MTD device is
3674 + created, and all I/O to this MTD device is redirected to the UBI
3675 + volume. This is handy to make MTD-oriented software (like JFFS2)
3676 + work on top of UBI. Do not enable this unless you use legacy
3679 +config MTD_UBI_DEBUG
3680 + bool "UBI debugging"
3685 - This option enables MTD devices emulation on top of UBI volumes: for
3686 - each UBI volumes an MTD device is created, and all I/O to this MTD
3687 - device is redirected to the UBI volume. This is handy to make
3688 - MTD-oriented software (like JFFS2) work on top of UBI. Do not enable
3689 - this if no legacy software will be used.
3690 + This option enables UBI debugging.
3692 -source "drivers/mtd/ubi/Kconfig.debug"
3695 diff -uprN linux-2.6.28/drivers/mtd/ubi/Kconfig.debug ubifs-v2.6.28/drivers/mtd/ubi/Kconfig.debug
3696 --- linux-2.6.28/drivers/mtd/ubi/Kconfig.debug 2008-12-24 18:26:37.000000000 -0500
3697 +++ ubifs-v2.6.28/drivers/mtd/ubi/Kconfig.debug 1969-12-31 19:00:00.000000000 -0500
3699 -comment "UBI debugging options"
3700 - depends on MTD_UBI
3702 -config MTD_UBI_DEBUG
3703 - bool "UBI debugging"
3705 - depends on MTD_UBI
3707 - select KALLSYMS_ALL
3709 - This option enables UBI debugging.
3711 -config MTD_UBI_DEBUG_MSG
3712 - bool "UBI debugging messages"
3713 - depends on MTD_UBI_DEBUG
3716 - This option enables UBI debugging messages.
3718 -config MTD_UBI_DEBUG_PARANOID
3719 - bool "Extra self-checks"
3721 - depends on MTD_UBI_DEBUG
3723 - This option enables extra checks in UBI code. Note this slows UBI down
3726 -config MTD_UBI_DEBUG_DISABLE_BGT
3727 - bool "Do not enable the UBI background thread"
3728 - depends on MTD_UBI_DEBUG
3731 - This option switches the background thread off by default. The thread
3732 - may be also be enabled/disabled via UBI sysfs.
3734 -config MTD_UBI_DEBUG_USERSPACE_IO
3735 - bool "Direct user-space write/erase support"
3737 - depends on MTD_UBI_DEBUG
3739 - By default, users cannot directly write and erase individual
3740 - eraseblocks of dynamic volumes, and have to use update operation
3741 - instead. This option enables this capability - it is very useful for
3742 - debugging and testing.
3744 -config MTD_UBI_DEBUG_EMULATE_BITFLIPS
3745 - bool "Emulate flash bit-flips"
3746 - depends on MTD_UBI_DEBUG
3749 - This option emulates bit-flips with probability 1/50, which in turn
3750 - causes scrubbing. Useful for debugging and stressing UBI.
3752 -config MTD_UBI_DEBUG_EMULATE_WRITE_FAILURES
3753 - bool "Emulate flash write failures"
3754 - depends on MTD_UBI_DEBUG
3757 - This option emulates write failures with probability 1/100. Useful for
3758 - debugging and testing how UBI handlines errors.
3760 -config MTD_UBI_DEBUG_EMULATE_ERASE_FAILURES
3761 - bool "Emulate flash erase failures"
3762 - depends on MTD_UBI_DEBUG
3765 - This option emulates erase failures with probability 1/100. Useful for
3766 - debugging and testing how UBI handlines errors.
3768 -menu "Additional UBI debugging messages"
3769 - depends on MTD_UBI_DEBUG
3771 -config MTD_UBI_DEBUG_MSG_BLD
3772 - bool "Additional UBI initialization and build messages"
3774 - depends on MTD_UBI_DEBUG
3776 - This option enables detailed UBI initialization and device build
3777 - debugging messages.
3779 -config MTD_UBI_DEBUG_MSG_EBA
3780 - bool "Eraseblock association unit messages"
3782 - depends on MTD_UBI_DEBUG
3784 - This option enables debugging messages from the UBI eraseblock
3787 -config MTD_UBI_DEBUG_MSG_WL
3788 - bool "Wear-leveling unit messages"
3790 - depends on MTD_UBI_DEBUG
3792 - This option enables debugging messages from the UBI wear-leveling
3795 -config MTD_UBI_DEBUG_MSG_IO
3796 - bool "Input/output unit messages"
3798 - depends on MTD_UBI_DEBUG
3800 - This option enables debugging messages from the UBI input/output unit.
3802 -endmenu # UBI debugging messages
3803 diff -uprN linux-2.6.28/drivers/mtd/ubi/Makefile ubifs-v2.6.28/drivers/mtd/ubi/Makefile
3804 --- linux-2.6.28/drivers/mtd/ubi/Makefile 2008-12-24 18:26:37.000000000 -0500
3805 +++ ubifs-v2.6.28/drivers/mtd/ubi/Makefile 2011-06-15 14:22:07.000000000 -0400
3806 @@ -4,4 +4,4 @@ ubi-y += vtbl.o vmt.o upd.o build.o cdev
3809 ubi-$(CONFIG_MTD_UBI_DEBUG) += debug.o
3810 -ubi-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
3811 +obj-$(CONFIG_MTD_UBI_GLUEBI) += gluebi.o
3812 diff -uprN linux-2.6.28/drivers/mtd/ubi/misc.c ubifs-v2.6.28/drivers/mtd/ubi/misc.c
3813 --- linux-2.6.28/drivers/mtd/ubi/misc.c 2008-12-24 18:26:37.000000000 -0500
3814 +++ ubifs-v2.6.28/drivers/mtd/ubi/misc.c 2011-06-15 14:22:07.000000000 -0400
3815 @@ -103,3 +103,22 @@ void ubi_calculate_reserved(struct ubi_d
3816 if (ubi->beb_rsvd_level < MIN_RESEVED_PEBS)
3817 ubi->beb_rsvd_level = MIN_RESEVED_PEBS;
3821 + * ubi_check_pattern - check if buffer contains only a certain byte pattern.
3822 + * @buf: buffer to check
3823 + * @patt: the pattern to check
3824 + * @size: buffer size in bytes
3826 + * This function returns %1 if there are only @patt bytes in @buf, and %0 if
3827 + * something else was also found.
3829 +int ubi_check_pattern(const void *buf, uint8_t patt, int size)
3833 + for (i = 0; i < size; i++)
3834 + if (((const uint8_t *)buf)[i] != patt)
3838 diff -uprN linux-2.6.28/drivers/mtd/ubi/scan.c ubifs-v2.6.28/drivers/mtd/ubi/scan.c
3839 --- linux-2.6.28/drivers/mtd/ubi/scan.c 2011-06-15 15:12:27.000000000 -0400
3840 +++ ubifs-v2.6.28/drivers/mtd/ubi/scan.c 2011-06-15 14:22:07.000000000 -0400
3842 * objects which are kept in volume RB-tree with root at the @volumes field.
3843 * The RB-tree is indexed by the volume ID.
3845 - * Found logical eraseblocks are represented by &struct ubi_scan_leb objects.
3846 + * Scanned logical eraseblocks are represented by &struct ubi_scan_leb objects.
3847 * These objects are kept in per-volume RB-trees with the root at the
3848 * corresponding &struct ubi_scan_volume object. To put it differently, we keep
3849 * an RB-tree of per-volume objects and each of these objects is the root of
3851 * Corrupted physical eraseblocks are put to the @corr list, free physical
3852 * eraseblocks are put to the @free list and the physical eraseblock to be
3853 * erased are put to the @erase list.
3855 + * About corruptions
3856 + * ~~~~~~~~~~~~~~~~~
3858 + * UBI protects EC and VID headers with CRC-32 checksums, so it can detect
3859 + * whether the headers are corrupted or not. Sometimes UBI also protects the
3860 + * data with CRC-32, e.g., when it executes the atomic LEB change operation, or
3861 + * when it moves the contents of a PEB for wear-leveling purposes.
3863 + * UBI tries to distinguish between 2 types of corruptions.
3865 + * 1. Corruptions caused by power cuts. These are expected corruptions and UBI
3866 + * tries to handle them gracefully, without printing too many warnings and
3867 + * error messages. The idea is that we do not lose important data in these cases
3868 + * - we may lose only the data which was being written to the media just before
3869 + * the power cut happened, and the upper layers (e.g., UBIFS) are supposed to
3870 + * handle such data losses (e.g., by using the FS journal).
3872 + * When UBI detects a corruption (CRC-32 mismatch) in a PEB, and it looks like
3873 + * the reason is a power cut, UBI puts this PEB to the @erase list, and all
3874 + * PEBs in the @erase list are scheduled for erasure later.
3876 + * 2. Unexpected corruptions which are not caused by power cuts. During
3877 + * scanning, such PEBs are put to the @corr list and UBI preserves them.
3878 + * Obviously, this lessens the amount of available PEBs, and if at some point
3879 + * UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly informs
3880 + * about such PEBs every time the MTD device is attached.
3882 + * However, it is difficult to reliably distinguish between these types of
3883 + * corruptions and UBI's strategy is as follows. UBI assumes corruption type 2
3884 + * if the VID header is corrupted and the data area does not contain all 0xFFs,
3885 + * and there were no bit-flips or integrity errors while reading the data area.
3886 + * Otherwise UBI assumes corruption type 1. So the decision criteria are as
3888 + * o If the data area contains only 0xFFs, there is no data, and it is safe
3889 + * to just erase this PEB - this is corruption type 1.
3890 + * o If the data area has bit-flips or data integrity errors (ECC errors on
3891 + * NAND), it is probably a PEB which was being erased when power cut
3892 + * happened, so this is corruption type 1. However, this is just a guess,
3893 + * which might be wrong.
3894 + * o Otherwise this is corruption type 2.
3897 #include <linux/err.h>
3898 #include <linux/crc32.h>
3899 -#include <asm/div64.h>
3900 +#include <linux/math64.h>
3901 +#include <linux/random.h>
3904 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
3905 +#ifdef CONFIG_MTD_UBI_DEBUG
3906 static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si);
3908 #define paranoid_check_si(ubi, si) 0
3909 @@ -60,35 +102,69 @@ static struct ubi_vid_hdr *vidh;
3910 * @si: scanning information
3911 * @pnum: physical eraseblock number to add
3912 * @ec: erase counter of the physical eraseblock
3913 + * @to_head: if not zero, add to the head of the list
3914 * @list: the list to add to
3916 - * This function adds physical eraseblock @pnum to free, erase, corrupted or
3917 - * alien lists. Returns zero in case of success and a negative error code in
3918 - * case of failure.
3919 + * This function adds physical eraseblock @pnum to free, erase, or alien lists.
3920 + * If @to_head is not zero, PEB will be added to the head of the list, which
3921 + * basically means it will be processed first later. E.g., we add corrupted
3922 + * PEBs (corrupted due to power cuts) to the head of the erase list to make
3923 + * sure we erase them first and get rid of corruptions ASAP. This function
3924 + * returns zero in case of success and a negative error code in case of
3927 -static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
3928 +static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, int to_head,
3929 struct list_head *list)
3931 struct ubi_scan_leb *seb;
3933 - if (list == &si->free)
3934 + if (list == &si->free) {
3935 dbg_bld("add to free: PEB %d, EC %d", pnum, ec);
3936 - else if (list == &si->erase)
3937 + } else if (list == &si->erase) {
3938 dbg_bld("add to erase: PEB %d, EC %d", pnum, ec);
3939 - else if (list == &si->corr)
3940 - dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec);
3941 - else if (list == &si->alien)
3942 + } else if (list == &si->alien) {
3943 dbg_bld("add to alien: PEB %d, EC %d", pnum, ec);
3945 + si->alien_peb_count += 1;
3949 - seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
3950 + seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL);
3957 + list_add(&seb->u.list, list);
3959 + list_add_tail(&seb->u.list, list);
3964 + * add_corrupted - add a corrupted physical eraseblock.
3965 + * @si: scanning information
3966 + * @pnum: physical eraseblock number to add
3967 + * @ec: erase counter of the physical eraseblock
3969 + * This function adds corrupted physical eraseblock @pnum to the 'corr' list.
3970 + * The corruption was presumably not caused by a power cut. Returns zero in
3971 + * case of success and a negative error code in case of failure.
3973 +static int add_corrupted(struct ubi_scan_info *si, int pnum, int ec)
3975 + struct ubi_scan_leb *seb;
3977 + dbg_bld("add to corrupted: PEB %d, EC %d", pnum, ec);
3979 + seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL);
3983 + si->corr_peb_count += 1;
3986 - list_add_tail(&seb->u.list, list);
3987 + list_add(&seb->u.list, &si->corr);
3991 @@ -229,7 +305,7 @@ static struct ubi_scan_volume *add_volum
3992 * case of success this function returns a positive value, in case of failure, a
3993 * negative error code is returned. The success return codes use the following
3995 - * o bit 0 is cleared: the first PEB (described by @seb) is newer then the
3996 + * o bit 0 is cleared: the first PEB (described by @seb) is newer than the
3997 * second PEB (described by @pnum and @vid_hdr);
3998 * o bit 0 is set: the second PEB is newer;
3999 * o bit 1 is cleared: no bit-flips were detected in the newer LEB;
4000 @@ -252,8 +328,8 @@ static int compare_lebs(struct ubi_devic
4001 * created before sequence numbers support has been added. At
4002 * that times we used 32-bit LEB versions stored in logical
4003 * eraseblocks. That was before UBI got into mainline. We do not
4004 - * support these images anymore. Well, those images will work
4005 - * still work, but only if no unclean reboots happened.
4006 + * support these images anymore. Well, those images still work,
4007 + * but only if no unclean reboots happened.
4009 ubi_err("unsupported on-flash UBI format\n");
4011 @@ -279,19 +355,25 @@ static int compare_lebs(struct ubi_devic
4016 + if (!seb->copy_flag) {
4017 + /* It is not a copy, so it is newer */
4018 + dbg_bld("first PEB %d is newer, copy_flag is unset",
4020 + return bitflips << 1;
4023 vh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
4028 err = ubi_io_read_vid_hdr(ubi, pnum, vh, 0);
4030 if (err == UBI_IO_BITFLIPS)
4033 dbg_err("VID of PEB %d header is bad, but it "
4034 - "was OK earlier", pnum);
4035 + "was OK earlier, err %d", pnum, err);
4039 @@ -299,14 +381,6 @@ static int compare_lebs(struct ubi_devic
4043 - if (!vh->copy_flag) {
4044 - /* It is not a copy, so it is newer */
4045 - dbg_bld("first PEB %d is newer, copy_flag is unset",
4047 - err = bitflips << 1;
4048 - goto out_free_vidh;
4054 @@ -450,25 +524,22 @@ int ubi_scan_add_used(struct ubi_device
4058 - * This logical eraseblock is newer then the one
4059 + * This logical eraseblock is newer than the one
4062 err = validate_vid_hdr(vid_hdr, sv, pnum);
4067 - err = add_to_list(si, seb->pnum, seb->ec,
4070 - err = add_to_list(si, seb->pnum, seb->ec,
4072 + err = add_to_list(si, seb->pnum, seb->ec, cmp_res & 4,
4079 seb->scrub = ((cmp_res & 2) || bitflips);
4080 + seb->copy_flag = vid_hdr->copy_flag;
4083 if (sv->highest_lnum == lnum)
4084 @@ -478,13 +549,11 @@ int ubi_scan_add_used(struct ubi_device
4088 - * This logical eraseblock is older then the one found
4089 + * This logical eraseblock is older than the one found
4093 - return add_to_list(si, pnum, ec, &si->corr);
4095 - return add_to_list(si, pnum, ec, &si->erase);
4096 + return add_to_list(si, pnum, ec, cmp_res & 4,
4101 @@ -497,15 +566,16 @@ int ubi_scan_add_used(struct ubi_device
4105 - seb = kmalloc(sizeof(struct ubi_scan_leb), GFP_KERNEL);
4106 + seb = kmem_cache_alloc(si->scan_leb_slab, GFP_KERNEL);
4113 - seb->sqnum = sqnum;
4114 seb->scrub = bitflips;
4115 + seb->copy_flag = vid_hdr->copy_flag;
4116 + seb->sqnum = sqnum;
4118 if (sv->highest_lnum <= lnum) {
4119 sv->highest_lnum = lnum;
4120 @@ -661,8 +731,8 @@ out_free:
4121 struct ubi_scan_leb *ubi_scan_get_free_peb(struct ubi_device *ubi,
4122 struct ubi_scan_info *si)
4125 - struct ubi_scan_leb *seb;
4127 + struct ubi_scan_leb *seb, *tmp_seb;
4129 if (!list_empty(&si->free)) {
4130 seb = list_entry(si->free.next, struct ubi_scan_leb, u.list);
4131 @@ -671,38 +741,88 @@ struct ubi_scan_leb *ubi_scan_get_free_p
4135 - for (i = 0; i < 2; i++) {
4136 - struct list_head *head;
4137 - struct ubi_scan_leb *tmp_seb;
4139 + * We try to erase the first physical eraseblock from the erase list
4140 + * and pick it if we succeed, or try to erase the next one if not. And
4141 + * so forth. We don't want to take care about bad eraseblocks here -
4142 + * they'll be handled later.
4144 + list_for_each_entry_safe(seb, tmp_seb, &si->erase, u.list) {
4145 + if (seb->ec == UBI_SCAN_UNKNOWN_EC)
4146 + seb->ec = si->mean_ec;
4149 - head = &si->erase;
4152 + err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
4157 + list_del(&seb->u.list);
4158 + dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
4162 + ubi_err("no free eraseblocks");
4163 + return ERR_PTR(-ENOSPC);
4167 + * check_corruption - check the data area of PEB.
4168 + * @ubi: UBI device description object
4169 + * @vid_hdr: the (corrupted) VID header of this PEB
4170 + * @pnum: the physical eraseblock number to check
4172 + * This is a helper function which is used to distinguish between VID header
4173 + * corruptions caused by power cuts and other reasons. If the PEB contains only
4174 + * 0xFF bytes in the data area, the VID header is most probably corrupted
4175 + * because of a power cut (%0 is returned in this case). Otherwise, it was
4176 + * probably corrupted for some other reasons (%1 is returned in this case). A
4177 + * negative error code is returned if a read error occurred.
4179 + * If the corruption reason was a power cut, UBI can safely erase this PEB.
4180 + * Otherwise, it should preserve it to avoid possibly destroying important
4183 +static int check_corruption(struct ubi_device *ubi, struct ubi_vid_hdr *vid_hdr,
4188 + mutex_lock(&ubi->buf_mutex);
4189 + memset(ubi->peb_buf1, 0x00, ubi->leb_size);
4191 + err = ubi_io_read(ubi, ubi->peb_buf1, pnum, ubi->leb_start,
4193 + if (err == UBI_IO_BITFLIPS || err == -EBADMSG) {
4195 - * We try to erase the first physical eraseblock from the @head
4196 - * list and pick it if we succeed, or try to erase the
4197 - * next one if not. And so forth. We don't want to take care
4198 - * about bad eraseblocks here - they'll be handled later.
4199 + * Bit-flips or integrity errors while reading the data area.
4200 + * It is difficult to say for sure what type of corruption is
4201 + * this, but presumably a power cut happened while this PEB was
4202 + * erased, so it became unstable and corrupted, and should be
4205 - list_for_each_entry_safe(seb, tmp_seb, head, u.list) {
4206 - if (seb->ec == UBI_SCAN_UNKNOWN_EC)
4207 - seb->ec = si->mean_ec;
4212 - err = ubi_scan_erase_peb(ubi, si, seb->pnum, seb->ec+1);
4219 - list_del(&seb->u.list);
4220 - dbg_bld("return PEB %d, EC %d", seb->pnum, seb->ec);
4224 + if (ubi_check_pattern(ubi->peb_buf1, 0xFF, ubi->leb_size))
4227 - ubi_err("no eraseblocks found");
4228 - return ERR_PTR(-ENOSPC);
4229 + ubi_err("PEB %d contains corrupted VID header, and the data does not "
4230 + "contain all 0xFF, this may be a non-UBI PEB or a severe VID "
4231 + "header corruption which requires manual inspection", pnum);
4232 + ubi_dbg_dump_vid_hdr(vid_hdr);
4233 + dbg_msg("hexdump of PEB %d offset %d, length %d",
4234 + pnum, ubi->leb_start, ubi->leb_size);
4235 + ubi_dbg_print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
4236 + ubi->peb_buf1, ubi->leb_size, 1);
4240 + mutex_unlock(&ubi->buf_mutex);
4245 @@ -718,7 +838,7 @@ static int process_eb(struct ubi_device
4248 long long uninitialized_var(ec);
4249 - int err, bitflips = 0, vol_id, ec_corr = 0;
4250 + int err, bitflips = 0, vol_id, ec_err = 0;
4252 dbg_bld("scan PEB %d", pnum);
4254 @@ -739,24 +859,39 @@ static int process_eb(struct ubi_device
4255 err = ubi_io_read_ec_hdr(ubi, pnum, ech, 0);
4258 - else if (err == UBI_IO_BITFLIPS)
4262 + case UBI_IO_BITFLIPS:
4264 - else if (err == UBI_IO_PEB_EMPTY)
4265 - return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase);
4266 - else if (err == UBI_IO_BAD_EC_HDR) {
4269 + si->empty_peb_count += 1;
4270 + return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 0,
4272 + case UBI_IO_FF_BITFLIPS:
4273 + si->empty_peb_count += 1;
4274 + return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 1,
4276 + case UBI_IO_BAD_HDR_EBADMSG:
4277 + case UBI_IO_BAD_HDR:
4279 * We have to also look at the VID header, possibly it is not
4280 * corrupted. Set %bitflips flag in order to make this PEB be
4281 * moved and EC be re-created.
4285 ec = UBI_SCAN_UNKNOWN_EC;
4289 + ubi_err("'ubi_io_read_ec_hdr()' returned unknown code %d", err);
4298 /* Make sure UBI version is OK */
4299 if (ech->version != UBI_VERSION) {
4300 ubi_err("this UBI version is %d, image version is %d",
4301 @@ -778,6 +913,28 @@ static int process_eb(struct ubi_device
4302 ubi_dbg_dump_ec_hdr(ech);
4307 + * Make sure that all PEBs have the same image sequence number.
4308 + * This allows us to detect situations when users flash UBI
4309 + * images incorrectly, so that the flash has the new UBI image
4310 + * and leftovers from the old one. This feature was added
4311 + * relatively recently, and the sequence number was always
4312 + * zero, because old UBI implementations always set it to zero.
4313 + * For this reason, we do not panic if some PEBs have zero
4314 + * sequence number, while other PEBs have non-zero sequence
4317 + image_seq = be32_to_cpu(ech->image_seq);
4318 + if (!ubi->image_seq && image_seq)
4319 + ubi->image_seq = image_seq;
4320 + if (ubi->image_seq && image_seq &&
4321 + ubi->image_seq != image_seq) {
4322 + ubi_err("bad image sequence number %d in PEB %d, "
4323 + "expected %d", image_seq, pnum, ubi->image_seq);
4324 + ubi_dbg_dump_ec_hdr(ech);
4329 /* OK, we've done with the EC header, let's look at the VID header */
4330 @@ -785,21 +942,71 @@ static int process_eb(struct ubi_device
4331 err = ubi_io_read_vid_hdr(ubi, pnum, vidh, 0);
4334 - else if (err == UBI_IO_BITFLIPS)
4338 + case UBI_IO_BITFLIPS:
4340 - else if (err == UBI_IO_BAD_VID_HDR ||
4341 - (err == UBI_IO_PEB_FREE && ec_corr)) {
4342 - /* VID header is corrupted */
4343 - err = add_to_list(si, pnum, ec, &si->corr);
4345 + case UBI_IO_BAD_HDR_EBADMSG:
4346 + if (ec_err == UBI_IO_BAD_HDR_EBADMSG)
4348 + * Both EC and VID headers are corrupted and were read
4349 + * with data integrity error, probably this is a bad
4350 + * PEB, but it is not marked as bad yet. This may also
4351 + * be a result of power cut during erasure.
4353 + si->maybe_bad_peb_count += 1;
4354 + case UBI_IO_BAD_HDR:
4357 + * Both headers are corrupted. There is a possibility
4358 + * that this is a valid UBI PEB which has corresponding
4359 + * LEB, but the headers are corrupted. However, it is
4360 + * impossible to distinguish it from a PEB which just
4361 + * contains garbage because of a power cut during erase
4362 + * operation. So we just schedule this PEB for erasure.
4364 + * Besides, in case of NOR flash, we deliberately
4365 + * corrupt both headers because NOR flash erasure is
4366 + * slow and can start from the end.
4371 + * The EC was OK, but the VID header is corrupted. We
4372 + * have to check what is in the data area.
4374 + err = check_corruption(ubi, vidh, pnum);
4379 + /* This corruption is caused by a power cut */
4380 + err = add_to_list(si, pnum, ec, 1, &si->erase);
4382 + /* This is an unexpected corruption */
4383 + err = add_corrupted(si, pnum, ec);
4386 goto adjust_mean_ec;
4387 - } else if (err == UBI_IO_PEB_FREE) {
4388 - /* No VID header - the physical eraseblock is free */
4389 - err = add_to_list(si, pnum, ec, &si->free);
4390 + case UBI_IO_FF_BITFLIPS:
4391 + err = add_to_list(si, pnum, ec, 1, &si->erase);
4394 goto adjust_mean_ec;
4397 + err = add_to_list(si, pnum, ec, 1, &si->erase);
4399 + err = add_to_list(si, pnum, ec, 0, &si->free);
4402 + goto adjust_mean_ec;
4404 + ubi_err("'ubi_io_read_vid_hdr()' returned unknown code %d",
4409 vol_id = be32_to_cpu(vidh->vol_id);
4410 @@ -810,11 +1017,11 @@ static int process_eb(struct ubi_device
4411 switch (vidh->compat) {
4412 case UBI_COMPAT_DELETE:
4413 ubi_msg("\"delete\" compatible internal volume %d:%d"
4414 - " found, remove it", vol_id, lnum);
4415 - err = add_to_list(si, pnum, ec, &si->corr);
4416 + " found, will remove it", vol_id, lnum);
4417 + err = add_to_list(si, pnum, ec, 1, &si->erase);
4424 ubi_msg("read-only compatible internal volume %d:%d"
4425 @@ -826,10 +1033,9 @@ static int process_eb(struct ubi_device
4426 case UBI_COMPAT_PRESERVE:
4427 ubi_msg("\"preserve\" compatible internal volume %d:%d"
4428 " found", vol_id, lnum);
4429 - err = add_to_list(si, pnum, ec, &si->alien);
4430 + err = add_to_list(si, pnum, ec, 0, &si->alien);
4433 - si->alien_peb_count += 1;
4436 case UBI_COMPAT_REJECT:
4437 @@ -839,13 +1045,15 @@ static int process_eb(struct ubi_device
4441 - /* Both UBI headers seem to be fine */
4443 + ubi_warn("valid VID header but corrupted EC header at PEB %d",
4445 err = ubi_scan_add_used(ubi, si, pnum, ec, vidh, bitflips);
4454 if (ec > si->max_ec)
4455 @@ -858,6 +1066,80 @@ adjust_mean_ec:
4459 + * check_what_we_have - check what PEB were found by scanning.
4460 + * @ubi: UBI device description object
4461 + * @si: scanning information
4463 + * This is a helper function which takes a look what PEBs were found by
4464 + * scanning, and decides whether the flash is empty and should be formatted and
4465 + * whether there are too many corrupted PEBs and we should not attach this
4466 + * MTD device. Returns zero if we should proceed with attaching the MTD device,
4467 + * and %-EINVAL if we should not.
4469 +static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si)
4471 + struct ubi_scan_leb *seb;
4472 + int max_corr, peb_count;
4474 + peb_count = ubi->peb_count - si->bad_peb_count - si->alien_peb_count;
4475 + max_corr = peb_count / 20 ?: 8;
4478 + * Few corrupted PEBs is not a problem and may be just a result of
4479 + * unclean reboots. However, many of them may indicate some problems
4480 + * with the flash HW or driver.
4482 + if (si->corr_peb_count) {
4483 + ubi_err("%d PEBs are corrupted and preserved",
4484 + si->corr_peb_count);
4485 + printk(KERN_ERR "Corrupted PEBs are:");
4486 + list_for_each_entry(seb, &si->corr, u.list)
4487 + printk(KERN_CONT " %d", seb->pnum);
4488 + printk(KERN_CONT "\n");
4491 + * If too many PEBs are corrupted, we refuse attaching,
4492 + * otherwise, only print a warning.
4494 + if (si->corr_peb_count >= max_corr) {
4495 + ubi_err("too many corrupted PEBs, refusing");
4500 + if (si->empty_peb_count + si->maybe_bad_peb_count == peb_count) {
4502 + * All PEBs are empty, or almost all - a couple PEBs look like
4503 + * they may be bad PEBs which were not marked as bad yet.
4505 + * This piece of code basically tries to distinguish between
4506 + * the following situations:
4508 + * 1. Flash is empty, but there are few bad PEBs, which are not
4509 + * marked as bad so far, and which were read with error. We
4510 + * want to go ahead and format this flash. While formatting,
4511 + * the faulty PEBs will probably be marked as bad.
4513 + * 2. Flash contains non-UBI data and we do not want to format
4514 + * it and destroy possibly important information.
4516 + if (si->maybe_bad_peb_count <= 2) {
4518 + ubi_msg("empty MTD device detected");
4519 + get_random_bytes(&ubi->image_seq,
4520 + sizeof(ubi->image_seq));
4522 + ubi_err("MTD device is not UBI-formatted and possibly "
4523 + "contains non-UBI data - refusing it");
4533 * ubi_scan - scan an MTD device.
4534 * @ubi: UBI device description object
4536 @@ -881,12 +1163,17 @@ struct ubi_scan_info *ubi_scan(struct ub
4537 INIT_LIST_HEAD(&si->erase);
4538 INIT_LIST_HEAD(&si->alien);
4539 si->volumes = RB_ROOT;
4543 + si->scan_leb_slab = kmem_cache_create("ubi_scan_leb_slab",
4544 + sizeof(struct ubi_scan_leb),
4546 + if (!si->scan_leb_slab)
4549 ech = kzalloc(ubi->ec_hdr_alsize, GFP_KERNEL);
4554 vidh = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
4556 @@ -904,15 +1191,12 @@ struct ubi_scan_info *ubi_scan(struct ub
4557 dbg_msg("scanning is finished");
4559 /* Calculate mean erase counter */
4560 - if (si->ec_count) {
4561 - do_div(si->ec_sum, si->ec_count);
4562 - si->mean_ec = si->ec_sum;
4565 + si->mean_ec = div_u64(si->ec_sum, si->ec_count);
4568 - ubi_msg("empty MTD device detected");
4570 - ubi->image_seq_set = 1;
4571 + err = check_what_we_have(ubi, si);
4576 * In case of unknown erase counter we use the mean erase counter
4577 @@ -938,11 +1222,8 @@ struct ubi_scan_info *ubi_scan(struct ub
4578 seb->ec = si->mean_ec;
4580 err = paranoid_check_si(ubi, si);
4588 ubi_free_vid_hdr(ubi, vidh);
4590 @@ -953,6 +1234,8 @@ out_vidh:
4591 ubi_free_vid_hdr(ubi, vidh);
4595 + kmem_cache_destroy(si->scan_leb_slab);
4597 ubi_scan_destroy_si(si);
4598 return ERR_PTR(err);
4599 @@ -961,11 +1244,12 @@ out_si:
4601 * destroy_sv - free the scanning volume information
4602 * @sv: scanning volume information
4603 + * @si: scanning information
4605 * This function destroys the volume RB-tree (@sv->root) and the scanning
4606 * volume information.
4608 -static void destroy_sv(struct ubi_scan_volume *sv)
4609 +static void destroy_sv(struct ubi_scan_info *si, struct ubi_scan_volume *sv)
4611 struct ubi_scan_leb *seb;
4612 struct rb_node *this = sv->root.rb_node;
4613 @@ -985,7 +1269,7 @@ static void destroy_sv(struct ubi_scan_v
4614 this->rb_right = NULL;
4618 + kmem_cache_free(si->scan_leb_slab, seb);
4622 @@ -1003,19 +1287,19 @@ void ubi_scan_destroy_si(struct ubi_scan
4624 list_for_each_entry_safe(seb, seb_tmp, &si->alien, u.list) {
4625 list_del(&seb->u.list);
4627 + kmem_cache_free(si->scan_leb_slab, seb);
4629 list_for_each_entry_safe(seb, seb_tmp, &si->erase, u.list) {
4630 list_del(&seb->u.list);
4632 + kmem_cache_free(si->scan_leb_slab, seb);
4634 list_for_each_entry_safe(seb, seb_tmp, &si->corr, u.list) {
4635 list_del(&seb->u.list);
4637 + kmem_cache_free(si->scan_leb_slab, seb);
4639 list_for_each_entry_safe(seb, seb_tmp, &si->free, u.list) {
4640 list_del(&seb->u.list);
4642 + kmem_cache_free(si->scan_leb_slab, seb);
4645 /* Destroy the volume RB-tree */
4646 @@ -1036,22 +1320,23 @@ void ubi_scan_destroy_si(struct ubi_scan
4647 rb->rb_right = NULL;
4651 + destroy_sv(si, sv);
4655 + kmem_cache_destroy(si->scan_leb_slab);
4659 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
4660 +#ifdef CONFIG_MTD_UBI_DEBUG
4663 * paranoid_check_si - check the scanning information.
4664 * @ubi: UBI device description object
4665 * @si: scanning information
4667 - * This function returns zero if the scanning information is all right, %1 if
4668 - * not and a negative error code if an error occurred.
4669 + * This function returns zero if the scanning information is all right, and a
4670 + * negative error code if not or if an error occurred.
4672 static int paranoid_check_si(struct ubi_device *ubi, struct ubi_scan_info *si)
4674 @@ -1061,6 +1346,9 @@ static int paranoid_check_si(struct ubi_
4675 struct ubi_scan_leb *seb, *last_seb;
4678 + if (!(ubi_chk_flags & UBI_CHK_GEN))
4682 * At first, check that scanning information is OK.
4684 @@ -1310,7 +1598,7 @@ bad_vid_hdr:
4687 ubi_dbg_dump_stack();
4692 -#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
4693 +#endif /* CONFIG_MTD_UBI_DEBUG */
4694 diff -uprN linux-2.6.28/drivers/mtd/ubi/scan.h ubifs-v2.6.28/drivers/mtd/ubi/scan.h
4695 --- linux-2.6.28/drivers/mtd/ubi/scan.h 2008-12-24 18:26:37.000000000 -0500
4696 +++ ubifs-v2.6.28/drivers/mtd/ubi/scan.h 2011-06-15 14:22:07.000000000 -0400
4698 * @pnum: physical eraseblock number
4699 * @lnum: logical eraseblock number
4700 * @scrub: if this physical eraseblock needs scrubbing
4701 + * @copy_flag: this LEB is a copy (@copy_flag is set in VID header of this LEB)
4702 * @sqnum: sequence number
4703 * @u: unions RB-tree or @list links
4704 * @u.rb: link in the per-volume RB-tree of &struct ubi_scan_leb objects
4705 @@ -42,7 +43,8 @@ struct ubi_scan_leb {
4710 + unsigned int scrub:1;
4711 + unsigned int copy_flag:1;
4712 unsigned long long sqnum;
4715 @@ -91,10 +93,15 @@ struct ubi_scan_volume {
4716 * @erase: list of physical eraseblocks which have to be erased
4717 * @alien: list of physical eraseblocks which should not be used by UBI (e.g.,
4718 * those belonging to "preserve"-compatible internal volumes)
4719 + * @corr_peb_count: count of PEBs in the @corr list
4720 + * @empty_peb_count: count of PEBs which are presumably empty (contain only
4722 + * @alien_peb_count: count of PEBs in the @alien list
4723 * @bad_peb_count: count of bad physical eraseblocks
4724 + * @maybe_bad_peb_count: count of bad physical eraseblocks which are not marked
4725 + * as bad yet, but which look bad
4726 * @vols_found: number of volumes found during scanning
4727 * @highest_vol_id: highest volume ID
4728 - * @alien_peb_count: count of physical eraseblocks in the @alien list
4729 * @is_empty: flag indicating whether the MTD device is empty or not
4730 * @min_ec: lowest erase counter value
4731 * @max_ec: highest erase counter value
4732 @@ -102,6 +109,7 @@ struct ubi_scan_volume {
4733 * @mean_ec: mean erase counter value
4734 * @ec_sum: a temporary variable used when calculating @mean_ec
4735 * @ec_count: a temporary variable used when calculating @mean_ec
4736 + * @scan_leb_slab: slab cache for &struct ubi_scan_leb objects
4738 * This data structure contains the result of scanning and may be used by other
4739 * UBI sub-systems to build final UBI data structures, further error-recovery
4740 @@ -113,10 +121,13 @@ struct ubi_scan_info {
4741 struct list_head free;
4742 struct list_head erase;
4743 struct list_head alien;
4744 + int corr_peb_count;
4745 + int empty_peb_count;
4746 + int alien_peb_count;
4748 + int maybe_bad_peb_count;
4751 - int alien_peb_count;
4755 @@ -124,6 +135,7 @@ struct ubi_scan_info {
4759 + struct kmem_cache *scan_leb_slab;
4763 @@ -133,7 +145,7 @@ struct ubi_vid_hdr;
4764 * ubi_scan_move_to_list - move a PEB from the volume tree to a list.
4766 * @sv: volume scanning information
4767 - * @seb: scanning eraseblock infprmation
4768 + * @seb: scanning eraseblock information
4769 * @list: the list to move to
4771 static inline void ubi_scan_move_to_list(struct ubi_scan_volume *sv,
4772 diff -uprN linux-2.6.28/drivers/mtd/ubi/ubi.h ubifs-v2.6.28/drivers/mtd/ubi/ubi.h
4773 --- linux-2.6.28/drivers/mtd/ubi/ubi.h 2011-06-15 15:12:27.000000000 -0400
4774 +++ ubifs-v2.6.28/drivers/mtd/ubi/ubi.h 2011-06-15 14:22:07.000000000 -0400
4776 #include <linux/device.h>
4777 #include <linux/string.h>
4778 #include <linux/vmalloc.h>
4779 +#include <linux/notifier.h>
4780 #include <linux/mtd/mtd.h>
4781 #include <linux/mtd/ubi.h>
4782 +#include <asm/pgtable.h>
4784 #include "ubi-media.h"
4788 * Error codes returned by the I/O sub-system.
4790 - * UBI_IO_PEB_EMPTY: the physical eraseblock is empty, i.e. it contains only
4792 - * UBI_IO_PEB_FREE: the physical eraseblock is free, i.e. it contains only a
4793 - * valid erase counter header, and the rest are %0xFF bytes
4794 - * UBI_IO_BAD_EC_HDR: the erase counter header is corrupted (bad magic or CRC)
4795 - * UBI_IO_BAD_VID_HDR: the volume identifier header is corrupted (bad magic or
4797 + * UBI_IO_FF: the read region of flash contains only 0xFFs
4798 + * UBI_IO_FF_BITFLIPS: the same as %UBI_IO_FF, but also there was a data
4799 + * integrity error reported by the MTD driver
4800 + * (uncorrectable ECC error in case of NAND)
4801 + * UBI_IO_BAD_HDR: the EC or VID header is corrupted (bad magic or CRC)
4802 + * UBI_IO_BAD_HDR_EBADMSG: the same as %UBI_IO_BAD_HDR, but also there was a
4803 + * data integrity error reported by the MTD driver
4804 + * (uncorrectable ECC error in case of NAND)
4805 * UBI_IO_BITFLIPS: bit-flips were detected and corrected
4807 + * Note, it is probably better to have bit-flip and ebadmsg as flags which can
4808 + * be or'ed with other error code. But this is a big change because there are
4809 + * many callers, so it is not worth the risk of introducing a bug
4812 - UBI_IO_PEB_EMPTY = 1,
4814 - UBI_IO_BAD_EC_HDR,
4815 - UBI_IO_BAD_VID_HDR,
4818 + UBI_IO_FF_BITFLIPS,
4820 + UBI_IO_BAD_HDR_EBADMSG,
4825 @@ -228,10 +235,7 @@ struct ubi_volume_desc;
4826 * @upd_marker: %1 if the update marker is set for this volume
4827 * @updating: %1 if the volume is being updated
4828 * @changing_leb: %1 if the atomic LEB change ioctl command is in progress
4830 - * @gluebi_desc: gluebi UBI volume descriptor
4831 - * @gluebi_refcount: reference count of the gluebi MTD device
4832 - * @gluebi_mtd: MTD device description object of the gluebi MTD device
4833 + * @direct_writes: %1 if direct writes are enabled for this volume
4835 * The @corrupted field indicates that the volume's contents is corrupted.
4836 * Since UBI protects only static volumes, this field is not relevant to
4837 @@ -275,17 +279,7 @@ struct ubi_volume {
4838 unsigned int upd_marker:1;
4839 unsigned int updating:1;
4840 unsigned int changing_leb:1;
4842 -#ifdef CONFIG_MTD_UBI_GLUEBI
4844 - * Gluebi-related stuff may be compiled out.
4845 - * Note: this should not be built into UBI but should be a separate
4846 - * ubimtd driver which works on top of UBI and emulates MTD devices.
4848 - struct ubi_volume_desc *gluebi_desc;
4849 - int gluebi_refcount;
4850 - struct mtd_info gluebi_mtd;
4852 + unsigned int direct_writes:1;
4856 @@ -314,7 +308,6 @@ struct ubi_wl_entry;
4857 * @vol->ref_count, @vol->mapping and @vol->eba_tbl.
4858 * @ref_count: count of references on the UBI device
4859 * @image_seq: image sequence number recorded on EC headers
4860 - * @image_seq_set: indicates @image_seq is known
4862 * @rsvd_pebs: count of reserved physical eraseblocks
4863 * @avail_pebs: count of available physical eraseblocks
4864 @@ -327,8 +320,9 @@ struct ubi_wl_entry;
4865 * @vtbl_slots: how many slots are available in the volume table
4866 * @vtbl_size: size of the volume table in bytes
4867 * @vtbl: in-RAM volume table copy
4868 - * @volumes_mutex: protects on-flash volume table and serializes volume
4869 - * changes, like creation, deletion, update, re-size and re-name
4870 + * @device_mutex: protects on-flash volume table and serializes volume
4871 + * creation, deletion, update, re-size, re-name and set
4874 * @max_ec: current highest erase counter value
4875 * @mean_ec: current mean erase counter value
4876 @@ -346,8 +340,8 @@ struct ubi_wl_entry;
4877 * protected from the wear-leveling worker)
4878 * @pq_head: protection queue head
4879 * @wl_lock: protects the @used, @free, @pq, @pq_head, @lookuptbl, @move_from,
4880 - * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works,
4881 - * @erroneous, and @erroneous_peb_count fields
4882 + * @move_to, @move_to_put @erase_pending, @wl_scheduled, @works,
4883 + * @erroneous, and @erroneous_peb_count fields
4884 * @move_mutex: serializes eraseblock moves
4885 * @work_sem: synchronizes the WL worker with use tasks
4886 * @wl_scheduled: non-zero if the wear-leveling was scheduled
4887 @@ -367,6 +361,8 @@ struct ubi_wl_entry;
4888 * @peb_size: physical eraseblock size
4889 * @bad_peb_count: count of bad physical eraseblocks
4890 * @good_peb_count: count of good physical eraseblocks
4891 + * @corr_peb_count: count of corrupted physical eraseblocks (preserved and not
4893 * @erroneous_peb_count: count of erroneous physical eraseblocks in @erroneous
4894 * @max_erroneous: maximum allowed amount of erroneous physical eraseblocks
4895 * @min_io_size: minimal input/output unit size of the underlying MTD device
4896 @@ -384,15 +380,15 @@ struct ubi_wl_entry;
4897 * @vid_hdr_shift: contains @vid_hdr_offset - @vid_hdr_aloffset
4898 * @bad_allowed: whether the MTD device admits of bad physical eraseblocks or
4900 + * @nor_flash: non-zero if working on top of NOR flash
4901 + * @max_write_size: maximum amount of bytes the underlying flash can write at a
4902 + * time (MTD write buffer size)
4903 * @mtd: MTD device descriptor
4905 * @peb_buf1: a buffer of PEB size used for different purposes
4906 * @peb_buf2: another buffer of PEB size used for different purposes
4907 * @buf_mutex: protects @peb_buf1 and @peb_buf2
4908 * @ckvol_mutex: serializes static volume checking when opening
4909 - * @mult_mutex: serializes operations on multiple volumes, like re-naming
4910 - * @dbg_peb_buf: buffer of PEB size used for debugging
4911 - * @dbg_buf_mutex: protects @dbg_peb_buf
4915 @@ -404,7 +400,6 @@ struct ubi_device {
4916 spinlock_t volumes_lock;
4919 - int image_seq_set;
4923 @@ -415,7 +410,7 @@ struct ubi_device {
4926 struct ubi_vtbl_record *vtbl;
4927 - struct mutex volumes_mutex;
4928 + struct mutex device_mutex;
4931 /* Note, mean_ec is not updated run-time - should be fixed */
4932 @@ -454,6 +449,7 @@ struct ubi_device {
4936 + int corr_peb_count;
4937 int erroneous_peb_count;
4940 @@ -466,26 +462,24 @@ struct ubi_device {
4942 int vid_hdr_aloffset;
4945 + unsigned int bad_allowed:1;
4946 + unsigned int nor_flash:1;
4947 + int max_write_size;
4948 struct mtd_info *mtd;
4952 struct mutex buf_mutex;
4953 struct mutex ckvol_mutex;
4954 - struct mutex mult_mutex;
4955 -#ifdef CONFIG_MTD_UBI_DEBUG
4956 - void *dbg_peb_buf;
4957 - struct mutex dbg_buf_mutex;
4961 extern struct kmem_cache *ubi_wl_entry_slab;
4962 -extern struct file_operations ubi_ctrl_cdev_operations;
4963 -extern struct file_operations ubi_cdev_operations;
4964 -extern struct file_operations ubi_vol_cdev_operations;
4965 +extern const struct file_operations ubi_ctrl_cdev_operations;
4966 +extern const struct file_operations ubi_cdev_operations;
4967 +extern const struct file_operations ubi_vol_cdev_operations;
4968 extern struct class *ubi_class;
4969 extern struct mutex ubi_devices_mutex;
4970 +extern struct blocking_notifier_head ubi_notifiers;
4973 int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
4974 @@ -517,17 +511,7 @@ int ubi_calc_data_len(const struct ubi_d
4976 int ubi_check_volume(struct ubi_device *ubi, int vol_id);
4977 void ubi_calculate_reserved(struct ubi_device *ubi);
4980 -#ifdef CONFIG_MTD_UBI_GLUEBI
4981 -int ubi_create_gluebi(struct ubi_device *ubi, struct ubi_volume *vol);
4982 -int ubi_destroy_gluebi(struct ubi_volume *vol);
4983 -void ubi_gluebi_updated(struct ubi_volume *vol);
4985 -#define ubi_create_gluebi(ubi, vol) 0
4986 -#define ubi_destroy_gluebi(vol) 0
4987 -#define ubi_gluebi_updated(vol)
4989 +int ubi_check_pattern(const void *buf, uint8_t patt, int size);
4992 int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol,
4993 @@ -578,6 +562,16 @@ struct ubi_device *ubi_get_device(int ub
4994 void ubi_put_device(struct ubi_device *ubi);
4995 struct ubi_device *ubi_get_by_major(int major);
4996 int ubi_major2num(int major);
4997 +int ubi_volume_notify(struct ubi_device *ubi, struct ubi_volume *vol,
4999 +int ubi_notify_all(struct ubi_device *ubi, int ntype,
5000 + struct notifier_block *nb);
5001 +int ubi_enumerate_volumes(struct notifier_block *nb);
5004 +void ubi_do_get_device_info(struct ubi_device *ubi, struct ubi_device_info *di);
5005 +void ubi_do_get_volume_info(struct ubi_device *ubi, struct ubi_volume *vol,
5006 + struct ubi_volume_info *vi);
5009 * ubi_rb_for_each_entry - walk an RB-tree.
5010 @@ -590,7 +584,8 @@ int ubi_major2num(int major);
5011 for (rb = rb_first(root), \
5012 pos = (rb ? container_of(rb, typeof(*pos), member) : NULL); \
5014 - rb = rb_next(rb), pos = container_of(rb, typeof(*pos), member))
5015 + rb = rb_next(rb), \
5016 + pos = (rb ? container_of(rb, typeof(*pos), member) : NULL))
5019 * ubi_zalloc_vid_hdr - allocate a volume identifier header object.
5020 diff -uprN linux-2.6.28/drivers/mtd/ubi/ubi-media.h ubifs-v2.6.28/drivers/mtd/ubi/ubi-media.h
5021 --- linux-2.6.28/drivers/mtd/ubi/ubi-media.h 2011-06-15 15:12:27.000000000 -0400
5022 +++ ubifs-v2.6.28/drivers/mtd/ubi/ubi-media.h 2011-06-15 14:22:07.000000000 -0400
5023 @@ -136,7 +136,7 @@ enum {
5024 * The erase counter header takes 64 bytes and has a plenty of unused space for
5025 * future usage. The unused fields are zeroed. The @version field is used to
5026 * indicate the version of UBI implementation which is supposed to be able to
5027 - * work with this UBI image. If @version is greater then the current UBI
5028 + * work with this UBI image. If @version is greater than the current UBI
5029 * version, the image is rejected. This may be useful in future if something
5030 * is changed radically. This field is duplicated in the volume identifier
5032 @@ -164,7 +164,7 @@ struct ubi_ec_hdr {
5036 -} __attribute__ ((packed));
5040 * struct ubi_vid_hdr - on-flash UBI volume identifier header.
5041 @@ -197,7 +197,7 @@ struct ubi_ec_hdr {
5042 * (sequence number) is used to distinguish between older and newer versions of
5043 * logical eraseblocks.
5045 - * There are 2 situations when there may be more then one physical eraseblock
5046 + * There are 2 situations when there may be more than one physical eraseblock
5047 * corresponding to the same logical eraseblock, i.e., having the same @vol_id
5048 * and @lnum values in the volume identifier header. Suppose we have a logical
5049 * eraseblock L and it is mapped to the physical eraseblock P.
5050 @@ -292,7 +292,7 @@ struct ubi_vid_hdr {
5054 -} __attribute__ ((packed));
5057 /* Internal UBI volumes count */
5058 #define UBI_INT_VOL_COUNT 1
5059 @@ -373,6 +373,6 @@ struct ubi_vtbl_record {
5063 -} __attribute__ ((packed));
5066 #endif /* !__UBI_MEDIA_H__ */
5067 diff -uprN linux-2.6.28/drivers/mtd/ubi/upd.c ubifs-v2.6.28/drivers/mtd/ubi/upd.c
5068 --- linux-2.6.28/drivers/mtd/ubi/upd.c 2008-12-24 18:26:37.000000000 -0500
5069 +++ ubifs-v2.6.28/drivers/mtd/ubi/upd.c 2011-06-15 14:22:07.000000000 -0400
5072 #include <linux/err.h>
5073 #include <linux/uaccess.h>
5074 -#include <asm/div64.h>
5075 +#include <linux/math64.h>
5079 @@ -68,10 +68,10 @@ static int set_update_marker(struct ubi_
5080 sizeof(struct ubi_vtbl_record));
5081 vtbl_rec.upd_marker = 1;
5083 - mutex_lock(&ubi->volumes_mutex);
5084 + mutex_lock(&ubi->device_mutex);
5085 err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec);
5086 - mutex_unlock(&ubi->volumes_mutex);
5087 vol->upd_marker = 1;
5088 + mutex_unlock(&ubi->device_mutex);
5092 @@ -89,7 +89,6 @@ static int clear_update_marker(struct ub
5097 struct ubi_vtbl_record vtbl_rec;
5099 dbg_gen("clear update marker for volume %d", vol->vol_id);
5100 @@ -101,19 +100,19 @@ static int clear_update_marker(struct ub
5102 if (vol->vol_type == UBI_STATIC_VOLUME) {
5104 - vol->used_bytes = tmp = bytes;
5105 - vol->last_eb_bytes = do_div(tmp, vol->usable_leb_size);
5106 - vol->used_ebs = tmp;
5107 + vol->used_bytes = bytes;
5108 + vol->used_ebs = div_u64_rem(bytes, vol->usable_leb_size,
5109 + &vol->last_eb_bytes);
5110 if (vol->last_eb_bytes)
5113 vol->last_eb_bytes = vol->usable_leb_size;
5116 - mutex_lock(&ubi->volumes_mutex);
5117 + mutex_lock(&ubi->device_mutex);
5118 err = ubi_change_vtbl_record(ubi, vol->vol_id, &vtbl_rec);
5119 - mutex_unlock(&ubi->volumes_mutex);
5120 vol->upd_marker = 0;
5121 + mutex_unlock(&ubi->device_mutex);
5125 @@ -131,7 +130,6 @@ int ubi_start_update(struct ubi_device *
5131 dbg_gen("start update of volume %d, %llu bytes", vol->vol_id, bytes);
5132 ubi_assert(!vol->updating && !vol->changing_leb);
5133 @@ -149,21 +147,23 @@ int ubi_start_update(struct ubi_device *
5137 + err = ubi_wl_flush(ubi);
5141 err = clear_update_marker(ubi, vol, 0);
5144 - err = ubi_wl_flush(ubi);
5146 - vol->updating = 0;
5147 + vol->updating = 0;
5151 vol->upd_buf = vmalloc(ubi->leb_size);
5156 - vol->upd_ebs = !!do_div(tmp, vol->usable_leb_size);
5157 - vol->upd_ebs += tmp;
5158 + vol->upd_ebs = div_u64(bytes + vol->usable_leb_size - 1,
5159 + vol->usable_leb_size);
5160 vol->upd_bytes = bytes;
5161 vol->upd_received = 0;
5163 @@ -282,7 +282,6 @@ static int write_leb(struct ubi_device *
5164 int ubi_more_update_data(struct ubi_device *ubi, struct ubi_volume *vol,
5165 const void __user *buf, int count)
5168 int lnum, offs, err = 0, len, to_write = count;
5170 dbg_gen("write %d of %lld bytes, %lld already passed",
5171 @@ -291,10 +290,7 @@ int ubi_more_update_data(struct ubi_devi
5175 - tmp = vol->upd_received;
5176 - offs = do_div(tmp, vol->usable_leb_size);
5179 + lnum = div_u64_rem(vol->upd_received, vol->usable_leb_size, &offs);
5180 if (vol->upd_received + count > vol->upd_bytes)
5181 to_write = count = vol->upd_bytes - vol->upd_received;
5183 @@ -369,16 +365,16 @@ int ubi_more_update_data(struct ubi_devi
5185 ubi_assert(vol->upd_received <= vol->upd_bytes);
5186 if (vol->upd_received == vol->upd_bytes) {
5187 + err = ubi_wl_flush(ubi);
5190 /* The update is finished, clear the update marker */
5191 err = clear_update_marker(ubi, vol, vol->upd_bytes);
5194 - err = ubi_wl_flush(ubi);
5196 - vol->updating = 0;
5198 - vfree(vol->upd_buf);
5200 + vol->updating = 0;
5202 + vfree(vol->upd_buf);
5206 diff -uprN linux-2.6.28/drivers/mtd/ubi/vmt.c ubifs-v2.6.28/drivers/mtd/ubi/vmt.c
5207 --- linux-2.6.28/drivers/mtd/ubi/vmt.c 2008-12-24 18:26:37.000000000 -0500
5208 +++ ubifs-v2.6.28/drivers/mtd/ubi/vmt.c 2011-06-15 14:22:07.000000000 -0400
5212 #include <linux/err.h>
5213 -#include <asm/div64.h>
5214 +#include <linux/math64.h>
5217 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
5218 +#ifdef CONFIG_MTD_UBI_DEBUG
5219 static int paranoid_check_volumes(struct ubi_device *ubi);
5221 #define paranoid_check_volumes(ubi) 0
5222 @@ -198,14 +198,13 @@ static void volume_sysfs_close(struct ub
5223 * %UBI_VOL_NUM_AUTO, this function automatically assign ID to the new volume
5224 * and saves it in @req->vol_id. Returns zero in case of success and a negative
5225 * error code in case of failure. Note, the caller has to have the
5226 - * @ubi->volumes_mutex locked.
5227 + * @ubi->device_mutex locked.
5229 int ubi_create_volume(struct ubi_device *ubi, struct ubi_mkvol_req *req)
5231 int i, err, vol_id = req->vol_id, do_free = 1;
5232 struct ubi_volume *vol;
5233 struct ubi_vtbl_record vtbl_rec;
5238 @@ -233,8 +232,8 @@ int ubi_create_volume(struct ubi_device
5239 req->vol_id = vol_id;
5242 - dbg_gen("volume ID %d, %llu bytes, type %d, name %s",
5243 - vol_id, (unsigned long long)req->bytes,
5244 + dbg_gen("create device %d, volume %d, %llu bytes, type %d, name %s",
5245 + ubi->ubi_num, vol_id, (unsigned long long)req->bytes,
5246 (int)req->vol_type, req->name);
5248 /* Ensure that this volume does not exist */
5249 @@ -255,14 +254,15 @@ int ubi_create_volume(struct ubi_device
5251 /* Calculate how many eraseblocks are requested */
5252 vol->usable_leb_size = ubi->leb_size - ubi->leb_size % req->alignment;
5253 - bytes = req->bytes;
5254 - if (do_div(bytes, vol->usable_leb_size))
5255 - vol->reserved_pebs = 1;
5256 - vol->reserved_pebs += bytes;
5257 + vol->reserved_pebs += div_u64(req->bytes + vol->usable_leb_size - 1,
5258 + vol->usable_leb_size);
5260 /* Reserve physical eraseblocks */
5261 if (vol->reserved_pebs > ubi->avail_pebs) {
5262 dbg_err("not enough PEBs, only %d available", ubi->avail_pebs);
5263 + if (ubi->corr_peb_count)
5264 + dbg_err("%d PEBs are corrupted and not used",
5265 + ubi->corr_peb_count);
5269 @@ -301,10 +301,10 @@ int ubi_create_volume(struct ubi_device
5271 (long long)vol->used_ebs * vol->usable_leb_size;
5273 - bytes = vol->used_bytes;
5274 - vol->last_eb_bytes = do_div(bytes, vol->usable_leb_size);
5275 - vol->used_ebs = bytes;
5276 - if (vol->last_eb_bytes)
5277 + vol->used_ebs = div_u64_rem(vol->used_bytes,
5278 + vol->usable_leb_size,
5279 + &vol->last_eb_bytes);
5280 + if (vol->last_eb_bytes != 0)
5283 vol->last_eb_bytes = vol->usable_leb_size;
5284 @@ -320,10 +320,6 @@ int ubi_create_volume(struct ubi_device
5288 - err = ubi_create_gluebi(ubi, vol);
5292 vol->dev.release = vol_release;
5293 vol->dev.parent = &ubi->dev;
5294 vol->dev.devt = dev;
5295 @@ -333,7 +329,7 @@ int ubi_create_volume(struct ubi_device
5296 err = device_register(&vol->dev);
5298 ubi_err("cannot register device");
5303 err = volume_sysfs_init(ubi, vol);
5304 @@ -361,7 +357,9 @@ int ubi_create_volume(struct ubi_device
5305 ubi->vol_count += 1;
5306 spin_unlock(&ubi->volumes_lock);
5308 - err = paranoid_check_volumes(ubi);
5309 + ubi_volume_notify(ubi, vol, UBI_VOLUME_ADDED);
5310 + if (paranoid_check_volumes(ubi))
5311 + dbg_err("check failed while creating volume %d", vol_id);
5315 @@ -376,10 +374,6 @@ out_sysfs:
5317 get_device(&vol->dev);
5318 volume_sysfs_close(vol);
5320 - if (ubi_destroy_gluebi(vol))
5321 - dbg_err("cannot destroy gluebi for volume %d:%d",
5322 - ubi->ubi_num, vol_id);
5324 cdev_del(&vol->cdev);
5326 @@ -406,7 +400,7 @@ out_unlock:
5328 * This function removes volume described by @desc. The volume has to be opened
5329 * in "exclusive" mode. Returns zero in case of success and a negative error
5330 - * code in case of failure. The caller has to have the @ubi->volumes_mutex
5331 + * code in case of failure. The caller has to have the @ubi->device_mutex
5334 int ubi_remove_volume(struct ubi_volume_desc *desc, int no_vtbl)
5335 @@ -415,7 +409,7 @@ int ubi_remove_volume(struct ubi_volume_
5336 struct ubi_device *ubi = vol->ubi;
5337 int i, err, vol_id = vol->vol_id, reserved_pebs = vol->reserved_pebs;
5339 - dbg_gen("remove UBI volume %d", vol_id);
5340 + dbg_gen("remove device %d, volume %d", ubi->ubi_num, vol_id);
5341 ubi_assert(desc->mode == UBI_EXCLUSIVE);
5342 ubi_assert(vol == ubi->volumes[vol_id]);
5344 @@ -434,10 +428,6 @@ int ubi_remove_volume(struct ubi_volume_
5345 ubi->volumes[vol_id] = NULL;
5346 spin_unlock(&ubi->volumes_lock);
5348 - err = ubi_destroy_gluebi(vol);
5353 err = ubi_change_vtbl_record(ubi, vol_id, NULL);
5355 @@ -468,8 +458,10 @@ int ubi_remove_volume(struct ubi_volume_
5356 ubi->vol_count -= 1;
5357 spin_unlock(&ubi->volumes_lock);
5360 - err = paranoid_check_volumes(ubi);
5361 + ubi_volume_notify(ubi, vol, UBI_VOLUME_REMOVED);
5362 + if (!no_vtbl && paranoid_check_volumes(ubi))
5363 + dbg_err("check failed while removing volume %d", vol_id);
5368 @@ -488,7 +480,7 @@ out_unlock:
5370 * This function re-sizes the volume and returns zero in case of success, and a
5371 * negative error code in case of failure. The caller has to have the
5372 - * @ubi->volumes_mutex locked.
5373 + * @ubi->device_mutex locked.
5375 int ubi_resize_volume(struct ubi_volume_desc *desc, int reserved_pebs)
5377 @@ -501,8 +493,8 @@ int ubi_resize_volume(struct ubi_volume_
5381 - dbg_gen("re-size volume %d to from %d to %d PEBs",
5382 - vol_id, vol->reserved_pebs, reserved_pebs);
5383 + dbg_gen("re-size device %d, volume %d to from %d to %d PEBs",
5384 + ubi->ubi_num, vol_id, vol->reserved_pebs, reserved_pebs);
5386 if (vol->vol_type == UBI_STATIC_VOLUME &&
5387 reserved_pebs < vol->used_ebs) {
5388 @@ -537,6 +529,9 @@ int ubi_resize_volume(struct ubi_volume_
5389 if (pebs > ubi->avail_pebs) {
5390 dbg_err("not enough PEBs: requested %d, available %d",
5391 pebs, ubi->avail_pebs);
5392 + if (ubi->corr_peb_count)
5393 + dbg_err("%d PEBs are corrupted and not used",
5394 + ubi->corr_peb_count);
5395 spin_unlock(&ubi->volumes_lock);
5398 @@ -590,7 +585,9 @@ int ubi_resize_volume(struct ubi_volume_
5399 (long long)vol->used_ebs * vol->usable_leb_size;
5402 - err = paranoid_check_volumes(ubi);
5403 + ubi_volume_notify(ubi, vol, UBI_VOLUME_RESIZED);
5404 + if (paranoid_check_volumes(ubi))
5405 + dbg_err("check failed while re-sizing volume %d", vol_id);
5409 @@ -635,11 +632,12 @@ int ubi_rename_volumes(struct ubi_device
5410 vol->name_len = re->new_name_len;
5411 memcpy(vol->name, re->new_name, re->new_name_len + 1);
5412 spin_unlock(&ubi->volumes_lock);
5413 + ubi_volume_notify(ubi, vol, UBI_VOLUME_RENAMED);
5418 - err = paranoid_check_volumes(ubi);
5419 + if (!err && paranoid_check_volumes(ubi))
5424 @@ -670,10 +668,6 @@ int ubi_add_volume(struct ubi_device *ub
5428 - err = ubi_create_gluebi(ubi, vol);
5432 vol->dev.release = vol_release;
5433 vol->dev.parent = &ubi->dev;
5434 vol->dev.devt = dev;
5435 @@ -681,21 +675,19 @@ int ubi_add_volume(struct ubi_device *ub
5436 sprintf(&vol->dev.bus_id[0], "%s_%d", ubi->ubi_name, vol->vol_id);
5437 err = device_register(&vol->dev);
5442 err = volume_sysfs_init(ubi, vol);
5444 cdev_del(&vol->cdev);
5445 - err = ubi_destroy_gluebi(vol);
5446 volume_sysfs_close(vol);
5450 - err = paranoid_check_volumes(ubi);
5451 + if (paranoid_check_volumes(ubi))
5452 + dbg_err("check failed while adding volume %d", vol_id);
5456 - err = ubi_destroy_gluebi(vol);
5458 cdev_del(&vol->cdev);
5460 @@ -711,17 +703,14 @@ out_cdev:
5462 void ubi_free_volume(struct ubi_device *ubi, struct ubi_volume *vol)
5466 dbg_gen("free volume %d", vol->vol_id);
5468 ubi->volumes[vol->vol_id] = NULL;
5469 - err = ubi_destroy_gluebi(vol);
5470 cdev_del(&vol->cdev);
5471 volume_sysfs_close(vol);
5474 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
5475 +#ifdef CONFIG_MTD_UBI_DEBUG
5478 * paranoid_check_volume - check volume information.
5479 @@ -800,11 +789,6 @@ static int paranoid_check_volume(struct
5484 - ubi_err("NULL volume name");
5488 n = strnlen(vol->name, vol->name_len + 1);
5489 if (n != vol->name_len) {
5490 ubi_err("bad name_len %lld", n);
5491 @@ -871,6 +855,7 @@ fail:
5493 ubi_dbg_dump_vol_info(vol);
5494 ubi_dbg_dump_vtbl_record(&ubi->vtbl[vol_id], vol_id);
5496 spin_unlock(&ubi->volumes_lock);
5499 @@ -885,6 +870,9 @@ static int paranoid_check_volumes(struct
5503 + if (!(ubi_chk_flags & UBI_CHK_GEN))
5506 for (i = 0; i < ubi->vtbl_slots; i++) {
5507 err = paranoid_check_volume(ubi, i);
5509 diff -uprN linux-2.6.28/drivers/mtd/ubi/vtbl.c ubifs-v2.6.28/drivers/mtd/ubi/vtbl.c
5510 --- linux-2.6.28/drivers/mtd/ubi/vtbl.c 2008-12-24 18:26:37.000000000 -0500
5511 +++ ubifs-v2.6.28/drivers/mtd/ubi/vtbl.c 2011-06-15 14:22:07.000000000 -0400
5513 #include <asm/div64.h>
5516 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
5517 +#ifdef CONFIG_MTD_UBI_DEBUG
5518 static void paranoid_vtbl_check(const struct ubi_device *ubi);
5520 #define paranoid_vtbl_check(ubi)
5521 @@ -365,7 +365,7 @@ write_error:
5522 * Probably this physical eraseblock went bad, try to pick
5525 - list_add_tail(&new_seb->u.list, &si->corr);
5526 + list_add(&new_seb->u.list, &si->erase);
5530 @@ -413,7 +413,7 @@ static struct ubi_vtbl_record *process_l
5531 * 0 contains more recent information.
5533 * So the plan is to first check LEB 0. Then
5534 - * a. if LEB 0 is OK, it must be containing the most resent data; then
5535 + * a. if LEB 0 is OK, it must be containing the most recent data; then
5536 * we compare it with LEB 1, and if they are different, we copy LEB
5538 * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1
5539 @@ -566,6 +566,7 @@ static int init_volumes(struct ubi_devic
5540 vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs);
5541 vol->alignment = be32_to_cpu(vtbl[i].alignment);
5542 vol->data_pad = be32_to_cpu(vtbl[i].data_pad);
5543 + vol->upd_marker = vtbl[i].upd_marker;
5544 vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ?
5545 UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME;
5546 vol->name_len = be16_to_cpu(vtbl[i].name_len);
5547 @@ -577,7 +578,7 @@ static int init_volumes(struct ubi_devic
5548 if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) {
5549 /* Auto re-size flag may be set only for one volume */
5550 if (ubi->autoresize_vol_id != -1) {
5551 - ubi_err("more then one auto-resize volume (%d "
5552 + ubi_err("more than one auto-resize volume (%d "
5553 "and %d)", ubi->autoresize_vol_id, i);
5556 @@ -660,9 +661,13 @@ static int init_volumes(struct ubi_devic
5557 ubi->vol_count += 1;
5560 - if (reserved_pebs > ubi->avail_pebs)
5561 + if (reserved_pebs > ubi->avail_pebs) {
5562 ubi_err("not enough PEBs, required %d, available %d",
5563 reserved_pebs, ubi->avail_pebs);
5564 + if (ubi->corr_peb_count)
5565 + ubi_err("%d PEBs are corrupted and not used",
5566 + ubi->corr_peb_count);
5568 ubi->rsvd_pebs += reserved_pebs;
5569 ubi->avail_pebs -= reserved_pebs;
5571 @@ -835,7 +840,7 @@ int ubi_read_volume_table(struct ubi_dev
5572 return PTR_ERR(ubi->vtbl);
5575 - ubi->avail_pebs = ubi->good_peb_count;
5576 + ubi->avail_pebs = ubi->good_peb_count - ubi->corr_peb_count;
5579 * The layout volume is OK, initialize the corresponding in-RAM data
5580 @@ -846,7 +851,7 @@ int ubi_read_volume_table(struct ubi_dev
5584 - * Get sure that the scanning information is consistent to the
5585 + * Make sure that the scanning information is consistent to the
5586 * information stored in the volume table.
5588 err = check_scanning_info(ubi, si);
5589 @@ -864,7 +869,7 @@ out_free:
5593 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
5594 +#ifdef CONFIG_MTD_UBI_DEBUG
5597 * paranoid_vtbl_check - check volume table.
5598 @@ -872,10 +877,13 @@ out_free:
5600 static void paranoid_vtbl_check(const struct ubi_device *ubi)
5602 + if (!(ubi_chk_flags & UBI_CHK_GEN))
5605 if (vtbl_check(ubi, ubi->vtbl)) {
5606 ubi_err("paranoid check failed");
5611 -#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
5612 +#endif /* CONFIG_MTD_UBI_DEBUG */
5613 diff -uprN linux-2.6.28/drivers/mtd/ubi/wl.c ubifs-v2.6.28/drivers/mtd/ubi/wl.c
5614 --- linux-2.6.28/drivers/mtd/ubi/wl.c 2011-06-15 15:12:27.000000000 -0400
5615 +++ ubifs-v2.6.28/drivers/mtd/ubi/wl.c 2011-06-15 14:22:07.000000000 -0400
5617 * situation when the picked physical eraseblock is constantly erased after the
5618 * data is written to it. So, we have a constant which limits the highest erase
5619 * counter of the free physical eraseblock to pick. Namely, the WL sub-system
5620 - * does not pick eraseblocks with erase counter greater then the lowest erase
5621 + * does not pick eraseblocks with erase counter greater than the lowest erase
5622 * counter plus %WL_FREE_MAX_DIFF.
5624 #define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
5625 @@ -161,7 +161,7 @@ struct ubi_work {
5629 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
5630 +#ifdef CONFIG_MTD_UBI_DEBUG
5631 static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
5632 static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
5633 struct rb_root *root);
5634 @@ -350,7 +350,7 @@ static void prot_queue_add(struct ubi_de
5635 * @max: highest possible erase counter
5637 * This function looks for a wear leveling entry with erase counter closest to
5638 - * @max and less then @max.
5639 + * @max and less than @max.
5641 static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
5643 @@ -459,6 +459,14 @@ retry:
5644 dbg_wl("PEB %d EC %d", e->pnum, e->ec);
5645 prot_queue_add(ubi, e);
5646 spin_unlock(&ubi->wl_lock);
5648 + err = ubi_dbg_check_all_ff(ubi, e->pnum, ubi->vid_hdr_aloffset,
5649 + ubi->peb_size - ubi->vid_hdr_aloffset);
5651 + ubi_err("new PEB %d does not contain all 0xFF bytes", e->pnum);
5658 @@ -505,7 +513,7 @@ static int sync_erase(struct ubi_device
5659 dbg_wl("erase PEB %d, old EC %llu", e->pnum, ec);
5661 err = paranoid_check_ec(ubi, e->pnum, e->ec);
5666 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
5667 @@ -605,7 +613,7 @@ static void schedule_ubi_work(struct ubi
5668 list_add_tail(&wrk->list, &ubi->works);
5669 ubi_assert(ubi->works_count >= 0);
5670 ubi->works_count += 1;
5671 - if (ubi->thread_enabled)
5672 + if (ubi->thread_enabled && !ubi_dbg_is_bgt_disabled())
5673 wake_up_process(ubi->bgt_thread);
5674 spin_unlock(&ubi->wl_lock);
5676 @@ -656,6 +664,7 @@ static int wear_leveling_worker(struct u
5679 int err, scrubbing = 0, torture = 0, protect = 0, erroneous = 0;
5680 + int vol_id = -1, uninitialized_var(lnum);
5681 struct ubi_wl_entry *e1, *e2;
5682 struct ubi_vid_hdr *vid_hdr;
5684 @@ -736,7 +745,7 @@ static int wear_leveling_worker(struct u
5686 err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
5687 if (err && err != UBI_IO_BITFLIPS) {
5688 - if (err == UBI_IO_PEB_FREE) {
5689 + if (err == UBI_IO_FF) {
5691 * We are trying to move PEB without a VID header. UBI
5692 * always write VID headers shortly after the PEB was
5693 @@ -750,6 +759,16 @@ static int wear_leveling_worker(struct u
5694 dbg_wl("PEB %d has no VID header", e1->pnum);
5697 + } else if (err == UBI_IO_FF_BITFLIPS) {
5699 + * The same situation as %UBI_IO_FF, but bit-flips were
5700 + * detected. It is better to schedule this PEB for
5703 + dbg_wl("PEB %d has no VID header but has bit-flips",
5706 + goto out_not_moved;
5709 ubi_err("error %d while reading VID header from PEB %d",
5710 @@ -757,6 +776,9 @@ static int wear_leveling_worker(struct u
5714 + vol_id = be32_to_cpu(vid_hdr->vol_id);
5715 + lnum = be32_to_cpu(vid_hdr->lnum);
5717 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
5719 if (err == MOVE_CANCEL_RACE) {
5720 @@ -773,7 +795,9 @@ static int wear_leveling_worker(struct u
5722 if (err == MOVE_CANCEL_BITFLIPS || err == MOVE_TARGET_WR_ERR ||
5723 err == MOVE_TARGET_RD_ERR) {
5724 - /* Target PEB bit-flips or write error, torture it */
5726 + * Target PEB had bit-flips or write error - torture it.
5731 @@ -803,10 +827,10 @@ static int wear_leveling_worker(struct u
5734 /* The PEB has been successfully moved */
5735 - ubi_free_vid_hdr(ubi, vid_hdr);
5737 - ubi_msg("scrubbed PEB %d, data moved to PEB %d",
5738 - e1->pnum, e2->pnum);
5739 + ubi_msg("scrubbed PEB %d (LEB %d:%d), data moved to PEB %d",
5740 + e1->pnum, vol_id, lnum, e2->pnum);
5741 + ubi_free_vid_hdr(ubi, vid_hdr);
5743 spin_lock(&ubi->wl_lock);
5744 if (!ubi->move_to_put) {
5745 @@ -830,7 +854,8 @@ static int wear_leveling_worker(struct u
5746 * Well, the target PEB was put meanwhile, schedule it for
5749 - dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
5750 + dbg_wl("PEB %d (LEB %d:%d) was put meanwhile, erase",
5751 + e2->pnum, vol_id, lnum);
5752 err = schedule_erase(ubi, e2, 0);
5754 kmem_cache_free(ubi_wl_entry_slab, e2);
5755 @@ -848,8 +873,12 @@ static int wear_leveling_worker(struct u
5756 * have been changed, schedule it for erasure.
5759 - dbg_wl("cancel moving PEB %d to PEB %d (%d)",
5760 - e1->pnum, e2->pnum, err);
5762 + dbg_wl("cancel moving PEB %d (LEB %d:%d) to PEB %d (%d)",
5763 + e1->pnum, vol_id, lnum, e2->pnum, err);
5765 + dbg_wl("cancel moving PEB %d to PEB %d (%d)",
5766 + e1->pnum, e2->pnum, err);
5767 spin_lock(&ubi->wl_lock);
5769 prot_queue_add(ubi, e1);
5770 @@ -875,8 +904,12 @@ out_not_moved:
5774 - ubi_err("error %d while moving PEB %d to PEB %d",
5775 - err, e1->pnum, e2->pnum);
5777 + ubi_err("error %d while moving PEB %d to PEB %d",
5778 + err, e1->pnum, e2->pnum);
5780 + ubi_err("error %d while moving PEB %d (LEB %d:%d) to PEB %d",
5781 + err, e1->pnum, vol_id, lnum, e2->pnum);
5782 spin_lock(&ubi->wl_lock);
5783 ubi->move_from = ubi->move_to = NULL;
5784 ubi->move_to_put = ubi->wl_scheduled = 0;
5785 @@ -932,7 +965,7 @@ static int ensure_wear_leveling(struct u
5787 * We schedule wear-leveling only if the difference between the
5788 * lowest erase counter of used physical eraseblocks and a high
5789 - * erase counter of free physical eraseblocks is greater then
5790 + * erase counter of free physical eraseblocks is greater than
5791 * %UBI_WL_THRESHOLD.
5793 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
5794 @@ -1058,10 +1091,9 @@ static int erase_worker(struct ubi_devic
5795 ubi_err("no reserved physical eraseblocks");
5799 spin_unlock(&ubi->volumes_lock);
5800 - ubi_msg("mark PEB %d as bad", pnum);
5802 + ubi_msg("mark PEB %d as bad", pnum);
5803 err = ubi_io_mark_bad(ubi, pnum);
5806 @@ -1071,7 +1103,9 @@ static int erase_worker(struct ubi_devic
5807 ubi->bad_peb_count += 1;
5808 ubi->good_peb_count -= 1;
5809 ubi_calculate_reserved(ubi);
5810 - if (ubi->beb_rsvd_pebs == 0)
5811 + if (ubi->beb_rsvd_pebs)
5812 + ubi_msg("%d PEBs left in the reserve", ubi->beb_rsvd_pebs);
5814 ubi_warn("last PEB from the reserved pool was used");
5815 spin_unlock(&ubi->volumes_lock);
5817 @@ -1188,7 +1222,8 @@ int ubi_wl_scrub_peb(struct ubi_device *
5819 spin_lock(&ubi->wl_lock);
5820 e = ubi->lookuptbl[pnum];
5821 - if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub)) {
5822 + if (e == ubi->move_from || in_wl_tree(e, &ubi->scrub) ||
5823 + in_wl_tree(e, &ubi->erroneous)) {
5824 spin_unlock(&ubi->wl_lock);
5827 @@ -1329,7 +1364,7 @@ int ubi_thread(void *u)
5829 spin_lock(&ubi->wl_lock);
5830 if (list_empty(&ubi->works) || ubi->ro_mode ||
5831 - !ubi->thread_enabled) {
5832 + !ubi->thread_enabled || ubi_dbg_is_bgt_disabled()) {
5833 set_current_state(TASK_INTERRUPTIBLE);
5834 spin_unlock(&ubi->wl_lock);
5836 @@ -1443,22 +1478,6 @@ int ubi_wl_init_scan(struct ubi_device *
5837 ubi->lookuptbl[e->pnum] = e;
5840 - list_for_each_entry(seb, &si->corr, u.list) {
5843 - e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL);
5847 - e->pnum = seb->pnum;
5849 - ubi->lookuptbl[e->pnum] = e;
5850 - if (schedule_erase(ubi, e, 0)) {
5851 - kmem_cache_free(ubi_wl_entry_slab, e);
5856 ubi_rb_for_each_entry(rb1, sv, &si->volumes, rb) {
5857 ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) {
5859 @@ -1485,6 +1504,9 @@ int ubi_wl_init_scan(struct ubi_device *
5860 if (ubi->avail_pebs < WL_RESERVED_PEBS) {
5861 ubi_err("no enough physical eraseblocks (%d, need %d)",
5862 ubi->avail_pebs, WL_RESERVED_PEBS);
5863 + if (ubi->corr_peb_count)
5864 + ubi_err("%d PEBs are corrupted and not used",
5865 + ubi->corr_peb_count);
5868 ubi->avail_pebs -= WL_RESERVED_PEBS;
5869 @@ -1539,7 +1561,7 @@ void ubi_wl_close(struct ubi_device *ubi
5870 kfree(ubi->lookuptbl);
5873 -#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
5874 +#ifdef CONFIG_MTD_UBI_DEBUG
5877 * paranoid_check_ec - make sure that the erase counter of a PEB is correct.
5878 @@ -1548,7 +1570,7 @@ void ubi_wl_close(struct ubi_device *ubi
5879 * @ec: the erase counter to check
5881 * This function returns zero if the erase counter of physical eraseblock @pnum
5882 - * is equivalent to @ec, %1 if not, and a negative error code if an error
5883 + * is equivalent to @ec, and a negative error code if not or if an error
5886 static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec)
5887 @@ -1557,6 +1579,9 @@ static int paranoid_check_ec(struct ubi_
5889 struct ubi_ec_hdr *ec_hdr;
5891 + if (!(ubi_chk_flags & UBI_CHK_GEN))
5894 ec_hdr = kzalloc(ubi->ec_hdr_alsize, GFP_NOFS);
5897 @@ -1587,19 +1612,22 @@ out_free:
5898 * @e: the wear-leveling entry to check
5899 * @root: the root of the tree
5901 - * This function returns zero if @e is in the @root RB-tree and %1 if it is
5903 + * This function returns zero if @e is in the @root RB-tree and %-EINVAL if it
5906 static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
5907 struct rb_root *root)
5909 + if (!(ubi_chk_flags & UBI_CHK_GEN))
5912 if (in_wl_tree(e, root))
5915 ubi_err("paranoid check failed for PEB %d, EC %d, RB-tree %p ",
5916 e->pnum, e->ec, root);
5917 ubi_dbg_dump_stack();
5923 @@ -1608,13 +1636,16 @@ static int paranoid_check_in_wl_tree(str
5924 * @ubi: UBI device description object
5925 * @e: the wear-leveling entry to check
5927 - * This function returns zero if @e is in @ubi->pq and %1 if it is not.
5928 + * This function returns zero if @e is in @ubi->pq and %-EINVAL if it is not.
5930 static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e)
5932 struct ubi_wl_entry *p;
5935 + if (!(ubi_chk_flags & UBI_CHK_GEN))
5938 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
5939 list_for_each_entry(p, &ubi->pq[i], u.list)
5941 @@ -1623,6 +1654,7 @@ static int paranoid_check_in_pq(struct u
5942 ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue",
5944 ubi_dbg_dump_stack();
5948 -#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */
5950 +#endif /* CONFIG_MTD_UBI_DEBUG */
5951 diff -uprN linux-2.6.28/fs/ubifs/budget.c ubifs-v2.6.28/fs/ubifs/budget.c
5952 --- linux-2.6.28/fs/ubifs/budget.c 2011-06-15 15:12:27.000000000 -0400
5953 +++ ubifs-v2.6.28/fs/ubifs/budget.c 2011-06-15 14:22:09.000000000 -0400
5954 @@ -91,7 +91,6 @@ static int shrink_liability(struct ubifs
5960 * run_gc - run garbage collector.
5961 * @c: UBIFS file-system description object
5962 @@ -131,7 +130,7 @@ static long long get_liability(struct ub
5965 spin_lock(&c->space_lock);
5966 - liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
5967 + liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
5968 spin_unlock(&c->space_lock);
5971 @@ -142,7 +141,7 @@ static long long get_liability(struct ub
5973 * This function is called when an operation cannot be budgeted because there
5974 * is supposedly no free space. But in most cases there is some free space:
5975 - * o budgeting is pessimistic, so it always budgets more then it is actually
5976 + * o budgeting is pessimistic, so it always budgets more than it is actually
5977 * needed, so shrinking the liability is one way to make free space - the
5978 * cached data will take less space then it was budgeted for;
5979 * o GC may turn some dark space into free space (budgeting treats dark space
5980 @@ -194,29 +193,26 @@ static int make_free_space(struct ubifs_
5984 - * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
5985 + * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
5986 * @c: UBIFS file-system description object
5988 - * This function calculates and returns the number of eraseblocks which should
5989 - * be kept for index usage.
5990 + * This function calculates and returns the number of LEBs which should be kept
5991 + * for index usage.
5993 int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
5995 - int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz;
5999 - idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
6001 + idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
6002 /* And make sure we have thrice the index size of space reserved */
6003 - idx_size = idx_size + (idx_size << 1);
6005 + idx_size += idx_size << 1;
6007 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
6008 * pair, nor similarly the two variables for the new index size, so we
6009 * have to do this costly 64-bit division on fast-path.
6011 - idx_size += eff_leb_size - 1;
6012 - idx_lebs = div_u64(idx_size, eff_leb_size);
6013 + idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size);
6015 * The index head is not available for the in-the-gaps method, so add an
6016 * extra LEB to compensate.
6017 @@ -300,7 +296,7 @@ long long ubifs_calc_available(const str
6019 static int can_use_rp(struct ubifs_info *c)
6021 - if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
6022 + if (current_fsuid() == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
6023 (c->rp_gid != 0 && in_group_p(c->rp_gid)))
6026 @@ -310,23 +306,23 @@ static int can_use_rp(struct ubifs_info
6027 * do_budget_space - reserve flash space for index and data growth.
6028 * @c: UBIFS file-system description object
6030 - * This function makes sure UBIFS has enough free eraseblocks for index growth
6032 + * This function makes sure UBIFS has enough free LEBs for index growth and
6035 * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
6036 * would take if it was consolidated and written to the flash. This guarantees
6037 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
6038 * be able to commit dirty index. So this function basically adds amount of
6039 * budgeted index space to the size of the current index, multiplies this by 3,
6040 - * and makes sure this does not exceed the amount of free eraseblocks.
6041 + * and makes sure this does not exceed the amount of free LEBs.
6043 - * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
6044 + * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
6045 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
6046 * be large, because UBIFS does not do any index consolidation as long as
6047 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
6048 * will contain a lot of dirt.
6049 - * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be
6050 - * consolidated to take up to @c->min_idx_lebs LEBs.
6051 + * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW,
6052 + * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
6054 * This function returns zero in case of success, and %-ENOSPC in case of
6056 @@ -371,13 +367,13 @@ static int do_budget_space(struct ubifs_
6057 c->lst.taken_empty_lebs;
6058 if (unlikely(rsvd_idx_lebs > lebs)) {
6059 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
6060 - "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
6061 + "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
6066 available = ubifs_calc_available(c, min_idx_lebs);
6067 - outstanding = c->budg_data_growth + c->budg_dd_growth;
6068 + outstanding = c->bi.data_growth + c->bi.dd_growth;
6070 if (unlikely(available < outstanding)) {
6071 dbg_budg("out of data space: available %lld, outstanding %lld",
6072 @@ -388,7 +384,7 @@ static int do_budget_space(struct ubifs_
6073 if (available - outstanding <= c->rp_size && !can_use_rp(c))
6076 - c->min_idx_lebs = min_idx_lebs;
6077 + c->bi.min_idx_lebs = min_idx_lebs;
6081 @@ -421,11 +417,11 @@ static int calc_data_growth(const struct
6085 - data_growth = req->new_ino ? c->inode_budget : 0;
6086 + data_growth = req->new_ino ? c->bi.inode_budget : 0;
6088 - data_growth += c->page_budget;
6089 + data_growth += c->bi.page_budget;
6091 - data_growth += c->dent_budget;
6092 + data_growth += c->bi.dent_budget;
6093 data_growth += req->new_ino_d;
6096 @@ -441,12 +437,12 @@ static int calc_dd_growth(const struct u
6100 - dd_growth = req->dirtied_page ? c->page_budget : 0;
6101 + dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
6103 if (req->dirtied_ino)
6104 - dd_growth += c->inode_budget << (req->dirtied_ino - 1);
6105 + dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
6107 - dd_growth += c->dent_budget;
6108 + dd_growth += c->bi.dent_budget;
6109 dd_growth += req->dirtied_ino_d;
6112 @@ -488,19 +484,19 @@ int ubifs_budget_space(struct ubifs_info
6115 spin_lock(&c->space_lock);
6116 - ubifs_assert(c->budg_idx_growth >= 0);
6117 - ubifs_assert(c->budg_data_growth >= 0);
6118 - ubifs_assert(c->budg_dd_growth >= 0);
6119 + ubifs_assert(c->bi.idx_growth >= 0);
6120 + ubifs_assert(c->bi.data_growth >= 0);
6121 + ubifs_assert(c->bi.dd_growth >= 0);
6123 - if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
6124 + if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
6125 dbg_budg("no space");
6126 spin_unlock(&c->space_lock);
6130 - c->budg_idx_growth += idx_growth;
6131 - c->budg_data_growth += data_growth;
6132 - c->budg_dd_growth += dd_growth;
6133 + c->bi.idx_growth += idx_growth;
6134 + c->bi.data_growth += data_growth;
6135 + c->bi.dd_growth += dd_growth;
6137 err = do_budget_space(c);
6139 @@ -512,9 +508,9 @@ again:
6142 /* Restore the old values */
6143 - c->budg_idx_growth -= idx_growth;
6144 - c->budg_data_growth -= data_growth;
6145 - c->budg_dd_growth -= dd_growth;
6146 + c->bi.idx_growth -= idx_growth;
6147 + c->bi.data_growth -= data_growth;
6148 + c->bi.dd_growth -= dd_growth;
6149 spin_unlock(&c->space_lock);
6152 @@ -534,9 +530,9 @@ again:
6155 dbg_budg("FS is full, -ENOSPC");
6157 + c->bi.nospace = 1;
6158 if (can_use_rp(c) || c->rp_size == 0)
6159 - c->nospace_rp = 1;
6160 + c->bi.nospace_rp = 1;
6163 ubifs_err("cannot budget space, error %d", err);
6164 @@ -551,8 +547,8 @@ again:
6165 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
6166 * since the index changes (which were budgeted for in @req->idx_growth) will
6167 * only be written to the media on commit, this function moves the index budget
6168 - * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
6169 - * zeroed by the commit operation.
6170 + * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
6171 + * by the commit operation.
6173 void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
6175 @@ -581,23 +577,23 @@ void ubifs_release_budget(struct ubifs_i
6176 if (!req->data_growth && !req->dd_growth)
6179 - c->nospace = c->nospace_rp = 0;
6180 + c->bi.nospace = c->bi.nospace_rp = 0;
6183 spin_lock(&c->space_lock);
6184 - c->budg_idx_growth -= req->idx_growth;
6185 - c->budg_uncommitted_idx += req->idx_growth;
6186 - c->budg_data_growth -= req->data_growth;
6187 - c->budg_dd_growth -= req->dd_growth;
6188 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
6190 - ubifs_assert(c->budg_idx_growth >= 0);
6191 - ubifs_assert(c->budg_data_growth >= 0);
6192 - ubifs_assert(c->budg_dd_growth >= 0);
6193 - ubifs_assert(c->min_idx_lebs < c->main_lebs);
6194 - ubifs_assert(!(c->budg_idx_growth & 7));
6195 - ubifs_assert(!(c->budg_data_growth & 7));
6196 - ubifs_assert(!(c->budg_dd_growth & 7));
6197 + c->bi.idx_growth -= req->idx_growth;
6198 + c->bi.uncommitted_idx += req->idx_growth;
6199 + c->bi.data_growth -= req->data_growth;
6200 + c->bi.dd_growth -= req->dd_growth;
6201 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
6203 + ubifs_assert(c->bi.idx_growth >= 0);
6204 + ubifs_assert(c->bi.data_growth >= 0);
6205 + ubifs_assert(c->bi.dd_growth >= 0);
6206 + ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
6207 + ubifs_assert(!(c->bi.idx_growth & 7));
6208 + ubifs_assert(!(c->bi.data_growth & 7));
6209 + ubifs_assert(!(c->bi.dd_growth & 7));
6210 spin_unlock(&c->space_lock);
6213 @@ -606,7 +602,7 @@ void ubifs_release_budget(struct ubifs_i
6214 * @c: UBIFS file-system description object
6216 * This function converts budget which was allocated for a new page of data to
6217 - * the budget of changing an existing page of data. The latter is smaller then
6218 + * the budget of changing an existing page of data. The latter is smaller than
6219 * the former, so this function only does simple re-calculation and does not
6220 * involve any write-back.
6222 @@ -614,13 +610,13 @@ void ubifs_convert_page_budget(struct ub
6224 spin_lock(&c->space_lock);
6225 /* Release the index growth reservation */
6226 - c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
6227 + c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
6228 /* Release the data growth reservation */
6229 - c->budg_data_growth -= c->page_budget;
6230 + c->bi.data_growth -= c->bi.page_budget;
6231 /* Increase the dirty data growth reservation instead */
6232 - c->budg_dd_growth += c->page_budget;
6233 + c->bi.dd_growth += c->bi.page_budget;
6234 /* And re-calculate the indexing space reservation */
6235 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
6236 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
6237 spin_unlock(&c->space_lock);
6240 @@ -640,7 +636,7 @@ void ubifs_release_dirty_inode_budget(st
6242 memset(&req, 0, sizeof(struct ubifs_budget_req));
6243 /* The "no space" flags will be cleared because dd_growth is > 0 */
6244 - req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
6245 + req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
6246 ubifs_release_budget(c, &req);
6249 @@ -696,12 +692,12 @@ long long ubifs_reported_space(const str
6250 * This function calculates amount of free space to report to user-space.
6252 * Because UBIFS may introduce substantial overhead (the index, node headers,
6253 - * alignment, wastage at the end of eraseblocks, etc), it cannot report real
6254 - * amount of free flash space it has (well, because not all dirty space is
6255 - * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so,
6256 - * it would bread user expectations about what free space is. Users seem to
6257 - * accustomed to assume that if the file-system reports N bytes of free space,
6258 - * they would be able to fit a file of N bytes to the FS. This almost works for
6259 + * alignment, wastage at the end of LEBs, etc), it cannot report real amount of
6260 + * free flash space it has (well, because not all dirty space is reclaimable,
6261 + * UBIFS does not actually know the real amount). If UBIFS did so, it would
6262 + * break user expectations about what free space is. Users seem to be accustomed
6263 + * to assuming that if the file-system reports N bytes of free space, they would
6264 + * be able to fit a file of N bytes to the FS. This almost works for
6265 * traditional file-systems, because they have way less overhead than UBIFS.
6266 * So, to keep users happy, UBIFS tries to take the overhead into account.
6268 @@ -710,9 +706,9 @@ long long ubifs_get_free_space_nolock(st
6269 int rsvd_idx_lebs, lebs;
6270 long long available, outstanding, free;
6272 - ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
6273 - outstanding = c->budg_data_growth + c->budg_dd_growth;
6274 - available = ubifs_calc_available(c, c->min_idx_lebs);
6275 + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
6276 + outstanding = c->bi.data_growth + c->bi.dd_growth;
6277 + available = ubifs_calc_available(c, c->bi.min_idx_lebs);
6280 * When reporting free space to user-space, UBIFS guarantees that it is
6281 @@ -725,8 +721,8 @@ long long ubifs_get_free_space_nolock(st
6282 * Note, the calculations below are similar to what we have in
6283 * 'do_budget_space()', so refer there for comments.
6285 - if (c->min_idx_lebs > c->lst.idx_lebs)
6286 - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
6287 + if (c->bi.min_idx_lebs > c->lst.idx_lebs)
6288 + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
6291 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
6292 @@ -745,7 +741,7 @@ long long ubifs_get_free_space_nolock(st
6293 * ubifs_get_free_space - return amount of free space.
6294 * @c: UBIFS file-system description object
6296 - * This function calculates and retuns amount of free space to report to
6297 + * This function calculates and returns amount of free space to report to
6300 long long ubifs_get_free_space(struct ubifs_info *c)
6301 diff -uprN linux-2.6.28/fs/ubifs/commit.c ubifs-v2.6.28/fs/ubifs/commit.c
6302 --- linux-2.6.28/fs/ubifs/commit.c 2011-06-15 15:12:27.000000000 -0400
6303 +++ ubifs-v2.6.28/fs/ubifs/commit.c 2011-06-15 14:22:09.000000000 -0400
6305 #include <linux/kthread.h>
6309 + * nothing_to_commit - check if there is nothing to commit.
6310 + * @c: UBIFS file-system description object
6312 + * This is a helper function which checks if there is anything to commit. It is
6313 + * used as an optimization to avoid starting the commit if it is not really
6314 + * necessary. Indeed, the commit operation always assumes flash I/O (e.g.,
6315 + * writing the commit start node to the log), and it is better to avoid doing
6316 + * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is
6317 + * nothing to commit, it is more optimal to avoid any flash I/O.
6319 + * This function has to be called with @c->commit_sem locked for writing -
6320 + * this function does not take LPT/TNC locks because the @c->commit_sem
6321 + * guarantees that we have exclusive access to the TNC and LPT data structures.
6323 + * This function returns %1 if there is nothing to commit and %0 otherwise.
6325 +static int nothing_to_commit(struct ubifs_info *c)
6328 + * During mounting or remounting from R/O mode to R/W mode we may
6329 + * commit for various recovery-related reasons.
6331 + if (c->mounting || c->remounting_rw)
6335 + * If the root TNC node is dirty, we definitely have something to
6338 + if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags))
6342 + * Even though the TNC is clean, the LPT tree may have dirty nodes. For
6343 + * example, this may happen if the budgeting subsystem invoked GC to
6344 + * make some free space, and the GC found an LEB with only dirty and
6345 + * free space. In this case GC would just change the lprops of this
6346 + * LEB (by turning all space into free space) and unmap it.
6348 + if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
6351 + ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
6352 + ubifs_assert(c->dirty_pn_cnt == 0);
6353 + ubifs_assert(c->dirty_nn_cnt == 0);
6359 * do_commit - commit the journal.
6360 * @c: UBIFS file-system description object
6361 @@ -62,11 +112,19 @@ static int do_commit(struct ubifs_info *
6362 struct ubifs_lp_stats lst;
6365 - if (c->ro_media) {
6366 + ubifs_assert(!c->ro_media && !c->ro_mount);
6368 + if (c->ro_error) {
6373 + if (nothing_to_commit(c)) {
6374 + up_write(&c->commit_sem);
6379 /* Sync all write buffers (necessary for recovery) */
6380 for (i = 0; i < c->jhead_cnt; i++) {
6381 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
6382 @@ -123,7 +181,7 @@ static int do_commit(struct ubifs_info *
6383 c->mst_node->root_len = cpu_to_le32(zroot.len);
6384 c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
6385 c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
6386 - c->mst_node->index_size = cpu_to_le64(c->old_idx_sz);
6387 + c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
6388 c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
6389 c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
6390 c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
6391 @@ -159,12 +217,12 @@ static int do_commit(struct ubifs_info *
6396 spin_lock(&c->cs_lock);
6397 c->cmt_state = COMMIT_RESTING;
6398 wake_up(&c->cmt_wq);
6399 dbg_cmt("commit end");
6400 spin_unlock(&c->cs_lock);
6405 @@ -510,7 +568,7 @@ int dbg_check_old_index(struct ubifs_inf
6406 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
6408 struct ubifs_debug_info *d = c->dbg;
6409 - union ubifs_key lower_key, upper_key, l_key, u_key;
6410 + union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key;
6411 unsigned long long uninitialized_var(last_sqnum);
6412 struct ubifs_idx_node *idx;
6413 struct list_head list;
6414 @@ -518,7 +576,7 @@ int dbg_check_old_index(struct ubifs_inf
6417 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
6421 INIT_LIST_HEAD(&list);
6423 diff -uprN linux-2.6.28/fs/ubifs/compress.c ubifs-v2.6.28/fs/ubifs/compress.c
6424 --- linux-2.6.28/fs/ubifs/compress.c 2011-06-15 15:12:27.000000000 -0400
6425 +++ ubifs-v2.6.28/fs/ubifs/compress.c 2011-06-15 14:22:09.000000000 -0400
6426 @@ -46,24 +46,11 @@ static struct ubifs_compressor lzo_compr
6431 -static DEFINE_MUTEX(lzo999_mutex);
6433 -static struct ubifs_compressor lzo999_compr = {
6434 - .compr_type = UBIFS_COMPR_LZO999,
6435 - .comp_mutex = &lzo999_mutex,
6437 - .capi_name = "lzo999",
6440 static struct ubifs_compressor lzo_compr = {
6441 .compr_type = UBIFS_COMPR_LZO,
6444 -static struct ubifs_compressor lzo_compr = {
6445 - .compr_type = UBIFS_COMPR_LZO999,
6450 #ifdef CONFIG_UBIFS_FS_ZLIB
6451 @@ -138,9 +125,6 @@ void ubifs_compress(const void *in_buf,
6452 if (in_len - *out_len < UBIFS_MIN_COMPRESS_DIFF)
6455 - if (*compr_type == UBIFS_COMPR_LZO999)
6456 - *compr_type = UBIFS_COMPR_LZO;
6461 @@ -245,19 +229,13 @@ int __init ubifs_compressors_init(void)
6465 - err = compr_init(&lzo999_compr);
6469 err = compr_init(&zlib_compr);
6474 ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr;
6478 - compr_exit(&lzo999_compr);
6480 compr_exit(&lzo_compr);
6482 @@ -268,7 +246,6 @@ out_lzo:
6484 void ubifs_compressors_exit(void)
6486 - compr_exit(&lzo999_compr);
6487 compr_exit(&lzo_compr);
6488 compr_exit(&zlib_compr);
6490 diff -uprN linux-2.6.28/fs/ubifs/debug.c ubifs-v2.6.28/fs/ubifs/debug.c
6491 --- linux-2.6.28/fs/ubifs/debug.c 2011-06-15 15:12:27.000000000 -0400
6492 +++ ubifs-v2.6.28/fs/ubifs/debug.c 2011-06-15 14:22:09.000000000 -0400
6493 @@ -42,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
6494 static char dbg_key_buf0[128];
6495 static char dbg_key_buf1[128];
6497 -unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT;
6498 -unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT;
6499 +unsigned int ubifs_chk_flags;
6500 unsigned int ubifs_tst_flags;
6502 -module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
6503 module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
6504 module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
6506 -MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
6507 MODULE_PARM_DESC(debug_chks, "Debug check flags");
6508 MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
6510 @@ -210,6 +207,20 @@ const char *dbg_cstate(int cmt_state)
6514 +const char *dbg_jhead(int jhead)
6520 + return "1 (base)";
6522 + return "2 (data)";
6524 + return "unknown journal head";
6528 static void dump_ch(const struct ubifs_ch *ch)
6530 printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic));
6531 @@ -302,6 +313,8 @@ void dbg_dump_node(const struct ubifs_in
6532 printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
6533 printk(KERN_DEBUG "\t big_lpt %u\n",
6534 !!(sup_flags & UBIFS_FLG_BIGLPT));
6535 + printk(KERN_DEBUG "\t space_fixup %u\n",
6536 + !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
6537 printk(KERN_DEBUG "\tmin_io_size %u\n",
6538 le32_to_cpu(sup->min_io_size));
6539 printk(KERN_DEBUG "\tleb_size %u\n",
6540 @@ -479,9 +492,9 @@ void dbg_dump_node(const struct ubifs_in
6541 "bad or corrupted node)");
6543 for (i = 0; i < nlen && dent->name[i]; i++)
6544 - printk("%c", dent->name[i]);
6545 + printk(KERN_CONT "%c", dent->name[i]);
6548 + printk(KERN_CONT "\n");
6552 @@ -592,7 +605,7 @@ void dbg_dump_lstats(const struct ubifs_
6553 spin_unlock(&dbg_lock);
6556 -void dbg_dump_budg(struct ubifs_info *c)
6557 +void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
6561 @@ -600,31 +613,48 @@ void dbg_dump_budg(struct ubifs_info *c)
6562 struct ubifs_gced_idx_leb *idx_gc;
6563 long long available, outstanding, free;
6565 - ubifs_assert(spin_is_locked(&c->space_lock));
6566 + spin_lock(&c->space_lock);
6567 spin_lock(&dbg_lock);
6568 - printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
6569 - "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
6570 - c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
6571 - printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
6572 - "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
6573 - c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
6575 - printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
6576 - "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
6577 - c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
6578 + printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
6579 + "total budget sum %lld\n", current->pid,
6580 + bi->data_growth + bi->dd_growth,
6581 + bi->data_growth + bi->dd_growth + bi->idx_growth);
6582 + printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
6583 + "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
6585 + printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
6586 + "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
6587 + bi->uncommitted_idx);
6588 + printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
6589 + bi->page_budget, bi->inode_budget, bi->dent_budget);
6590 + printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
6591 + bi->nospace, bi->nospace_rp);
6592 + printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
6593 + c->dark_wm, c->dead_wm, c->max_idx_node_sz);
6597 + * If we are dumping saved budgeting data, do not print
6598 + * additional information which is about the current state, not
6599 + * the old one which corresponded to the saved budgeting data.
6603 + printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
6604 + c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
6605 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
6606 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
6607 atomic_long_read(&c->dirty_zn_cnt),
6608 atomic_long_read(&c->clean_zn_cnt));
6609 - printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
6610 - c->dark_wm, c->dead_wm, c->max_idx_node_sz);
6611 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
6612 c->gc_lnum, c->ihead_lnum);
6614 /* If we are in R/O mode, journal heads do not exist */
6616 for (i = 0; i < c->jhead_cnt; i++)
6617 - printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
6618 - c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
6619 + printk(KERN_DEBUG "\tjhead %s\t LEB %d\n",
6620 + dbg_jhead(c->jheads[i].wbuf.jhead),
6621 + c->jheads[i].wbuf.lnum);
6622 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
6623 bud = rb_entry(rb, struct ubifs_bud, rb);
6624 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
6625 @@ -637,20 +667,109 @@ void dbg_dump_budg(struct ubifs_info *c)
6626 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
6628 /* Print budgeting predictions */
6629 - available = ubifs_calc_available(c, c->min_idx_lebs);
6630 - outstanding = c->budg_data_growth + c->budg_dd_growth;
6631 + available = ubifs_calc_available(c, c->bi.min_idx_lebs);
6632 + outstanding = c->bi.data_growth + c->bi.dd_growth;
6633 free = ubifs_get_free_space_nolock(c);
6634 printk(KERN_DEBUG "Budgeting predictions:\n");
6635 printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
6636 available, outstanding, free);
6638 spin_unlock(&dbg_lock);
6639 + spin_unlock(&c->space_lock);
6642 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
6644 - printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
6645 - "flags %#x\n", lp->lnum, lp->free, lp->dirty,
6646 - c->leb_size - lp->free - lp->dirty, lp->flags);
6647 + int i, spc, dark = 0, dead = 0;
6648 + struct rb_node *rb;
6649 + struct ubifs_bud *bud;
6651 + spc = lp->free + lp->dirty;
6652 + if (spc < c->dead_wm)
6655 + dark = ubifs_calc_dark(c, spc);
6657 + if (lp->flags & LPROPS_INDEX)
6658 + printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
6659 + "free + dirty %-8d flags %#x (", lp->lnum, lp->free,
6660 + lp->dirty, c->leb_size - spc, spc, lp->flags);
6662 + printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
6663 + "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d "
6664 + "flags %#-4x (", lp->lnum, lp->free, lp->dirty,
6665 + c->leb_size - spc, spc, dark, dead,
6666 + (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
6668 + if (lp->flags & LPROPS_TAKEN) {
6669 + if (lp->flags & LPROPS_INDEX)
6670 + printk(KERN_CONT "index, taken");
6672 + printk(KERN_CONT "taken");
6676 + if (lp->flags & LPROPS_INDEX) {
6677 + switch (lp->flags & LPROPS_CAT_MASK) {
6678 + case LPROPS_DIRTY_IDX:
6679 + s = "dirty index";
6681 + case LPROPS_FRDI_IDX:
6682 + s = "freeable index";
6688 + switch (lp->flags & LPROPS_CAT_MASK) {
6689 + case LPROPS_UNCAT:
6690 + s = "not categorized";
6692 + case LPROPS_DIRTY:
6698 + case LPROPS_EMPTY:
6701 + case LPROPS_FREEABLE:
6709 + printk(KERN_CONT "%s", s);
6712 + for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) {
6713 + bud = rb_entry(rb, struct ubifs_bud, rb);
6714 + if (bud->lnum == lp->lnum) {
6716 + for (i = 0; i < c->jhead_cnt; i++) {
6718 + * Note, if we are in R/O mode or in the middle
6719 + * of mounting/re-mounting, the write-buffers do
6723 + lp->lnum == c->jheads[i].wbuf.lnum) {
6724 + printk(KERN_CONT ", jhead %s",
6730 + printk(KERN_CONT ", bud of jhead %s",
6731 + dbg_jhead(bud->jhead));
6734 + if (lp->lnum == c->gc_lnum)
6735 + printk(KERN_CONT ", GC LEB");
6736 + printk(KERN_CONT ")\n");
6739 void dbg_dump_lprops(struct ubifs_info *c)
6740 @@ -718,16 +837,24 @@ void dbg_dump_leb(const struct ubifs_inf
6742 struct ubifs_scan_leb *sleb;
6743 struct ubifs_scan_node *snod;
6746 if (dbg_failure_mode)
6749 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
6750 current->pid, lnum);
6751 - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
6753 + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
6755 + ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
6759 + sleb = ubifs_scan(c, lnum, 0, buf, 0);
6761 ubifs_err("scan error %d", (int)PTR_ERR(sleb));
6766 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
6767 @@ -743,6 +870,9 @@ void dbg_dump_leb(const struct ubifs_inf
6768 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
6769 current->pid, lnum);
6770 ubifs_scan_destroy(sleb);
6777 @@ -869,11 +999,41 @@ void dbg_dump_index(struct ubifs_info *c
6778 void dbg_save_space_info(struct ubifs_info *c)
6780 struct ubifs_debug_info *d = c->dbg;
6782 - ubifs_get_lp_stats(c, &d->saved_lst);
6785 spin_lock(&c->space_lock);
6786 + memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
6787 + memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
6788 + d->saved_idx_gc_cnt = c->idx_gc_cnt;
6791 + * We use a dirty hack here and zero out @c->freeable_cnt, because it
6792 + * affects the free space calculations, and UBIFS might not know about
6793 + * all freeable eraseblocks. Indeed, we know about freeable eraseblocks
6794 + * only when we read their lprops, and we do this only lazily, upon the
6795 + * need. So at any given point of time @c->freeable_cnt might be not
6796 + * exactly accurate.
6798 + * Just one example about the issue we hit when we did not zero
6799 + * @c->freeable_cnt.
6800 + * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save the
6801 + * amount of free space in @d->saved_free
6802 + * 2. We re-mount R/W, which makes UBIFS to read the "lsave"
6803 + * information from flash, where we cache LEBs from various
6804 + * categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()'
6805 + * -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()'
6806 + * -> 'ubifs_get_pnode()' -> 'update_cats()'
6807 + * -> 'ubifs_add_to_cat()').
6808 + * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt
6810 + * 4. We calculate the amount of free space when the re-mount is
6811 + * finished in 'dbg_check_space_info()' and it does not match
6814 + freeable_cnt = c->freeable_cnt;
6815 + c->freeable_cnt = 0;
6816 d->saved_free = ubifs_get_free_space_nolock(c);
6817 + c->freeable_cnt = freeable_cnt;
6818 spin_unlock(&c->space_lock);
6821 @@ -890,12 +1050,15 @@ int dbg_check_space_info(struct ubifs_in
6823 struct ubifs_debug_info *d = c->dbg;
6824 struct ubifs_lp_stats lst;
6825 - long long avail, free;
6829 spin_lock(&c->space_lock);
6830 - avail = ubifs_calc_available(c, c->min_idx_lebs);
6831 + freeable_cnt = c->freeable_cnt;
6832 + c->freeable_cnt = 0;
6833 + free = ubifs_get_free_space_nolock(c);
6834 + c->freeable_cnt = freeable_cnt;
6835 spin_unlock(&c->space_lock);
6836 - free = ubifs_get_free_space(c);
6838 if (free != d->saved_free) {
6839 ubifs_err("free space changed from %lld to %lld",
6840 @@ -908,12 +1071,14 @@ int dbg_check_space_info(struct ubifs_in
6842 ubifs_msg("saved lprops statistics dump");
6843 dbg_dump_lstats(&d->saved_lst);
6844 - ubifs_get_lp_stats(c, &lst);
6845 + ubifs_msg("saved budgeting info dump");
6846 + dbg_dump_budg(c, &d->saved_bi);
6847 + ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
6848 ubifs_msg("current lprops statistics dump");
6849 - dbg_dump_lstats(&d->saved_lst);
6850 - spin_lock(&c->space_lock);
6852 - spin_unlock(&c->space_lock);
6853 + ubifs_get_lp_stats(c, &lst);
6854 + dbg_dump_lstats(&lst);
6855 + ubifs_msg("current budgeting info dump");
6856 + dbg_dump_budg(c, &c->bi);
6860 @@ -1214,7 +1379,7 @@ static int dbg_check_znode(struct ubifs_
6863 * Make sure the last key in our znode is less or
6864 - * equivalent than the the key in zbranch which goes
6865 + * equivalent than the key in the zbranch which goes
6866 * after our pointing zbranch.
6868 cmp = keys_cmp(c, max,
6869 @@ -1657,6 +1822,8 @@ static struct fsck_inode *add_inode(stru
6870 struct rb_node **p, *parent = NULL;
6871 struct fsck_inode *fscki;
6872 ino_t inum = key_inum_flash(c, &ino->key);
6873 + struct inode *inode;
6874 + struct ubifs_inode *ui;
6876 p = &fsckd->inodes.rb_node;
6878 @@ -1680,19 +1847,46 @@ static struct fsck_inode *add_inode(stru
6880 return ERR_PTR(-ENOMEM);
6882 + inode = ilookup(c->vfs_sb, inum);
6885 - fscki->nlink = le32_to_cpu(ino->nlink);
6886 - fscki->size = le64_to_cpu(ino->size);
6887 - fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
6888 - fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
6889 - fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
6890 - fscki->mode = le32_to_cpu(ino->mode);
6892 + * If the inode is present in the VFS inode cache, use it instead of
6893 + * the on-flash inode which might be out-of-date. E.g., the size might
6894 + * be out-of-date. If we do not do this, the following may happen, for
6896 + * 1. A power cut happens
6897 + * 2. We mount the file-system R/O, the replay process fixes up the
6898 + * inode size in the VFS cache, but not on-flash.
6899 + * 3. 'check_leaf()' fails because it hits a data node beyond inode
6903 + fscki->nlink = le32_to_cpu(ino->nlink);
6904 + fscki->size = le64_to_cpu(ino->size);
6905 + fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
6906 + fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
6907 + fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
6908 + fscki->mode = le32_to_cpu(ino->mode);
6910 + ui = ubifs_inode(inode);
6911 + fscki->nlink = inode->i_nlink;
6912 + fscki->size = inode->i_size;
6913 + fscki->xattr_cnt = ui->xattr_cnt;
6914 + fscki->xattr_sz = ui->xattr_size;
6915 + fscki->xattr_nms = ui->xattr_names;
6916 + fscki->mode = inode->i_mode;
6920 if (S_ISDIR(fscki->mode)) {
6921 fscki->calc_sz = UBIFS_INO_NODE_SZ;
6922 fscki->calc_cnt = 2;
6925 rb_link_node(&fscki->rb, parent, p);
6926 rb_insert_color(&fscki->rb, &fsckd->inodes);
6931 @@ -1916,7 +2110,7 @@ static int check_leaf(struct ubifs_info
6932 inum = key_inum_flash(c, &dent->key);
6933 fscki1 = read_add_inode(c, priv, inum);
6934 if (IS_ERR(fscki1)) {
6935 - err = PTR_ERR(fscki);
6936 + err = PTR_ERR(fscki1);
6937 ubifs_err("error %d while processing entry node and "
6938 "trying to find parent inode node %lu",
6939 err, (unsigned long)inum);
6940 @@ -2145,14 +2339,169 @@ out_free:
6944 -static int invocation_cnt;
6946 + * dbg_check_data_nodes_order - check that list of data nodes is sorted.
6947 + * @c: UBIFS file-system description object
6948 + * @head: the list of nodes ('struct ubifs_scan_node' objects)
6950 + * This function returns zero if the list of data nodes is sorted correctly,
6951 + * and %-EINVAL if not.
6953 +int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
6955 + struct list_head *cur;
6956 + struct ubifs_scan_node *sa, *sb;
6958 + if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
6961 + for (cur = head->next; cur->next != head; cur = cur->next) {
6962 + ino_t inuma, inumb;
6963 + uint32_t blka, blkb;
6966 + sa = container_of(cur, struct ubifs_scan_node, list);
6967 + sb = container_of(cur->next, struct ubifs_scan_node, list);
6969 + if (sa->type != UBIFS_DATA_NODE) {
6970 + ubifs_err("bad node type %d", sa->type);
6971 + dbg_dump_node(c, sa->node);
6974 + if (sb->type != UBIFS_DATA_NODE) {
6975 + ubifs_err("bad node type %d", sb->type);
6976 + dbg_dump_node(c, sb->node);
6980 + inuma = key_inum(c, &sa->key);
6981 + inumb = key_inum(c, &sb->key);
6983 + if (inuma < inumb)
6985 + if (inuma > inumb) {
6986 + ubifs_err("larger inum %lu goes before inum %lu",
6987 + (unsigned long)inuma, (unsigned long)inumb);
6991 + blka = key_block(c, &sa->key);
6992 + blkb = key_block(c, &sb->key);
6994 + if (blka > blkb) {
6995 + ubifs_err("larger block %u goes before %u", blka, blkb);
6998 + if (blka == blkb) {
6999 + ubifs_err("two data nodes for the same block");
7007 + dbg_dump_node(c, sa->node);
7008 + dbg_dump_node(c, sb->node);
7013 + * dbg_check_nondata_nodes_order - check that list of non-data nodes is sorted.
7014 + * @c: UBIFS file-system description object
7015 + * @head: the list of nodes ('struct ubifs_scan_node' objects)
7017 + * This function returns zero if the list of non-data nodes is sorted correctly,
7018 + * and %-EINVAL if not.
7020 +int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
7022 + struct list_head *cur;
7023 + struct ubifs_scan_node *sa, *sb;
7025 + if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
7028 + for (cur = head->next; cur->next != head; cur = cur->next) {
7029 + ino_t inuma, inumb;
7030 + uint32_t hasha, hashb;
7033 + sa = container_of(cur, struct ubifs_scan_node, list);
7034 + sb = container_of(cur->next, struct ubifs_scan_node, list);
7036 + if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
7037 + sa->type != UBIFS_XENT_NODE) {
7038 + ubifs_err("bad node type %d", sa->type);
7039 + dbg_dump_node(c, sa->node);
7042 + if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
7043 + sb->type != UBIFS_XENT_NODE) {
7044 + ubifs_err("bad node type %d", sb->type);
7045 + dbg_dump_node(c, sb->node);
7049 + if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
7050 + ubifs_err("non-inode node goes before inode node");
7054 + if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE)
7057 + if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
7058 + /* Inode nodes are sorted in descending size order */
7059 + if (sa->len < sb->len) {
7060 + ubifs_err("smaller inode node goes first");
7067 + * This is either a dentry or xentry, which should be sorted in
7068 + * ascending (parent ino, hash) order.
7070 + inuma = key_inum(c, &sa->key);
7071 + inumb = key_inum(c, &sb->key);
7073 + if (inuma < inumb)
7075 + if (inuma > inumb) {
7076 + ubifs_err("larger inum %lu goes before inum %lu",
7077 + (unsigned long)inuma, (unsigned long)inumb);
7081 + hasha = key_block(c, &sa->key);
7082 + hashb = key_block(c, &sb->key);
7084 + if (hasha > hashb) {
7085 + ubifs_err("larger hash %u goes before %u",
7094 + ubifs_msg("dumping first node");
7095 + dbg_dump_node(c, sa->node);
7096 + ubifs_msg("dumping second node");
7097 + dbg_dump_node(c, sb->node);
7102 int dbg_force_in_the_gaps(void)
7104 - if (!dbg_force_in_the_gaps_enabled)
7105 + if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
7107 - /* Force in-the-gaps every 8th commit */
7108 - return !((invocation_cnt++) & 0x7);
7110 + return !(random32() & 7);
7113 /* Failure mode for recovery testing */
7114 @@ -2340,7 +2689,7 @@ int dbg_leb_read(struct ubi_volume_desc
7117 if (in_failure_mode(desc))
7120 return ubi_leb_read(desc, lnum, buf, offset, len, check);
7123 @@ -2350,7 +2699,7 @@ int dbg_leb_write(struct ubi_volume_desc
7126 if (in_failure_mode(desc))
7129 failing = do_fail(desc, lnum, 1);
7132 @@ -2358,7 +2707,7 @@ int dbg_leb_write(struct ubi_volume_desc
7141 @@ -2368,12 +2717,12 @@ int dbg_leb_change(struct ubi_volume_des
7144 if (do_fail(desc, lnum, 1))
7147 err = ubi_leb_change(desc, lnum, buf, len, dtype);
7150 if (do_fail(desc, lnum, 1))
7156 @@ -2382,12 +2731,12 @@ int dbg_leb_erase(struct ubi_volume_desc
7159 if (do_fail(desc, lnum, 0))
7162 err = ubi_leb_erase(desc, lnum);
7165 if (do_fail(desc, lnum, 0))
7171 @@ -2396,19 +2745,19 @@ int dbg_leb_unmap(struct ubi_volume_desc
7174 if (do_fail(desc, lnum, 0))
7177 err = ubi_leb_unmap(desc, lnum);
7180 if (do_fail(desc, lnum, 0))
7186 int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
7188 if (in_failure_mode(desc))
7191 return ubi_is_mapped(desc, lnum);
7194 @@ -2417,12 +2766,12 @@ int dbg_leb_map(struct ubi_volume_desc *
7197 if (do_fail(desc, lnum, 0))
7200 err = ubi_leb_map(desc, lnum, dtype);
7203 if (do_fail(desc, lnum, 0))
7209 @@ -2440,16 +2789,8 @@ int ubifs_debugging_init(struct ubifs_in
7213 - c->dbg->buf = vmalloc(c->leb_size);
7217 failure_mode_init(c);
7226 @@ -2459,7 +2800,6 @@ out:
7227 void ubifs_debugging_exit(struct ubifs_info *c)
7229 failure_mode_exit(c);
7230 - vfree(c->dbg->buf);
7234 @@ -2501,7 +2841,7 @@ void dbg_debugfs_exit(void)
7235 static int open_debugfs_file(struct inode *inode, struct file *file)
7237 file->private_data = inode->i_private;
7239 + return nonseekable_open(inode, file);
7242 static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
7243 @@ -2512,18 +2852,15 @@ static ssize_t write_debugfs_file(struct
7245 if (file->f_path.dentry == d->dfs_dump_lprops)
7247 - else if (file->f_path.dentry == d->dfs_dump_budg) {
7248 - spin_lock(&c->space_lock);
7250 - spin_unlock(&c->space_lock);
7251 - } else if (file->f_path.dentry == d->dfs_dump_tnc) {
7252 + else if (file->f_path.dentry == d->dfs_dump_budg)
7253 + dbg_dump_budg(c, &c->bi);
7254 + else if (file->f_path.dentry == d->dfs_dump_tnc) {
7255 mutex_lock(&c->tnc_mutex);
7257 mutex_unlock(&c->tnc_mutex);
7265 @@ -2531,6 +2868,7 @@ static const struct file_operations dfs_
7266 .open = open_debugfs_file,
7267 .write = write_debugfs_file,
7268 .owner = THIS_MODULE,
7269 + .llseek = no_llseek,
7273 @@ -2553,40 +2891,38 @@ int dbg_debugfs_init_fs(struct ubifs_inf
7274 struct ubifs_debug_info *d = c->dbg;
7276 sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
7277 - d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir);
7278 - if (IS_ERR(d->dfs_dir)) {
7279 - err = PTR_ERR(d->dfs_dir);
7280 - ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
7281 - d->dfs_dir_name, err);
7282 + fname = d->dfs_dir_name;
7283 + dent = debugfs_create_dir(fname, dfs_rootdir);
7284 + if (!dent || IS_ERR(dent))
7287 + d->dfs_dir = dent;
7289 fname = "dump_lprops";
7290 - dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
7292 + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
7293 + if (!dent || IS_ERR(dent))
7295 d->dfs_dump_lprops = dent;
7297 fname = "dump_budg";
7298 - dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
7300 + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
7301 + if (!dent || IS_ERR(dent))
7303 d->dfs_dump_budg = dent;
7306 - dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
7308 + dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
7309 + if (!dent || IS_ERR(dent))
7311 d->dfs_dump_tnc = dent;
7316 - err = PTR_ERR(dent);
7317 - ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
7319 debugfs_remove_recursive(d->dfs_dir);
7321 + err = dent ? PTR_ERR(dent) : -ENODEV;
7322 + ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
7327 diff -uprN linux-2.6.28/fs/ubifs/debug.h ubifs-v2.6.28/fs/ubifs/debug.h
7328 --- linux-2.6.28/fs/ubifs/debug.h 2011-06-15 15:12:27.000000000 -0400
7329 +++ ubifs-v2.6.28/fs/ubifs/debug.h 2011-06-15 14:22:09.000000000 -0400
7331 #ifndef __UBIFS_DEBUG_H__
7332 #define __UBIFS_DEBUG_H__
7334 +/* Checking helper functions */
7335 +typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
7336 + struct ubifs_zbranch *zbr, void *priv);
7337 +typedef int (*dbg_znode_callback)(struct ubifs_info *c,
7338 + struct ubifs_znode *znode, void *priv);
7340 #ifdef CONFIG_UBIFS_FS_DEBUG
7342 +#include <linux/random.h>
7345 * ubifs_debug_info - per-FS debugging information.
7346 - * @buf: a buffer of LEB size, used for various purposes
7347 * @old_zroot: old index root - used by 'dbg_check_old_index()'
7348 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
7349 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
7351 * @new_ihead_offs: used by debugging to check @c->ihead_offs
7353 * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
7354 - * @saved_free: saved free space (used by 'dbg_save_space_info()')
7355 + * @saved_bi: saved budgeting information
7356 + * @saved_free: saved amount of free space
7357 + * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
7359 - * dfs_dir_name: name of debugfs directory containing this file-system's files
7360 - * dfs_dir: direntry object of the file-system debugfs directory
7361 - * dfs_dump_lprops: "dump lprops" debugfs knob
7362 - * dfs_dump_budg: "dump budgeting information" debugfs knob
7363 - * dfs_dump_tnc: "dump TNC" debugfs knob
7364 + * @dfs_dir_name: name of debugfs directory containing this file-system's files
7365 + * @dfs_dir: direntry object of the file-system debugfs directory
7366 + * @dfs_dump_lprops: "dump lprops" debugfs knob
7367 + * @dfs_dump_budg: "dump budgeting information" debugfs knob
7368 + * @dfs_dump_tnc: "dump TNC" debugfs knob
7370 struct ubifs_debug_info {
7372 struct ubifs_zbranch old_zroot;
7373 int old_zroot_level;
7374 unsigned long long old_zroot_sqnum;
7375 @@ -72,7 +80,9 @@ struct ubifs_debug_info {
7378 struct ubifs_lp_stats saved_lst;
7379 + struct ubifs_budg_info saved_bi;
7380 long long saved_free;
7381 + int saved_idx_gc_cnt;
7383 char dfs_dir_name[100];
7384 struct dentry *dfs_dir;
7385 @@ -97,23 +107,7 @@ struct ubifs_debug_info {
7389 -#define dbg_dump_stack() do { \
7390 - if (!dbg_failure_mode) \
7394 -/* Generic debugging messages */
7395 -#define dbg_msg(fmt, ...) do { \
7396 - spin_lock(&dbg_lock); \
7397 - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
7398 - __func__, ##__VA_ARGS__); \
7399 - spin_unlock(&dbg_lock); \
7402 -#define dbg_do_msg(typ, fmt, ...) do { \
7403 - if (ubifs_msg_flags & typ) \
7404 - dbg_msg(fmt, ##__VA_ARGS__); \
7406 +#define dbg_dump_stack() dump_stack()
7408 #define dbg_err(fmt, ...) do { \
7409 spin_lock(&dbg_lock); \
7410 @@ -133,86 +127,43 @@ const char *dbg_key_str1(const struct ub
7411 #define DBGKEY(key) dbg_key_str0(c, (key))
7412 #define DBGKEY1(key) dbg_key_str1(c, (key))
7414 -/* General messages */
7415 -#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
7416 +#define ubifs_dbg_msg(type, fmt, ...) do { \
7417 + spin_lock(&dbg_lock); \
7418 + pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
7419 + spin_unlock(&dbg_lock); \
7422 +/* Just a debugging messages not related to any specific UBIFS subsystem */
7423 +#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
7424 +/* General messages */
7425 +#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
7426 /* Additional journal messages */
7427 -#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
7429 +#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
7430 /* Additional TNC messages */
7431 -#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
7433 +#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
7434 /* Additional lprops messages */
7435 -#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
7437 +#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
7438 /* Additional LEB find messages */
7439 -#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
7441 +#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
7442 /* Additional mount messages */
7443 -#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
7445 +#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
7446 /* Additional I/O messages */
7447 -#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
7449 +#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
7450 /* Additional commit messages */
7451 -#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
7453 +#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
7454 /* Additional budgeting messages */
7455 -#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
7457 +#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
7458 /* Additional log messages */
7459 -#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
7461 +#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
7462 /* Additional gc messages */
7463 -#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
7465 +#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
7466 /* Additional scan messages */
7467 -#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
7469 +#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
7470 /* Additional recovery messages */
7471 -#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
7472 +#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
7475 - * Debugging message type flags (must match msg_type_names in debug.c).
7477 - * UBIFS_MSG_GEN: general messages
7478 - * UBIFS_MSG_JNL: journal messages
7479 - * UBIFS_MSG_MNT: mount messages
7480 - * UBIFS_MSG_CMT: commit messages
7481 - * UBIFS_MSG_FIND: LEB find messages
7482 - * UBIFS_MSG_BUDG: budgeting messages
7483 - * UBIFS_MSG_GC: garbage collection messages
7484 - * UBIFS_MSG_TNC: TNC messages
7485 - * UBIFS_MSG_LP: lprops messages
7486 - * UBIFS_MSG_IO: I/O messages
7487 - * UBIFS_MSG_LOG: log messages
7488 - * UBIFS_MSG_SCAN: scan messages
7489 - * UBIFS_MSG_RCVRY: recovery messages
7492 - UBIFS_MSG_GEN = 0x1,
7493 - UBIFS_MSG_JNL = 0x2,
7494 - UBIFS_MSG_MNT = 0x4,
7495 - UBIFS_MSG_CMT = 0x8,
7496 - UBIFS_MSG_FIND = 0x10,
7497 - UBIFS_MSG_BUDG = 0x20,
7498 - UBIFS_MSG_GC = 0x40,
7499 - UBIFS_MSG_TNC = 0x80,
7500 - UBIFS_MSG_LP = 0x100,
7501 - UBIFS_MSG_IO = 0x200,
7502 - UBIFS_MSG_LOG = 0x400,
7503 - UBIFS_MSG_SCAN = 0x800,
7504 - UBIFS_MSG_RCVRY = 0x1000,
7507 -/* Debugging message type flags for each default debug message level */
7508 -#define UBIFS_MSG_LVL_0 0
7509 -#define UBIFS_MSG_LVL_1 0x1
7510 -#define UBIFS_MSG_LVL_2 0x7f
7511 -#define UBIFS_MSG_LVL_3 0xffff
7514 - * Debugging check flags (must match chk_names in debug.c).
7515 + * Debugging check flags.
7517 * UBIFS_CHK_GEN: general checks
7518 * UBIFS_CHK_TNC: check TNC
7519 @@ -233,32 +184,14 @@ enum {
7523 - * Special testing flags (must match tst_names in debug.c).
7524 + * Special testing flags.
7526 - * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
7527 * UBIFS_TST_RCVRY: failure mode for recovery testing
7530 - UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
7531 UBIFS_TST_RCVRY = 0x4,
7534 -#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
7535 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
7536 -#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
7537 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
7538 -#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
7539 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
7541 -#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
7544 -#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
7545 -#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
7547 -#define UBIFS_CHK_FLAGS_DEFAULT 0
7550 extern spinlock_t dbg_lock;
7552 extern unsigned int ubifs_msg_flags;
7553 @@ -271,6 +204,7 @@ void ubifs_debugging_exit(struct ubifs_i
7554 /* Dump functions */
7555 const char *dbg_ntype(int type);
7556 const char *dbg_cstate(int cmt_state);
7557 +const char *dbg_jhead(int jhead);
7558 const char *dbg_get_key_dump(const struct ubifs_info *c,
7559 const union ubifs_key *key);
7560 void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
7561 @@ -279,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubif
7563 void dbg_dump_budget_req(const struct ubifs_budget_req *req);
7564 void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
7565 -void dbg_dump_budg(struct ubifs_info *c);
7566 +void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
7567 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
7568 void dbg_dump_lprops(struct ubifs_info *c);
7569 void dbg_dump_lpt_info(struct ubifs_info *c);
7570 @@ -293,11 +227,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
7571 void dbg_dump_index(struct ubifs_info *c);
7572 void dbg_dump_lpt_lebs(const struct ubifs_info *c);
7574 -/* Checking helper functions */
7575 -typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
7576 - struct ubifs_zbranch *zbr, void *priv);
7577 -typedef int (*dbg_znode_callback)(struct ubifs_info *c,
7578 - struct ubifs_znode *znode, void *priv);
7579 int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
7580 dbg_znode_callback znode_cb, void *priv);
7582 @@ -318,23 +247,24 @@ int dbg_check_idx_size(struct ubifs_info
7583 int dbg_check_filesystem(struct ubifs_info *c);
7584 void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
7586 -int dbg_check_lprops(struct ubifs_info *c);
7587 int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
7589 +int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
7591 +int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
7592 +int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
7594 /* Force the use of in-the-gaps method for testing */
7596 -#define dbg_force_in_the_gaps_enabled \
7597 - (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
7599 +static inline int dbg_force_in_the_gaps_enabled(void)
7601 + return ubifs_chk_flags & UBIFS_CHK_GEN;
7603 int dbg_force_in_the_gaps(void);
7605 /* Failure mode for recovery testing */
7607 #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
7609 #ifndef UBIFS_DBG_PRESERVE_UBI
7611 #define ubi_leb_read dbg_leb_read
7612 #define ubi_leb_write dbg_leb_write
7613 #define ubi_leb_change dbg_leb_change
7614 @@ -342,7 +272,6 @@ int dbg_force_in_the_gaps(void);
7615 #define ubi_leb_unmap dbg_leb_unmap
7616 #define ubi_is_mapped dbg_is_mapped
7617 #define ubi_leb_map dbg_leb_map
7621 int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
7622 @@ -389,85 +318,127 @@ void dbg_debugfs_exit_fs(struct ubifs_in
7623 __func__, __LINE__, current->pid); \
7626 -#define dbg_err(fmt, ...) do { \
7628 - ubifs_err(fmt, ##__VA_ARGS__); \
7629 +#define dbg_err(fmt, ...) do { \
7631 + ubifs_err(fmt, ##__VA_ARGS__); \
7634 -#define dbg_msg(fmt, ...) do { \
7636 - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \
7637 - current->pid, __func__, ##__VA_ARGS__); \
7638 +#define ubifs_dbg_msg(fmt, ...) do { \
7640 + pr_debug(fmt "\n", ##__VA_ARGS__); \
7643 #define dbg_dump_stack()
7644 #define ubifs_assert_cmt_locked(c)
7646 -#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7647 -#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7648 -#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7649 -#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7650 -#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7651 -#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7652 -#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7653 -#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7654 -#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7655 -#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7656 -#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7657 -#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7658 -#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__)
7659 +#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7660 +#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7661 +#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7662 +#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7663 +#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7664 +#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7665 +#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7666 +#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7667 +#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7668 +#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7669 +#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7670 +#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7671 +#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7672 +#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
7674 #define DBGKEY(key) ((char *)(key))
7675 #define DBGKEY1(key) ((char *)(key))
7677 -#define ubifs_debugging_init(c) 0
7678 -#define ubifs_debugging_exit(c) ({})
7680 -#define dbg_ntype(type) ""
7681 -#define dbg_cstate(cmt_state) ""
7682 -#define dbg_get_key_dump(c, key) ({})
7683 -#define dbg_dump_inode(c, inode) ({})
7684 -#define dbg_dump_node(c, node) ({})
7685 -#define dbg_dump_lpt_node(c, node, lnum, offs) ({})
7686 -#define dbg_dump_budget_req(req) ({})
7687 -#define dbg_dump_lstats(lst) ({})
7688 -#define dbg_dump_budg(c) ({})
7689 -#define dbg_dump_lprop(c, lp) ({})
7690 -#define dbg_dump_lprops(c) ({})
7691 -#define dbg_dump_lpt_info(c) ({})
7692 -#define dbg_dump_leb(c, lnum) ({})
7693 -#define dbg_dump_znode(c, znode) ({})
7694 -#define dbg_dump_heap(c, heap, cat) ({})
7695 -#define dbg_dump_pnode(c, pnode, parent, iip) ({})
7696 -#define dbg_dump_tnc(c) ({})
7697 -#define dbg_dump_index(c) ({})
7698 -#define dbg_dump_lpt_lebs(c) ({})
7700 -#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
7701 -#define dbg_old_index_check_init(c, zroot) 0
7702 -#define dbg_save_space_info(c) ({})
7703 -#define dbg_check_space_info(c) 0
7704 -#define dbg_check_old_index(c, zroot) 0
7705 -#define dbg_check_cats(c) 0
7706 -#define dbg_check_ltab(c) 0
7707 -#define dbg_chk_lpt_free_spc(c) 0
7708 -#define dbg_chk_lpt_sz(c, action, len) 0
7709 -#define dbg_check_synced_i_size(inode) 0
7710 -#define dbg_check_dir_size(c, dir) 0
7711 -#define dbg_check_tnc(c, x) 0
7712 -#define dbg_check_idx_size(c, idx_size) 0
7713 -#define dbg_check_filesystem(c) 0
7714 -#define dbg_check_heap(c, heap, cat, add_pos) ({})
7715 -#define dbg_check_lprops(c) 0
7716 -#define dbg_check_lpt_nodes(c, cnode, row, col) 0
7717 -#define dbg_force_in_the_gaps_enabled 0
7718 -#define dbg_force_in_the_gaps() 0
7719 -#define dbg_failure_mode 0
7721 -#define dbg_debugfs_init() 0
7722 -#define dbg_debugfs_exit()
7723 -#define dbg_debugfs_init_fs(c) 0
7724 -#define dbg_debugfs_exit_fs(c) 0
7725 +static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
7726 +static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
7727 +static inline const char *dbg_ntype(int type) { return ""; }
7728 +static inline const char *dbg_cstate(int cmt_state) { return ""; }
7729 +static inline const char *dbg_jhead(int jhead) { return ""; }
7730 +static inline const char *
7731 +dbg_get_key_dump(const struct ubifs_info *c,
7732 + const union ubifs_key *key) { return ""; }
7733 +static inline void dbg_dump_inode(const struct ubifs_info *c,
7734 + const struct inode *inode) { return; }
7735 +static inline void dbg_dump_node(const struct ubifs_info *c,
7736 + const void *node) { return; }
7737 +static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
7738 + void *node, int lnum,
7739 + int offs) { return; }
7741 +dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
7743 +dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
7745 +dbg_dump_budg(struct ubifs_info *c,
7746 + const struct ubifs_budg_info *bi) { return; }
7747 +static inline void dbg_dump_lprop(const struct ubifs_info *c,
7748 + const struct ubifs_lprops *lp) { return; }
7749 +static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
7750 +static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; }
7751 +static inline void dbg_dump_leb(const struct ubifs_info *c,
7752 + int lnum) { return; }
7754 +dbg_dump_znode(const struct ubifs_info *c,
7755 + const struct ubifs_znode *znode) { return; }
7756 +static inline void dbg_dump_heap(struct ubifs_info *c,
7757 + struct ubifs_lpt_heap *heap,
7758 + int cat) { return; }
7759 +static inline void dbg_dump_pnode(struct ubifs_info *c,
7760 + struct ubifs_pnode *pnode,
7761 + struct ubifs_nnode *parent,
7762 + int iip) { return; }
7763 +static inline void dbg_dump_tnc(struct ubifs_info *c) { return; }
7764 +static inline void dbg_dump_index(struct ubifs_info *c) { return; }
7765 +static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; }
7767 +static inline int dbg_walk_index(struct ubifs_info *c,
7768 + dbg_leaf_callback leaf_cb,
7769 + dbg_znode_callback znode_cb,
7770 + void *priv) { return 0; }
7771 +static inline void dbg_save_space_info(struct ubifs_info *c) { return; }
7772 +static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; }
7773 +static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; }
7775 +dbg_old_index_check_init(struct ubifs_info *c,
7776 + struct ubifs_zbranch *zroot) { return 0; }
7778 +dbg_check_old_index(struct ubifs_info *c,
7779 + struct ubifs_zbranch *zroot) { return 0; }
7780 +static inline int dbg_check_cats(struct ubifs_info *c) { return 0; }
7781 +static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; }
7782 +static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; }
7783 +static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
7784 + int action, int len) { return 0; }
7785 +static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; }
7786 +static inline int dbg_check_dir_size(struct ubifs_info *c,
7787 + const struct inode *dir) { return 0; }
7788 +static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
7789 +static inline int dbg_check_idx_size(struct ubifs_info *c,
7790 + long long idx_size) { return 0; }
7791 +static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; }
7792 +static inline void dbg_check_heap(struct ubifs_info *c,
7793 + struct ubifs_lpt_heap *heap,
7794 + int cat, int add_pos) { return; }
7795 +static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
7796 + struct ubifs_cnode *cnode, int row, int col) { return 0; }
7797 +static inline int dbg_check_inode_size(struct ubifs_info *c,
7798 + const struct inode *inode,
7799 + loff_t size) { return 0; }
7801 +dbg_check_data_nodes_order(struct ubifs_info *c,
7802 + struct list_head *head) { return 0; }
7804 +dbg_check_nondata_nodes_order(struct ubifs_info *c,
7805 + struct list_head *head) { return 0; }
7807 +static inline int dbg_force_in_the_gaps(void) { return 0; }
7808 +#define dbg_force_in_the_gaps_enabled() 0
7809 +#define dbg_failure_mode 0
7811 +static inline int dbg_debugfs_init(void) { return 0; }
7812 +static inline void dbg_debugfs_exit(void) { return; }
7813 +static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; }
7814 +static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; }
7816 #endif /* !CONFIG_UBIFS_FS_DEBUG */
7817 #endif /* !__UBIFS_DEBUG_H__ */
7818 diff -uprN linux-2.6.28/fs/ubifs/dir.c ubifs-v2.6.28/fs/ubifs/dir.c
7819 --- linux-2.6.28/fs/ubifs/dir.c 2011-06-15 15:12:27.000000000 -0400
7820 +++ ubifs-v2.6.28/fs/ubifs/dir.c 2011-06-15 14:22:09.000000000 -0400
7821 @@ -104,13 +104,13 @@ struct inode *ubifs_new_inode(struct ubi
7823 inode->i_flags |= (S_NOCMTIME);
7825 - inode->i_uid = current->fsuid;
7826 + inode->i_uid = current_fsuid();
7827 if (dir->i_mode & S_ISGID) {
7828 inode->i_gid = dir->i_gid;
7832 - inode->i_gid = current->fsgid;
7833 + inode->i_gid = current_fsgid();
7834 inode->i_mode = mode;
7835 inode->i_mtime = inode->i_atime = inode->i_ctime =
7836 ubifs_current_time(inode);
7837 @@ -628,7 +628,7 @@ static int ubifs_unlink(struct inode *di
7838 ubifs_release_budget(c, &req);
7840 /* We've deleted something - clean the "no space" flags */
7841 - c->nospace = c->nospace_rp = 0;
7842 + c->bi.nospace = c->bi.nospace_rp = 0;
7846 @@ -718,7 +718,7 @@ static int ubifs_rmdir(struct inode *dir
7847 ubifs_release_budget(c, &req);
7849 /* We've deleted something - clean the "no space" flags */
7850 - c->nospace = c->nospace_rp = 0;
7851 + c->bi.nospace = c->bi.nospace_rp = 0;
7855 diff -uprN linux-2.6.28/fs/ubifs/file.c ubifs-v2.6.28/fs/ubifs/file.c
7856 --- linux-2.6.28/fs/ubifs/file.c 2011-06-15 15:12:27.000000000 -0400
7857 +++ ubifs-v2.6.28/fs/ubifs/file.c 2011-06-15 14:22:09.000000000 -0400
7862 - * This file implements VFS file and inode operations of regular files, device
7863 + * This file implements VFS file and inode operations for regular files, device
7864 * nodes and symlinks as well as address space operations.
7866 - * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
7867 - * page is dirty and is used for budgeting purposes - dirty pages should not be
7868 - * budgeted. The PG_checked flag is set if full budgeting is required for the
7869 - * page e.g., when it corresponds to a file hole or it is just beyond the file
7870 - * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
7871 - * fail in this function, and the budget is released in 'ubifs_write_end()'. So
7872 - * the PG_private and PG_checked flags carry the information about how the page
7873 - * was budgeted, to make it possible to release the budget properly.
7875 - * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
7876 - * we implement. However, this is not true for '->writepage()', which might be
7877 - * called with 'i_mutex' unlocked. For example, when pdflush is performing
7878 - * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
7879 - * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
7880 - * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim
7881 - * path'. So, in '->writepage()' we are only guaranteed that the page is
7884 - * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
7885 - * readahead path does not have it locked ("sys_read -> generic_file_aio_read
7886 - * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
7887 - * not set as well. However, UBIFS disables readahead.
7889 - * This, for example means that there might be 2 concurrent '->writepage()'
7890 - * calls for the same inode, but different inode dirty pages.
7891 + * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
7892 + * the page is dirty and is used for optimization purposes - dirty pages are
7893 + * not budgeted so the flag shows that 'ubifs_write_end()' should not release
7894 + * the budget for this page. The @PG_checked flag is set if full budgeting is
7895 + * required for the page e.g., when it corresponds to a file hole or it is
7896 + * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
7897 + * it is OK to fail in this function, and the budget is released in
7898 + * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
7899 + * information about how the page was budgeted, to make it possible to release
7900 + * the budget properly.
7902 + * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
7903 + * implement. However, this is not true for 'ubifs_writepage()', which may be
7904 + * called with @i_mutex unlocked. For example, when pdflush is doing background
7905 + * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal"
7906 + * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the
7907 + * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()'
7908 + * we are only guaranteed that the page is locked.
7910 + * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
7911 + * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
7912 + * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not
7913 + * set as well. However, UBIFS disables readahead.
7917 @@ -213,7 +211,7 @@ static void release_new_page_budget(stru
7919 static void release_existing_page_budget(struct ubifs_info *c)
7921 - struct ubifs_budget_req req = { .dd_growth = c->page_budget};
7922 + struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
7924 ubifs_release_budget(c, &req);
7926 @@ -433,8 +431,9 @@ static int ubifs_write_begin(struct file
7929 ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
7930 + ubifs_assert(!c->ro_media && !c->ro_mount);
7932 - if (unlikely(c->ro_media))
7933 + if (unlikely(c->ro_error))
7936 /* Try out the fast-path part first */
7937 @@ -447,10 +446,12 @@ static int ubifs_write_begin(struct file
7938 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
7940 * We change whole page so no need to load it. But we
7941 - * have to set the @PG_checked flag to make the further
7942 - * code the page is new. This might be not true, but it
7943 - * is better to budget more that to read the page from
7945 + * do not know whether this page exists on the media or
7946 + * not, so we assume the latter because it requires
7947 + * larger budget. The assumption is that it is better
7948 + * to budget a bit more than to read the page from the
7949 + * media. Thus, we are setting the @PG_checked flag
7952 SetPageChecked(page);
7954 @@ -496,8 +497,8 @@ static int ubifs_write_begin(struct file
7958 - * Whee, we aquired budgeting quickly - without involving
7959 - * garbage-collection, committing or forceing write-back. We return
7960 + * Whee, we acquired budgeting quickly - without involving
7961 + * garbage-collection, committing or forcing write-back. We return
7962 * with @ui->ui_mutex locked if we are appending pages, and unlocked
7963 * otherwise. This is an optimization (slightly hacky though).
7965 @@ -558,10 +559,11 @@ static int ubifs_write_end(struct file *
7966 dbg_gen("copied %d instead of %d, read page and repeat",
7968 cancel_budget(c, page, ui, appending);
7969 + ClearPageChecked(page);
7972 * Return 0 to force VFS to repeat the whole operation, or the
7973 - * error code if 'do_readpage()' failes.
7974 + * error code if 'do_readpage()' fails.
7976 copied = do_readpage(page);
7978 @@ -958,7 +960,7 @@ static int do_writepage(struct page *pag
7979 * whole index and correct all inode sizes, which is long an unacceptable.
7981 * To prevent situations like this, UBIFS writes pages back only if they are
7982 - * within last synchronized inode size, i.e. the the size which has been
7983 + * within the last synchronized inode size, i.e. the size which has been
7984 * written to the flash media last time. Otherwise, UBIFS forces inode
7985 * write-back, thus making sure the on-flash inode contains current inode size,
7986 * and then keeps writing pages back.
7987 @@ -1174,16 +1176,16 @@ static int do_truncation(struct ubifs_in
7988 ui->ui_size = inode->i_size;
7989 /* Truncation changes inode [mc]time */
7990 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
7991 - /* The other attributes may be changed at the same time as well */
7992 + /* Other attributes may be changed at the same time as well */
7993 do_attr_changes(inode, attr);
7995 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
7996 mutex_unlock(&ui->ui_mutex);
8000 ubifs_release_budget(c, &req);
8002 - c->nospace = c->nospace_rp = 0;
8003 + c->bi.nospace = c->bi.nospace_rp = 0;
8007 @@ -1312,6 +1314,13 @@ int ubifs_fsync(struct file *file, struc
8009 dbg_gen("syncing inode %lu", inode->i_ino);
8013 + * For some really strange reasons VFS does not filter out
8014 + * 'fsync()' for R/O mounted file-systems as per 2.6.39.
8019 * VFS has already synchronized dirty pages for this inode. Synchronize
8020 * the inode unless this is a 'datasync()' call.
8021 @@ -1440,8 +1449,8 @@ static int ubifs_releasepage(struct page
8025 - * mmap()d file has taken write protection fault and is being made
8026 - * writable. UBIFS must ensure page is budgeted for.
8027 + * mmap()d file has taken write protection fault and is being made writable.
8028 + * UBIFS must ensure page is budgeted for.
8030 static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
8032 @@ -1453,9 +1462,9 @@ static int ubifs_vm_page_mkwrite(struct
8034 dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
8035 i_size_read(inode));
8036 - ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
8037 + ubifs_assert(!c->ro_media && !c->ro_mount);
8039 - if (unlikely(c->ro_media))
8040 + if (unlikely(c->ro_error))
8044 @@ -1541,7 +1550,6 @@ static int ubifs_file_mmap(struct file *
8048 - /* 'generic_file_mmap()' takes care of NOMMU case */
8049 err = generic_file_mmap(file, vma);
8052 diff -uprN linux-2.6.28/fs/ubifs/find.c ubifs-v2.6.28/fs/ubifs/find.c
8053 --- linux-2.6.28/fs/ubifs/find.c 2008-12-24 18:26:37.000000000 -0500
8054 +++ ubifs-v2.6.28/fs/ubifs/find.c 2011-06-15 14:22:09.000000000 -0400
8055 @@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_in
8056 * But if the index takes fewer LEBs than it is reserved for it,
8057 * this function must avoid picking those reserved LEBs.
8059 - if (c->min_idx_lebs >= c->lst.idx_lebs) {
8060 - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
8061 + if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
8062 + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
8065 spin_unlock(&c->space_lock);
8066 @@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_in
8069 spin_lock(&c->space_lock);
8070 - exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
8071 + exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
8072 spin_unlock(&c->space_lock);
8075 @@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_
8076 * ubifs_find_free_space - find a data LEB with free space.
8077 * @c: the UBIFS file-system description object
8078 * @min_space: minimum amount of required free space
8079 - * @free: contains amount of free space in the LEB on exit
8080 + * @offs: contains offset of where free space starts on exit
8081 * @squeeze: whether to try to find space in a non-empty LEB first
8083 * This function looks for an LEB with at least @min_space bytes of free space.
8084 @@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_
8085 * failed to find a LEB with @min_space bytes of free space and other a negative
8086 * error codes in case of failure.
8088 -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
8089 +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
8092 const struct ubifs_lprops *lprops;
8093 @@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_i
8095 /* Check if there are enough empty LEBs for commit */
8096 spin_lock(&c->space_lock);
8097 - if (c->min_idx_lebs > c->lst.idx_lebs)
8098 - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
8099 + if (c->bi.min_idx_lebs > c->lst.idx_lebs)
8100 + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
8103 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
8104 @@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_i
8105 spin_unlock(&c->space_lock);
8108 - *free = lprops->free;
8109 + *offs = c->leb_size - lprops->free;
8110 ubifs_release_lprops(c);
8112 - if (*free == c->leb_size) {
8115 * Ensure that empty LEBs have been unmapped. They may not have
8116 * been, for example, because of an unclean unmount. Also
8117 @@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_i
8121 - dbg_find("found LEB %d, free %d", lnum, *free);
8122 - ubifs_assert(*free >= min_space);
8123 + dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs);
8124 + ubifs_assert(*offs <= c->leb_size - min_space);
8128 diff -uprN linux-2.6.28/fs/ubifs/gc.c ubifs-v2.6.28/fs/ubifs/gc.c
8129 --- linux-2.6.28/fs/ubifs/gc.c 2011-06-15 15:12:27.000000000 -0400
8130 +++ ubifs-v2.6.28/fs/ubifs/gc.c 2011-06-15 14:22:09.000000000 -0400
8132 * have to waste large pieces of free space at the end of LEB B, because nodes
8133 * from LEB A would not fit. And the worst situation is when all nodes are of
8134 * maximum size. So dark watermark is the amount of free + dirty space in LEB
8135 - * which are guaranteed to be reclaimable. If LEB has less space, the GC migh
8136 + * which are guaranteed to be reclaimable. If LEB has less space, the GC might
8137 * be unable to reclaim it. So, LEBs with free + dirty greater than dark
8138 * watermark are "good" LEBs from GC's point of few. The other LEBs are not so
8139 * good, and GC takes extra care when moving them.
8142 #include <linux/pagemap.h>
8143 +#include <linux/list_sort.h>
8147 - * GC tries to optimize the way it fit nodes to available space, and it sorts
8148 - * nodes a little. The below constants are watermarks which define "large",
8149 - * "medium", and "small" nodes.
8151 -#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
8152 -#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ
8155 - * GC may need to move more then one LEB to make progress. The below constants
8156 + * GC may need to move more than one LEB to make progress. The below constants
8157 * define "soft" and "hard" limits on the number of LEBs the garbage collector
8160 @@ -106,6 +99,10 @@ static int switch_gc_head(struct ubifs_i
8164 + err = ubifs_wbuf_sync_nolock(wbuf);
8168 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
8171 @@ -116,138 +113,243 @@ static int switch_gc_head(struct ubifs_i
8175 - * joinup - bring data nodes for an inode together.
8176 - * @c: UBIFS file-system description object
8177 - * @sleb: describes scanned LEB
8178 - * @inum: inode number
8179 - * @blk: block number
8180 - * @data: list to which to add data nodes
8182 - * This function looks at the first few nodes in the scanned LEB @sleb and adds
8183 - * them to @data if they are data nodes from @inum and have a larger block
8184 - * number than @blk. This function returns %0 on success and a negative error
8185 - * code on failure.
8186 + * data_nodes_cmp - compare 2 data nodes.
8187 + * @priv: UBIFS file-system description object
8188 + * @a: first data node
8189 + * @b: second data node
8191 + * This function compares data nodes @a and @b. Returns %1 if @a has greater
8192 + * inode or block number, and %-1 otherwise.
8194 -static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum,
8195 - unsigned int blk, struct list_head *data)
8196 +static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
8198 - int err, cnt = 6, lnum = sleb->lnum, offs;
8199 - struct ubifs_scan_node *snod, *tmp;
8200 - union ubifs_key *key;
8201 + ino_t inuma, inumb;
8202 + struct ubifs_info *c = priv;
8203 + struct ubifs_scan_node *sa, *sb;
8209 + sa = list_entry(a, struct ubifs_scan_node, list);
8210 + sb = list_entry(b, struct ubifs_scan_node, list);
8212 + ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
8213 + ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
8214 + ubifs_assert(sa->type == UBIFS_DATA_NODE);
8215 + ubifs_assert(sb->type == UBIFS_DATA_NODE);
8217 + inuma = key_inum(c, &sa->key);
8218 + inumb = key_inum(c, &sb->key);
8220 + if (inuma == inumb) {
8221 + unsigned int blka = key_block(c, &sa->key);
8222 + unsigned int blkb = key_block(c, &sb->key);
8226 + } else if (inuma <= inumb)
8229 - list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
8231 - if (key_inum(c, key) == inum &&
8232 - key_type(c, key) == UBIFS_DATA_KEY &&
8233 - key_block(c, key) > blk) {
8234 - offs = snod->offs;
8235 - err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0);
8238 - list_del(&snod->list);
8240 - list_add_tail(&snod->list, data);
8241 - blk = key_block(c, key);
8245 - } else if (--cnt == 0)
8253 + * nondata_nodes_cmp - compare 2 non-data nodes.
8254 + * @priv: UBIFS file-system description object
8258 + * This function compares nodes @a and @b. It makes sure that inode nodes go
8259 + * first and sorted by length in descending order. Directory entry nodes go
8260 + * after inode nodes and are sorted in ascending hash value order.
8262 +static int nondata_nodes_cmp(void *priv, struct list_head *a,
8263 + struct list_head *b)
8265 + ino_t inuma, inumb;
8266 + struct ubifs_info *c = priv;
8267 + struct ubifs_scan_node *sa, *sb;
8273 + sa = list_entry(a, struct ubifs_scan_node, list);
8274 + sb = list_entry(b, struct ubifs_scan_node, list);
8276 + ubifs_assert(key_type(c, &sa->key) != UBIFS_DATA_KEY &&
8277 + key_type(c, &sb->key) != UBIFS_DATA_KEY);
8278 + ubifs_assert(sa->type != UBIFS_DATA_NODE &&
8279 + sb->type != UBIFS_DATA_NODE);
8281 + /* Inodes go before directory entries */
8282 + if (sa->type == UBIFS_INO_NODE) {
8283 + if (sb->type == UBIFS_INO_NODE)
8284 + return sb->len - sa->len;
8287 + if (sb->type == UBIFS_INO_NODE)
8290 + ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY ||
8291 + key_type(c, &sa->key) == UBIFS_XENT_KEY);
8292 + ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY ||
8293 + key_type(c, &sb->key) == UBIFS_XENT_KEY);
8294 + ubifs_assert(sa->type == UBIFS_DENT_NODE ||
8295 + sa->type == UBIFS_XENT_NODE);
8296 + ubifs_assert(sb->type == UBIFS_DENT_NODE ||
8297 + sb->type == UBIFS_XENT_NODE);
8299 + inuma = key_inum(c, &sa->key);
8300 + inumb = key_inum(c, &sb->key);
8302 + if (inuma == inumb) {
8303 + uint32_t hasha = key_hash(c, &sa->key);
8304 + uint32_t hashb = key_hash(c, &sb->key);
8306 + if (hasha <= hashb)
8308 + } else if (inuma <= inumb)
8315 - * move_nodes - move nodes.
8316 + * sort_nodes - sort nodes for GC.
8317 * @c: UBIFS file-system description object
8318 - * @sleb: describes nodes to move
8320 - * This function moves valid nodes from data LEB described by @sleb to the GC
8321 - * journal head. The obsolete nodes are dropped.
8323 - * When moving nodes we have to deal with classical bin-packing problem: the
8324 - * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
8325 - * where the nodes in the @sleb->nodes list are the elements which should be
8326 - * fit optimally to the bins. This function uses the "first fit decreasing"
8327 - * strategy, although it does not really sort the nodes but just split them on
8328 - * 3 classes - large, medium, and small, so they are roughly sorted.
8329 + * @sleb: describes nodes to sort and contains the result on exit
8330 + * @nondata: contains non-data nodes on exit
8331 + * @min: minimum node size is returned here
8333 + * This function sorts the list of inodes to garbage collect. First of all, it
8334 + * kills obsolete nodes and separates data and non-data nodes to the
8335 + * @sleb->nodes and @nondata lists correspondingly.
8337 + * Data nodes are then sorted in block number order - this is important for
8338 + * bulk-read; data nodes with lower inode number go before data nodes with
8339 + * higher inode number, and data nodes with lower block number go before data
8340 + * nodes with higher block number;
8342 + * Non-data nodes are sorted as follows.
8343 + * o First go inode nodes - they are sorted in descending length order.
8344 + * o Then go directory entry nodes - they are sorted in hash order, which
8345 + * should supposedly optimize 'readdir()'. Direntry nodes with lower parent
8346 + * inode number go before direntry nodes with higher parent inode number,
8347 + * and direntry nodes with lower name hash values go before direntry nodes
8348 + * with higher name hash values.
8350 - * This function returns zero in case of success, %-EAGAIN if commit is
8351 - * required, and other negative error codes in case of other failures.
8352 + * This function returns zero in case of success and a negative error code in
8353 + * case of failure.
8355 -static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
8356 +static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
8357 + struct list_head *nondata, int *min)
8360 struct ubifs_scan_node *snod, *tmp;
8361 - struct list_head data, large, medium, small;
8362 - struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
8363 - int avail, err, min = INT_MAX;
8364 - unsigned int blk = 0;
8367 - INIT_LIST_HEAD(&data);
8368 - INIT_LIST_HEAD(&large);
8369 - INIT_LIST_HEAD(&medium);
8370 - INIT_LIST_HEAD(&small);
8372 - while (!list_empty(&sleb->nodes)) {
8373 - struct list_head *lst = sleb->nodes.next;
8375 - snod = list_entry(lst, struct ubifs_scan_node, list);
8377 - ubifs_assert(snod->type != UBIFS_IDX_NODE);
8378 - ubifs_assert(snod->type != UBIFS_REF_NODE);
8379 - ubifs_assert(snod->type != UBIFS_CS_NODE);
8383 + /* Separate data nodes and non-data nodes */
8384 + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
8385 + ubifs_assert(snod->type == UBIFS_INO_NODE ||
8386 + snod->type == UBIFS_DATA_NODE ||
8387 + snod->type == UBIFS_DENT_NODE ||
8388 + snod->type == UBIFS_XENT_NODE ||
8389 + snod->type == UBIFS_TRUN_NODE);
8391 + if (snod->type != UBIFS_INO_NODE &&
8392 + snod->type != UBIFS_DATA_NODE &&
8393 + snod->type != UBIFS_DENT_NODE &&
8394 + snod->type != UBIFS_XENT_NODE) {
8395 + /* Probably truncation node, zap it */
8396 + list_del(&snod->list);
8401 + ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY ||
8402 + key_type(c, &snod->key) == UBIFS_INO_KEY ||
8403 + key_type(c, &snod->key) == UBIFS_DENT_KEY ||
8404 + key_type(c, &snod->key) == UBIFS_XENT_KEY);
8406 err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
8414 /* The node is obsolete, remove it from the list */
8415 + list_del(&snod->list);
8421 - * Sort the list of nodes so that data nodes go first, large
8422 - * nodes go second, and small nodes go last.
8424 - if (key_type(c, &snod->key) == UBIFS_DATA_KEY) {
8425 - if (inum != key_inum(c, &snod->key)) {
8428 - * Try to move data nodes from the same
8431 - err = joinup(c, sleb, inum, blk, &data);
8435 - inum = key_inum(c, &snod->key);
8436 - blk = key_block(c, &snod->key);
8438 - list_add_tail(lst, &data);
8439 - } else if (snod->len > MEDIUM_NODE_WM)
8440 - list_add_tail(lst, &large);
8441 - else if (snod->len > SMALL_NODE_WM)
8442 - list_add_tail(lst, &medium);
8444 - list_add_tail(lst, &small);
8446 - /* And find the smallest node */
8447 - if (snod->len < min)
8449 + if (snod->len < *min)
8452 + if (key_type(c, &snod->key) != UBIFS_DATA_KEY)
8453 + list_move_tail(&snod->list, nondata);
8457 - * Join the tree lists so that we'd have one roughly sorted list
8458 - * ('large' will be the head of the joined list).
8460 - list_splice(&data, &large);
8461 - list_splice(&medium, large.prev);
8462 - list_splice(&small, large.prev);
8463 + /* Sort data and non-data nodes */
8464 + list_sort(c, &sleb->nodes, &data_nodes_cmp);
8465 + list_sort(c, nondata, &nondata_nodes_cmp);
8467 + err = dbg_check_data_nodes_order(c, &sleb->nodes);
8470 + err = dbg_check_nondata_nodes_order(c, nondata);
8477 + * move_node - move a node.
8478 + * @c: UBIFS file-system description object
8479 + * @sleb: describes the LEB to move nodes from
8480 + * @snod: the node to move
8481 + * @wbuf: write-buffer to move node to
8483 + * This function moves node @snod to @wbuf, changes TNC correspondingly, and
8484 + * destroys @snod. Returns zero in case of success and a negative error code in
8485 + * case of failure.
8487 +static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
8488 + struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
8490 + int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used;
8493 + err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
8497 + err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
8498 + snod->offs, new_lnum, new_offs,
8500 + list_del(&snod->list);
8506 + * move_nodes - move nodes.
8507 + * @c: UBIFS file-system description object
8508 + * @sleb: describes the LEB to move nodes from
8510 + * This function moves valid nodes from data LEB described by @sleb to the GC
8511 + * journal head. This function returns zero in case of success, %-EAGAIN if
8512 + * commit is required, and other negative error codes in case of other
8515 +static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
8518 + LIST_HEAD(nondata);
8519 + struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
8521 if (wbuf->lnum == -1) {
8523 @@ -256,42 +358,59 @@ static int move_nodes(struct ubifs_info
8525 err = switch_gc_head(c);
8531 + err = sort_nodes(c, sleb, &nondata, &min);
8535 /* Write nodes to their new location. Use the first-fit strategy */
8537 - avail = c->leb_size - wbuf->offs - wbuf->used;
8538 - list_for_each_entry_safe(snod, tmp, &large, list) {
8539 - int new_lnum, new_offs;
8541 + struct ubifs_scan_node *snod, *tmp;
8543 + /* Move data nodes */
8544 + list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
8545 + avail = c->leb_size - wbuf->offs - wbuf->used;
8546 + if (snod->len > avail)
8548 + * Do not skip data nodes in order to optimize
8553 + err = move_node(c, sleb, snod, wbuf);
8558 + /* Move non-data nodes */
8559 + list_for_each_entry_safe(snod, tmp, &nondata, list) {
8560 + avail = c->leb_size - wbuf->offs - wbuf->used;
8564 - if (snod->len > avail)
8565 - /* This node does not fit */
8566 + if (snod->len > avail) {
8568 + * Keep going only if this is an inode with
8569 + * some data. Otherwise stop and switch the GC
8570 + * head. IOW, we assume that data-less inode
8571 + * nodes and direntry nodes are roughly of the
8574 + if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
8575 + snod->len == UBIFS_INO_NODE_SZ)
8582 - new_lnum = wbuf->lnum;
8583 - new_offs = wbuf->offs + wbuf->used;
8584 - err = ubifs_wbuf_write_nolock(wbuf, snod->node,
8586 + err = move_node(c, sleb, snod, wbuf);
8589 - err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
8590 - snod->offs, new_lnum, new_offs,
8595 - avail = c->leb_size - wbuf->offs - wbuf->used;
8596 - list_del(&snod->list);
8600 - if (list_empty(&large))
8601 + if (list_empty(&sleb->nodes) && list_empty(&nondata))
8605 @@ -306,10 +425,7 @@ static int move_nodes(struct ubifs_info
8609 - list_for_each_entry_safe(snod, tmp, &large, list) {
8610 - list_del(&snod->list);
8613 + list_splice_tail(&nondata, &sleb->nodes);
8617 @@ -361,11 +477,42 @@ int ubifs_garbage_collect_leb(struct ubi
8618 ubifs_assert(c->gc_lnum != lnum);
8619 ubifs_assert(wbuf->lnum != lnum);
8621 + if (lp->free + lp->dirty == c->leb_size) {
8622 + /* Special case - a free LEB */
8623 + dbg_gc("LEB %d is free, return it", lp->lnum);
8624 + ubifs_assert(!(lp->flags & LPROPS_INDEX));
8626 + if (lp->free != c->leb_size) {
8628 + * Write buffers must be sync'd before unmapping
8629 + * freeable LEBs, because one of them may contain data
8630 + * which obsoletes something in 'lp->pnum'.
8632 + err = gc_sync_wbufs(c);
8635 + err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
8640 + err = ubifs_leb_unmap(c, lp->lnum);
8644 + if (c->gc_lnum == -1) {
8645 + c->gc_lnum = lnum;
8646 + return LEB_RETAINED;
8653 * We scan the entire LEB even though we only really need to scan up to
8654 * (c->leb_size - lp->free).
8656 - sleb = ubifs_scan(c, lnum, 0, c->sbuf);
8657 + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
8659 return PTR_ERR(sleb);
8661 @@ -504,13 +651,14 @@ int ubifs_garbage_collect(struct ubifs_i
8662 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
8664 ubifs_assert_cmt_locked(c);
8665 + ubifs_assert(!c->ro_media && !c->ro_mount);
8667 if (ubifs_gc_should_commit(c))
8670 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
8672 - if (c->ro_media) {
8673 + if (c->ro_error) {
8677 @@ -569,51 +717,18 @@ int ubifs_garbage_collect(struct ubifs_i
8678 "(min. space %d)", lp.lnum, lp.free, lp.dirty,
8679 lp.free + lp.dirty, min_space);
8681 - if (lp.free + lp.dirty == c->leb_size) {
8682 - /* An empty LEB was returned */
8683 - dbg_gc("LEB %d is free, return it", lp.lnum);
8685 - * ubifs_find_dirty_leb() doesn't return freeable index
8688 - ubifs_assert(!(lp.flags & LPROPS_INDEX));
8689 - if (lp.free != c->leb_size) {
8691 - * Write buffers must be sync'd before
8692 - * unmapping freeable LEBs, because one of them
8693 - * may contain data which obsoletes something
8696 - ret = gc_sync_wbufs(c);
8699 - ret = ubifs_change_one_lp(c, lp.lnum,
8700 - c->leb_size, 0, 0, 0,
8705 - ret = ubifs_leb_unmap(c, lp.lnum);
8712 space_before = c->leb_size - wbuf->offs - wbuf->used;
8713 if (wbuf->lnum == -1)
8716 ret = ubifs_garbage_collect_leb(c, &lp);
8718 - if (ret == -EAGAIN || ret == -ENOSPC) {
8719 + if (ret == -EAGAIN) {
8721 - * These codes are not errors, so we have to
8722 - * return the LEB to lprops. But if the
8723 - * 'ubifs_return_leb()' function fails, its
8724 - * failure code is propagated to the caller
8725 - * instead of the original '-EAGAIN' or
8727 + * This is not error, so we have to return the
8728 + * LEB to lprops. But if 'ubifs_return_leb()'
8729 + * fails, its failure code is propagated to the
8730 + * caller instead of the original '-EAGAIN'.
8732 err = ubifs_return_leb(c, lp.lnum);
8734 @@ -703,8 +818,8 @@ out_unlock:
8736 ubifs_assert(ret < 0);
8737 ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
8738 - ubifs_ro_mode(c, ret);
8739 ubifs_wbuf_sync_nolock(wbuf);
8740 + ubifs_ro_mode(c, ret);
8741 mutex_unlock(&wbuf->io_mutex);
8742 ubifs_return_leb(c, lp.lnum);
8744 diff -uprN linux-2.6.28/fs/ubifs/io.c ubifs-v2.6.28/fs/ubifs/io.c
8745 --- linux-2.6.28/fs/ubifs/io.c 2011-06-15 15:12:27.000000000 -0400
8746 +++ ubifs-v2.6.28/fs/ubifs/io.c 2011-06-15 14:22:09.000000000 -0400
8748 * buffer is full or when it is not used for some time (by timer). This is
8749 * similar to the mechanism is used by JFFS2.
8751 + * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
8752 + * write size (@c->max_write_size). The latter is the maximum amount of bytes
8753 + * the underlying flash is able to program at a time, and writing in
8754 + * @c->max_write_size units should presumably be faster. Obviously,
8755 + * @c->min_io_size <= @c->max_write_size. Write-buffers are of
8756 + * @c->max_write_size bytes in size for maximum performance. However, when a
8757 + * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
8758 + * boundary) which contains data is written, not the whole write-buffer,
8759 + * because this is more space-efficient.
8761 + * This optimization adds few complications to the code. Indeed, on the one
8762 + * hand, we want to write in optimal @c->max_write_size bytes chunks, which
8763 + * also means aligning writes at the @c->max_write_size bytes offsets. On the
8764 + * other hand, we do not want to waste space when synchronizing the write
8765 + * buffer, so during synchronization we write in smaller chunks. And this makes
8766 + * the next write offset to be not aligned to @c->max_write_size bytes. So we
8767 + * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
8768 + * to @c->max_write_size bytes again. We do this by temporarily shrinking
8769 + * write-buffer size (@wbuf->size).
8771 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
8772 * mutexes defined inside these objects. Since sometimes upper-level code
8773 * has to lock the write-buffer (e.g. journal space reservation code), many
8775 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
8776 * uses padding nodes or padding bytes, if the padding node does not fit.
8778 - * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
8779 - * every time they are read from the flash media.
8780 + * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
8781 + * they are read from the flash media.
8784 #include <linux/crc32.h>
8787 void ubifs_ro_mode(struct ubifs_info *c, int err)
8789 - if (!c->ro_media) {
8791 + if (!c->ro_error) {
8793 c->no_chk_data_crc = 0;
8794 + c->vfs_sb->s_flags |= MS_RDONLY;
8795 ubifs_warn("switched to read-only mode, error %d", err);
8798 @@ -86,8 +107,12 @@ void ubifs_ro_mode(struct ubifs_info *c,
8799 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
8800 * true, which is controlled by corresponding UBIFS mount option. However, if
8801 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
8802 - * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is
8803 - * ignored and CRC is checked.
8804 + * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
8805 + * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
8806 + * is checked. This is because during mounting or re-mounting from R/O mode to
8807 + * R/W mode we may read journal nodes (when replaying the journal or doing the
8808 + * recovery) and the journal nodes may potentially be corrupted, so checking is
8811 * This function returns zero in case of success and %-EUCLEAN in case of bad
8813 @@ -129,8 +154,8 @@ int ubifs_check_node(const struct ubifs_
8814 node_len > c->ranges[type].max_len)
8817 - if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc &&
8818 - c->no_chk_data_crc)
8819 + if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
8820 + !c->remounting_rw && c->no_chk_data_crc)
8823 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
8824 @@ -297,7 +322,7 @@ static enum hrtimer_restart wbuf_timer_c
8826 struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
8828 - dbg_io("jhead %d", wbuf->jhead);
8829 + dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
8830 wbuf->need_sync = 1;
8831 wbuf->c->need_wbuf_sync = 1;
8832 ubifs_wake_up_bgt(wbuf->c);
8833 @@ -314,7 +339,8 @@ static void new_wbuf_timer_nolock(struct
8837 - dbg_io("set timer for jhead %d, %llu-%llu millisecs", wbuf->jhead,
8838 + dbg_io("set timer for jhead %s, %llu-%llu millisecs",
8839 + dbg_jhead(wbuf->jhead),
8840 div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
8841 div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
8843 @@ -340,41 +366,73 @@ static void cancel_wbuf_timer_nolock(str
8845 * This function synchronizes write-buffer @buf and returns zero in case of
8846 * success or a negative error code in case of failure.
8848 + * Note, although write-buffers are of @c->max_write_size, this function does
8849 + * not necessarily write all @c->max_write_size bytes to the flash. Instead,
8850 + * if the write-buffer is only partially filled with data, only the used part
8851 + * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
8852 + * This way we waste less space.
8854 int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
8856 struct ubifs_info *c = wbuf->c;
8858 + int err, dirt, sync_len;
8860 cancel_wbuf_timer_nolock(wbuf);
8861 if (!wbuf->used || wbuf->lnum == -1)
8862 /* Write-buffer is empty or not seeked */
8865 - dbg_io("LEB %d:%d, %d bytes, jhead %d",
8866 - wbuf->lnum, wbuf->offs, wbuf->used, wbuf->jhead);
8867 - ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
8868 + dbg_io("LEB %d:%d, %d bytes, jhead %s",
8869 + wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
8870 ubifs_assert(!(wbuf->avail & 7));
8871 - ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
8872 + ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
8873 + ubifs_assert(wbuf->size >= c->min_io_size);
8874 + ubifs_assert(wbuf->size <= c->max_write_size);
8875 + ubifs_assert(wbuf->size % c->min_io_size == 0);
8876 + ubifs_assert(!c->ro_media && !c->ro_mount);
8877 + if (c->leb_size - wbuf->offs >= c->max_write_size)
8878 + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
8884 - ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
8886 + * Do not write whole write buffer but write only the minimum necessary
8887 + * amount of min. I/O units.
8889 + sync_len = ALIGN(wbuf->used, c->min_io_size);
8890 + dirt = sync_len - wbuf->used;
8892 + ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
8893 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
8894 - c->min_io_size, wbuf->dtype);
8895 + sync_len, wbuf->dtype);
8897 ubifs_err("cannot write %d bytes to LEB %d:%d",
8898 - c->min_io_size, wbuf->lnum, wbuf->offs);
8899 + sync_len, wbuf->lnum, wbuf->offs);
8904 - dirt = wbuf->avail;
8906 spin_lock(&wbuf->lock);
8907 - wbuf->offs += c->min_io_size;
8908 - wbuf->avail = c->min_io_size;
8909 + wbuf->offs += sync_len;
8911 + * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
8912 + * But our goal is to optimize writes and make sure we write in
8913 + * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
8914 + * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
8915 + * sure that @wbuf->offs + @wbuf->size is aligned to
8916 + * @c->max_write_size. This way we make sure that after next
8917 + * write-buffer flush we are again at the optimal offset (aligned to
8918 + * @c->max_write_size).
8920 + if (c->leb_size - wbuf->offs < c->max_write_size)
8921 + wbuf->size = c->leb_size - wbuf->offs;
8922 + else if (wbuf->offs & (c->max_write_size - 1))
8923 + wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
8925 + wbuf->size = c->max_write_size;
8926 + wbuf->avail = wbuf->size;
8929 spin_unlock(&wbuf->lock);
8930 @@ -393,31 +451,31 @@ int ubifs_wbuf_sync_nolock(struct ubifs_
8933 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
8934 - * The write-buffer is synchronized if it is not empty. Returns zero in case of
8935 - * success and a negative error code in case of failure.
8936 + * The write-buffer has to be empty. Returns zero in case of success and a
8937 + * negative error code in case of failure.
8939 int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
8942 const struct ubifs_info *c = wbuf->c;
8944 - dbg_io("LEB %d:%d, jhead %d", lnum, offs, wbuf->jhead);
8945 + dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
8946 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
8947 ubifs_assert(offs >= 0 && offs <= c->leb_size);
8948 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
8949 ubifs_assert(lnum != wbuf->lnum);
8951 - if (wbuf->used > 0) {
8952 - int err = ubifs_wbuf_sync_nolock(wbuf);
8957 + ubifs_assert(wbuf->used == 0);
8959 spin_lock(&wbuf->lock);
8962 - wbuf->avail = c->min_io_size;
8963 + if (c->leb_size - wbuf->offs < c->max_write_size)
8964 + wbuf->size = c->leb_size - wbuf->offs;
8965 + else if (wbuf->offs & (c->max_write_size - 1))
8966 + wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
8968 + wbuf->size = c->max_write_size;
8969 + wbuf->avail = wbuf->size;
8971 spin_unlock(&wbuf->lock);
8972 wbuf->dtype = dtype;
8973 @@ -437,11 +495,12 @@ int ubifs_bg_wbufs_sync(struct ubifs_inf
8977 + ubifs_assert(!c->ro_media && !c->ro_mount);
8978 if (!c->need_wbuf_sync)
8980 c->need_wbuf_sync = 0;
8982 - if (c->ro_media) {
8983 + if (c->ro_error) {
8987 @@ -496,8 +555,9 @@ out_timers:
8989 * This function writes data to flash via write-buffer @wbuf. This means that
8990 * the last piece of the node won't reach the flash media immediately if it
8991 - * does not take whole minimal I/O unit. Instead, the node will sit in RAM
8992 - * until the write-buffer is synchronized (e.g., by timer).
8993 + * does not take whole max. write unit (@c->max_write_size). Instead, the node
8994 + * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
8995 + * because more data are appended to the write-buffer).
8997 * This function returns zero in case of success and a negative error code in
8998 * case of failure. If the node cannot be written because there is no more
8999 @@ -506,16 +566,23 @@ out_timers:
9000 int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
9002 struct ubifs_info *c = wbuf->c;
9003 - int err, written, n, aligned_len = ALIGN(len, 8), offs;
9004 + int err, written, n, aligned_len = ALIGN(len, 8);
9006 - dbg_io("%d bytes (%s) to jhead %d wbuf at LEB %d:%d", len,
9007 - dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->jhead,
9008 - wbuf->lnum, wbuf->offs + wbuf->used);
9009 + dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
9010 + dbg_ntype(((struct ubifs_ch *)buf)->node_type),
9011 + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
9012 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
9013 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
9014 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
9015 - ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
9016 + ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
9017 + ubifs_assert(wbuf->size >= c->min_io_size);
9018 + ubifs_assert(wbuf->size <= c->max_write_size);
9019 + ubifs_assert(wbuf->size % c->min_io_size == 0);
9020 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
9021 + ubifs_assert(!c->ro_media && !c->ro_mount);
9022 + ubifs_assert(!c->space_fixup);
9023 + if (c->leb_size - wbuf->offs >= c->max_write_size)
9024 + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
9026 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
9028 @@ -524,7 +591,7 @@ int ubifs_wbuf_write_nolock(struct ubifs
9030 cancel_wbuf_timer_nolock(wbuf);
9036 if (aligned_len <= wbuf->avail) {
9037 @@ -535,17 +602,21 @@ int ubifs_wbuf_write_nolock(struct ubifs
9038 memcpy(wbuf->buf + wbuf->used, buf, len);
9040 if (aligned_len == wbuf->avail) {
9041 - dbg_io("flush jhead %d wbuf to LEB %d:%d",
9042 - wbuf->jhead, wbuf->lnum, wbuf->offs);
9043 + dbg_io("flush jhead %s wbuf to LEB %d:%d",
9044 + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
9045 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
9046 - wbuf->offs, c->min_io_size,
9047 + wbuf->offs, wbuf->size,
9052 spin_lock(&wbuf->lock);
9053 - wbuf->offs += c->min_io_size;
9054 - wbuf->avail = c->min_io_size;
9055 + wbuf->offs += wbuf->size;
9056 + if (c->leb_size - wbuf->offs >= c->max_write_size)
9057 + wbuf->size = c->max_write_size;
9059 + wbuf->size = c->leb_size - wbuf->offs;
9060 + wbuf->avail = wbuf->size;
9063 spin_unlock(&wbuf->lock);
9064 @@ -559,39 +630,63 @@ int ubifs_wbuf_write_nolock(struct ubifs
9069 - * The node is large enough and does not fit entirely within current
9070 - * minimal I/O unit. We have to fill and flush write-buffer and switch
9071 - * to the next min. I/O unit.
9073 - dbg_io("flush jhead %d wbuf to LEB %d:%d",
9074 - wbuf->jhead, wbuf->lnum, wbuf->offs);
9075 - memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
9076 - err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
9077 - c->min_io_size, wbuf->dtype);
9082 - offs = wbuf->offs + c->min_io_size;
9083 - len -= wbuf->avail;
9084 - aligned_len -= wbuf->avail;
9085 - written = wbuf->avail;
9088 + * The node is large enough and does not fit entirely within
9089 + * current available space. We have to fill and flush
9090 + * write-buffer and switch to the next max. write unit.
9092 + dbg_io("flush jhead %s wbuf to LEB %d:%d",
9093 + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
9094 + memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
9095 + err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
9096 + wbuf->size, wbuf->dtype);
9100 + wbuf->offs += wbuf->size;
9101 + len -= wbuf->avail;
9102 + aligned_len -= wbuf->avail;
9103 + written += wbuf->avail;
9104 + } else if (wbuf->offs & (c->max_write_size - 1)) {
9106 + * The write-buffer offset is not aligned to
9107 + * @c->max_write_size and @wbuf->size is less than
9108 + * @c->max_write_size. Write @wbuf->size bytes to make sure the
9109 + * following writes are done in optimal @c->max_write_size
9112 + dbg_io("write %d bytes to LEB %d:%d",
9113 + wbuf->size, wbuf->lnum, wbuf->offs);
9114 + err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
9115 + wbuf->size, wbuf->dtype);
9119 + wbuf->offs += wbuf->size;
9120 + len -= wbuf->size;
9121 + aligned_len -= wbuf->size;
9122 + written += wbuf->size;
9126 - * The remaining data may take more whole min. I/O units, so write the
9127 - * remains multiple to min. I/O unit size directly to the flash media.
9128 + * The remaining data may take more whole max. write units, so write the
9129 + * remains multiple to max. write unit size directly to the flash media.
9130 * We align node length to 8-byte boundary because we anyway flash wbuf
9131 * if the remaining space is less than 8 bytes.
9133 - n = aligned_len >> c->min_io_shift;
9134 + n = aligned_len >> c->max_write_shift;
9136 - n <<= c->min_io_shift;
9137 - dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
9138 - err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
9140 + n <<= c->max_write_shift;
9141 + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
9143 + err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
9144 + wbuf->offs, n, wbuf->dtype);
9152 @@ -601,14 +696,17 @@ int ubifs_wbuf_write_nolock(struct ubifs
9155 * And now we have what's left and what does not take whole
9156 - * min. I/O unit, so write it to the write-buffer and we are
9157 + * max. write unit, so write it to the write-buffer and we are
9160 memcpy(wbuf->buf, buf + written, len);
9162 - wbuf->offs = offs;
9163 + if (c->leb_size - wbuf->offs >= c->max_write_size)
9164 + wbuf->size = c->max_write_size;
9166 + wbuf->size = c->leb_size - wbuf->offs;
9167 + wbuf->avail = wbuf->size - aligned_len;
9168 wbuf->used = aligned_len;
9169 - wbuf->avail = c->min_io_size - aligned_len;
9171 spin_unlock(&wbuf->lock);
9173 @@ -660,8 +758,10 @@ int ubifs_write_node(struct ubifs_info *
9175 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
9176 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
9177 + ubifs_assert(!c->ro_media && !c->ro_mount);
9178 + ubifs_assert(!c->space_fixup);
9184 ubifs_prepare_node(c, buf, len, 1);
9185 @@ -698,8 +798,8 @@ int ubifs_read_node_wbuf(struct ubifs_wb
9186 int err, rlen, overlap;
9187 struct ubifs_ch *ch = buf;
9189 - dbg_io("LEB %d:%d, %s, length %d, jhead %d", lnum, offs,
9190 - dbg_ntype(type), len, wbuf->jhead);
9191 + dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
9192 + dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
9193 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
9194 ubifs_assert(!(offs & 7) && offs < c->leb_size);
9195 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
9196 @@ -812,7 +912,8 @@ int ubifs_read_node(const struct ubifs_i
9200 - ubifs_err("bad node at LEB %d:%d", lnum, offs);
9201 + ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs,
9202 + ubi_is_mapped(c->ubi, lnum));
9203 dbg_dump_node(c, buf);
9206 @@ -830,11 +931,11 @@ int ubifs_wbuf_init(struct ubifs_info *c
9210 - wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
9211 + wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
9215 - size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
9216 + size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
9217 wbuf->inodes = kmalloc(size, GFP_KERNEL);
9218 if (!wbuf->inodes) {
9220 @@ -844,7 +945,14 @@ int ubifs_wbuf_init(struct ubifs_info *c
9223 wbuf->lnum = wbuf->offs = -1;
9224 - wbuf->avail = c->min_io_size;
9226 + * If the LEB starts at the max. write size aligned address, then
9227 + * write-buffer size has to be set to @c->max_write_size. Otherwise,
9228 + * set it to something smaller so that it ends at the closest max.
9229 + * write size boundary.
9231 + size = c->max_write_size - (c->leb_start % c->max_write_size);
9232 + wbuf->avail = wbuf->size = size;
9233 wbuf->dtype = UBI_UNKNOWN;
9234 wbuf->sync_callback = NULL;
9235 mutex_init(&wbuf->io_mutex);
9236 diff -uprN linux-2.6.28/fs/ubifs/journal.c ubifs-v2.6.28/fs/ubifs/journal.c
9237 --- linux-2.6.28/fs/ubifs/journal.c 2011-06-15 15:12:27.000000000 -0400
9238 +++ ubifs-v2.6.28/fs/ubifs/journal.c 2011-06-15 14:22:09.000000000 -0400
9239 @@ -114,7 +114,7 @@ static inline void zero_trun_node_unused
9241 static int reserve_space(struct ubifs_info *c, int jhead, int len)
9243 - int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
9244 + int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
9245 struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
9248 @@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_in
9249 * better to try to allocate space at the ends of eraseblocks. This is
9250 * what the squeeze parameter does.
9252 + ubifs_assert(!c->ro_media && !c->ro_mount);
9253 squeeze = (jhead == BASEHD);
9255 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
9257 - if (c->ro_media) {
9258 + if (c->ro_error) {
9262 @@ -139,16 +140,9 @@ again:
9263 * Write buffer wasn't seek'ed or there is no enough space - look for an
9264 * LEB with some empty space.
9266 - lnum = ubifs_find_free_space(c, len, &free, squeeze);
9268 - /* Found an LEB, add it to the journal head */
9269 - offs = c->leb_size - free;
9270 - err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
9273 - /* A new bud was successfully allocated and added to the log */
9274 + lnum = ubifs_find_free_space(c, len, &offs, squeeze);
9281 @@ -159,7 +153,7 @@ again:
9282 * some. But the write-buffer mutex has to be unlocked because
9285 - dbg_jnl("no free space jhead %d, run GC", jhead);
9286 + dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead));
9287 mutex_unlock(&wbuf->io_mutex);
9289 lnum = ubifs_garbage_collect(c, 0);
9290 @@ -174,7 +168,8 @@ again:
9291 * because we dropped @wbuf->io_mutex, so try once
9294 - dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
9295 + dbg_jnl("GC couldn't make a free LEB for jhead %s",
9296 + dbg_jhead(jhead));
9297 if (retries++ < 2) {
9298 dbg_jnl("retry (%d)", retries);
9300 @@ -185,13 +180,13 @@ again:
9303 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
9304 - dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
9305 + dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead));
9306 avail = c->leb_size - wbuf->offs - wbuf->used;
9308 if (wbuf->lnum != -1 && avail >= len) {
9310 * Someone else has switched the journal head and we have
9311 - * enough space now. This happens when more then one process is
9312 + * enough space now. This happens when more than one process is
9313 * trying to write to the same journal head at the same time.
9315 dbg_jnl("return LEB %d back, already have LEB %d:%d",
9316 @@ -202,12 +197,23 @@ again:
9320 - err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
9327 + * Make sure we synchronize the write-buffer before we add the new bud
9328 + * to the log. Otherwise we may have a power cut after the log
9329 + * reference node for the last bud (@lnum) is written but before the
9330 + * write-buffer data are written to the next-to-last bud
9331 + * (@wbuf->lnum). And the effect would be that the recovery would see
9332 + * that there is corruption in the next-to-last bud.
9334 + err = ubifs_wbuf_sync_nolock(wbuf);
9337 + err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
9340 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
9343 @@ -256,7 +262,8 @@ static int write_node(struct ubifs_info
9344 *lnum = c->jheads[jhead].wbuf.lnum;
9345 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
9347 - dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
9348 + dbg_jnl("jhead %s, LEB %d:%d, len %d",
9349 + dbg_jhead(jhead), *lnum, *offs, len);
9350 ubifs_prepare_node(c, node, len, 0);
9352 return ubifs_wbuf_write_nolock(wbuf, node, len);
9353 @@ -286,7 +293,8 @@ static int write_head(struct ubifs_info
9355 *lnum = c->jheads[jhead].wbuf.lnum;
9356 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
9357 - dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
9358 + dbg_jnl("jhead %s, LEB %d:%d, len %d",
9359 + dbg_jhead(jhead), *lnum, *offs, len);
9361 err = ubifs_wbuf_write_nolock(wbuf, buf, len);
9363 @@ -377,10 +385,8 @@ out:
9364 if (err == -ENOSPC) {
9365 /* This are some budgeting problems, print useful information */
9366 down_write(&c->commit_sem);
9367 - spin_lock(&c->space_lock);
9370 - spin_unlock(&c->space_lock);
9371 + dbg_dump_budg(c, &c->bi);
9373 cmt_retries = dbg_check_lprops(c);
9374 up_write(&c->commit_sem);
9375 @@ -469,10 +475,7 @@ static void pack_inode(struct ubifs_info
9376 ino->flags = cpu_to_le32(ui->flags);
9377 ino->size = cpu_to_le64(ui->ui_size);
9378 ino->nlink = cpu_to_le32(inode->i_nlink);
9379 - if (ui->compr_type == UBIFS_COMPR_LZO999)
9380 - ino->compr_type = cpu_to_le16(UBIFS_COMPR_LZO);
9382 - ino->compr_type = cpu_to_le16(ui->compr_type);
9383 + ino->compr_type = cpu_to_le16(ui->compr_type);
9384 ino->data_len = cpu_to_le32(ui->data_len);
9385 ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt);
9386 ino->xattr_size = cpu_to_le32(ui->xattr_size);
9387 @@ -666,6 +669,7 @@ out_free:
9390 release_head(c, BASEHD);
9393 ubifs_ro_mode(c, err);
9395 @@ -690,7 +694,7 @@ int ubifs_jnl_write_data(struct ubifs_in
9397 struct ubifs_data_node *data;
9398 int err, lnum, offs, compr_type, out_len;
9399 - int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
9400 + int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
9401 struct ubifs_inode *ui = ubifs_inode(inode);
9403 dbg_jnl("ino %lu, blk %u, len %d, key %s",
9404 @@ -698,9 +702,19 @@ int ubifs_jnl_write_data(struct ubifs_in
9406 ubifs_assert(len <= UBIFS_BLOCK_SIZE);
9408 - data = kmalloc(dlen, GFP_NOFS);
9411 + data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
9414 + * Fall-back to the write reserve buffer. Note, we might be
9415 + * currently on the memory reclaim path, when the kernel is
9416 + * trying to free some memory by writing out dirty pages. The
9417 + * write reserve buffer helps us to guarantee that we are
9418 + * always able to write the data.
9421 + mutex_lock(&c->write_reserve_mutex);
9422 + data = c->write_reserve_buf;
9425 data->ch.node_type = UBIFS_DATA_NODE;
9426 key_write(c, key, &data->key);
9427 @@ -736,7 +750,10 @@ int ubifs_jnl_write_data(struct ubifs_in
9430 finish_reservation(c);
9433 + mutex_unlock(&c->write_reserve_mutex);
9439 @@ -745,7 +762,10 @@ out_ro:
9440 ubifs_ro_mode(c, err);
9441 finish_reservation(c);
9445 + mutex_unlock(&c->write_reserve_mutex);
9451 @@ -1369,7 +1389,7 @@ out_ro:
9454 * This function writes the updated version of an extended attribute inode and
9455 - * the host inode tho the journal (to the base head). The host inode is written
9456 + * the host inode to the journal (to the base head). The host inode is written
9457 * after the extended attribute inode in order to guarantee that the extended
9458 * attribute will be flushed when the inode is synchronized by 'fsync()' and
9459 * consequently, the write-buffer is synchronized. This function returns zero
9460 diff -uprN linux-2.6.28/fs/ubifs/Kconfig ubifs-v2.6.28/fs/ubifs/Kconfig
9461 --- linux-2.6.28/fs/ubifs/Kconfig 2008-12-24 18:26:37.000000000 -0500
9462 +++ ubifs-v2.6.28/fs/ubifs/Kconfig 2011-06-15 14:22:09.000000000 -0400
9463 @@ -40,33 +40,21 @@ config UBIFS_FS_ZLIB
9467 - Zlib copresses better then LZO but it is slower. Say 'Y' if unsure.
9468 + Zlib compresses better than LZO but it is slower. Say 'Y' if unsure.
9470 # Debugging-related stuff
9471 config UBIFS_FS_DEBUG
9472 - bool "Enable debugging"
9473 + bool "Enable debugging support"
9476 - select KALLSYMS_ALL
9479 - This option enables UBIFS debugging.
9481 -config UBIFS_FS_DEBUG_MSG_LVL
9482 - int "Default message level (0 = no extra messages, 3 = lots)"
9483 - depends on UBIFS_FS_DEBUG
9486 - This controls the amount of debugging messages produced by UBIFS.
9487 - If reporting bugs, please try to have available a full dump of the
9488 - messages at level 1 while the misbehaviour was occurring. Level 2
9489 - may become necessary if level 1 messages were not enough to find the
9490 - bug. Generally Level 3 should be avoided.
9492 -config UBIFS_FS_DEBUG_CHKS
9493 - bool "Enable extra checks"
9494 - depends on UBIFS_FS_DEBUG
9496 - If extra checks are enabled UBIFS will check the consistency of its
9497 - internal data structures during operation. However, UBIFS performance
9498 - is dramatically slower when this option is selected especially if the
9499 - file system is large.
9500 + This option enables UBIFS debugging support. It makes sure various
9501 + assertions, self-checks, debugging messages and test modes are compiled
9502 + in (this all is compiled out otherwise). Assertions are light-weight
9503 + and this option also enables them. Self-checks, debugging messages and
9504 + test modes are switched off by default. Thus, it is safe and actually
9505 + recommended to have debugging support enabled, and it should not slow
9506 + down UBIFS. You can then further enable / disable individual debugging
9507 + features using UBIFS module parameters and the corresponding sysfs
9509 diff -uprN linux-2.6.28/fs/ubifs/key.h ubifs-v2.6.28/fs/ubifs/key.h
9510 --- linux-2.6.28/fs/ubifs/key.h 2011-06-15 15:12:27.000000000 -0400
9511 +++ ubifs-v2.6.28/fs/ubifs/key.h 2011-06-15 14:22:09.000000000 -0400
9512 @@ -229,23 +229,6 @@ static inline void xent_key_init(const s
9516 - * xent_key_init_hash - initialize extended attribute entry key without
9517 - * re-calculating hash function.
9518 - * @c: UBIFS file-system description object
9519 - * @key: key to initialize
9520 - * @inum: host inode number
9521 - * @hash: extended attribute entry name hash
9523 -static inline void xent_key_init_hash(const struct ubifs_info *c,
9524 - union ubifs_key *key, ino_t inum,
9527 - ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
9528 - key->u32[0] = inum;
9529 - key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
9533 * xent_key_init_flash - initialize on-flash extended attribute entry key.
9534 * @c: UBIFS file-system description object
9535 * @k: key to initialize
9536 @@ -295,22 +278,15 @@ static inline void data_key_init(const s
9540 - * data_key_init_flash - initialize on-flash data key.
9541 + * highest_data_key - get the highest possible data key for an inode.
9542 * @c: UBIFS file-system description object
9543 - * @k: key to initialize
9544 + * @key: key to initialize
9545 * @inum: inode number
9546 - * @block: block number
9548 -static inline void data_key_init_flash(const struct ubifs_info *c, void *k,
9549 - ino_t inum, unsigned int block)
9550 +static inline void highest_data_key(const struct ubifs_info *c,
9551 + union ubifs_key *key, ino_t inum)
9553 - union ubifs_key *key = k;
9555 - ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
9556 - key->j32[0] = cpu_to_le32(inum);
9557 - key->j32[1] = cpu_to_le32(block |
9558 - (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS));
9559 - memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
9560 + data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK);
9564 @@ -330,6 +306,20 @@ static inline void trun_key_init(const s
9568 + * invalid_key_init - initialize invalid node key.
9569 + * @c: UBIFS file-system description object
9570 + * @key: key to initialize
9572 + * This is a helper function which marks a @key object as invalid.
9574 +static inline void invalid_key_init(const struct ubifs_info *c,
9575 + union ubifs_key *key)
9577 + key->u32[0] = 0xDEADBEAF;
9578 + key->u32[1] = UBIFS_INVALID_KEY;
9582 * key_type - get key type.
9583 * @c: UBIFS file-system description object
9584 * @key: key to get type of
9585 @@ -381,8 +371,8 @@ static inline ino_t key_inum_flash(const
9586 * @c: UBIFS file-system description object
9587 * @key: the key to get hash from
9589 -static inline int key_hash(const struct ubifs_info *c,
9590 - const union ubifs_key *key)
9591 +static inline uint32_t key_hash(const struct ubifs_info *c,
9592 + const union ubifs_key *key)
9594 return key->u32[1] & UBIFS_S_KEY_HASH_MASK;
9596 @@ -392,7 +382,7 @@ static inline int key_hash(const struct
9597 * @c: UBIFS file-system description object
9598 * @k: the key to get hash from
9600 -static inline int key_hash_flash(const struct ubifs_info *c, const void *k)
9601 +static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k)
9603 const union ubifs_key *key = k;
9605 @@ -554,4 +544,5 @@ static inline unsigned long long key_max
9610 #endif /* !__UBIFS_KEY_H__ */
9611 diff -uprN linux-2.6.28/fs/ubifs/log.c ubifs-v2.6.28/fs/ubifs/log.c
9612 --- linux-2.6.28/fs/ubifs/log.c 2008-12-24 18:26:37.000000000 -0500
9613 +++ ubifs-v2.6.28/fs/ubifs/log.c 2011-06-15 14:22:09.000000000 -0400
9614 @@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct
9618 - * next_log_lnum - switch to the next log LEB.
9619 - * @c: UBIFS file-system description object
9620 - * @lnum: current log LEB
9622 -static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
9625 - if (lnum > c->log_last)
9626 - lnum = UBIFS_LOG_LNUM;
9632 * empty_log_bytes - calculate amount of empty space in the log.
9633 * @c: UBIFS file-system description object
9635 @@ -159,7 +145,7 @@ void ubifs_add_bud(struct ubifs_info *c,
9636 jhead = &c->jheads[bud->jhead];
9637 list_add_tail(&bud->list, &jhead->buds_list);
9639 - ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));
9640 + ubifs_assert(c->replaying && c->ro_mount);
9643 * Note, although this is a new bud, we anyway account this space now,
9644 @@ -169,28 +155,8 @@ void ubifs_add_bud(struct ubifs_info *c,
9646 c->bud_bytes += c->leb_size - bud->start;
9648 - dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
9649 - bud->start, bud->jhead, c->bud_bytes);
9650 - spin_unlock(&c->buds_lock);
9654 - * ubifs_create_buds_lists - create journal head buds lists for remount rw.
9655 - * @c: UBIFS file-system description object
9657 -void ubifs_create_buds_lists(struct ubifs_info *c)
9659 - struct rb_node *p;
9661 - spin_lock(&c->buds_lock);
9662 - p = rb_first(&c->buds);
9664 - struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
9665 - struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
9667 - list_add_tail(&bud->list, &jhead->buds_list);
9670 + dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
9671 + bud->start, dbg_jhead(bud->jhead), c->bud_bytes);
9672 spin_unlock(&c->buds_lock);
9675 @@ -223,8 +189,8 @@ int ubifs_add_bud_to_log(struct ubifs_in
9678 mutex_lock(&c->log_mutex);
9680 - if (c->ro_media) {
9681 + ubifs_assert(!c->ro_media && !c->ro_mount);
9682 + if (c->ro_error) {
9686 @@ -239,7 +205,7 @@ int ubifs_add_bud_to_log(struct ubifs_in
9690 - * Make sure the the amount of space in buds will not exceed
9691 + * Make sure the amount of space in buds will not exceed the
9692 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
9695 @@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_in
9696 ref->jhead = cpu_to_le32(jhead);
9698 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
9699 - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
9700 + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
9704 @@ -355,19 +321,18 @@ static void remove_buds(struct ubifs_inf
9705 * heads (non-closed buds).
9707 c->cmt_bud_bytes += wbuf->offs - bud->start;
9708 - dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
9709 + dbg_log("preserve %d:%d, jhead %s, bud bytes %d, "
9710 "cmt_bud_bytes %lld", bud->lnum, bud->start,
9711 - bud->jhead, wbuf->offs - bud->start,
9712 + dbg_jhead(bud->jhead), wbuf->offs - bud->start,
9714 bud->start = wbuf->offs;
9716 c->cmt_bud_bytes += c->leb_size - bud->start;
9717 - dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
9718 + dbg_log("remove %d:%d, jhead %s, bud bytes %d, "
9719 "cmt_bud_bytes %lld", bud->lnum, bud->start,
9720 - bud->jhead, c->leb_size - bud->start,
9721 + dbg_jhead(bud->jhead), c->leb_size - bud->start,
9723 rb_erase(p1, &c->buds);
9724 - list_del(&bud->list);
9726 * If the commit does not finish, the recovery will need
9727 * to replay the journal, in which case the old buds
9728 @@ -375,7 +340,7 @@ static void remove_buds(struct ubifs_inf
9729 * commit i.e. do not allow them to be garbage
9732 - list_add(&bud->list, &c->old_buds);
9733 + list_move(&bud->list, &c->old_buds);
9736 spin_unlock(&c->buds_lock);
9737 @@ -430,7 +395,8 @@ int ubifs_log_start_commit(struct ubifs_
9738 if (lnum == -1 || offs == c->leb_size)
9741 - dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
9742 + dbg_log("add ref to LEB %d:%d for jhead %s",
9743 + lnum, offs, dbg_jhead(i));
9745 ref->ch.node_type = UBIFS_REF_NODE;
9746 ref->lnum = cpu_to_le32(lnum);
9747 @@ -445,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_
9749 /* Switch to the next log LEB */
9750 if (c->lhead_offs) {
9751 - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
9752 + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
9756 @@ -466,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_
9758 c->lhead_offs += len;
9759 if (c->lhead_offs == c->leb_size) {
9760 - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
9761 + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
9765 @@ -553,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_i
9767 mutex_lock(&c->log_mutex);
9768 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
9769 - lnum = next_log_lnum(c, lnum)) {
9770 + lnum = ubifs_next_log_lnum(c, lnum)) {
9771 dbg_log("unmap log LEB %d", lnum);
9772 err = ubifs_leb_unmap(c, lnum);
9774 @@ -662,7 +628,7 @@ static int add_node(struct ubifs_info *c
9775 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
9778 - *lnum = next_log_lnum(c, *lnum);
9779 + *lnum = ubifs_next_log_lnum(c, *lnum);
9782 memcpy(buf + *offs, node, len);
9783 @@ -696,7 +662,7 @@ int ubifs_consolidate_log(struct ubifs_i
9784 lnum = c->ltail_lnum;
9787 - sleb = ubifs_scan(c, lnum, 0, c->sbuf);
9788 + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
9790 err = PTR_ERR(sleb);
9792 @@ -732,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_i
9793 ubifs_scan_destroy(sleb);
9794 if (lnum == c->lhead_lnum)
9796 - lnum = next_log_lnum(c, lnum);
9797 + lnum = ubifs_next_log_lnum(c, lnum);
9800 int sz = ALIGN(offs, c->min_io_size);
9801 @@ -752,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_i
9802 /* Unmap remaining LEBs */
9805 - lnum = next_log_lnum(c, lnum);
9806 + lnum = ubifs_next_log_lnum(c, lnum);
9807 err = ubifs_leb_unmap(c, lnum);
9810 diff -uprN linux-2.6.28/fs/ubifs/lprops.c ubifs-v2.6.28/fs/ubifs/lprops.c
9811 --- linux-2.6.28/fs/ubifs/lprops.c 2011-06-15 15:12:27.000000000 -0400
9812 +++ ubifs-v2.6.28/fs/ubifs/lprops.c 2011-06-15 14:22:09.000000000 -0400
9813 @@ -281,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info
9815 if (add_to_lpt_heap(c, lprops, cat))
9817 - /* No more room on heap so make it uncategorized */
9818 + /* No more room on heap so make it un-categorized */
9822 @@ -375,8 +375,8 @@ void ubifs_replace_cat(struct ubifs_info
9823 * @lprops: LEB properties
9825 * A LEB may have fallen off of the bottom of a heap, and ended up as
9826 - * uncategorized even though it has enough space for us now. If that is the case
9827 - * this function will put the LEB back onto a heap.
9828 + * un-categorized even though it has enough space for us now. If that is the
9829 + * case this function will put the LEB back onto a heap.
9831 void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
9833 @@ -436,10 +436,10 @@ int ubifs_categorize_lprops(const struct
9835 * change_category - change LEB properties category.
9836 * @c: UBIFS file-system description object
9837 - * @lprops: LEB properties to recategorize
9838 + * @lprops: LEB properties to re-categorize
9840 * LEB properties are categorized to enable fast find operations. When the LEB
9841 - * properties change they must be recategorized.
9842 + * properties change they must be re-categorized.
9844 static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
9846 @@ -461,21 +461,18 @@ static void change_category(struct ubifs
9850 - * calc_dark - calculate LEB dark space size.
9851 + * ubifs_calc_dark - calculate LEB dark space size.
9852 * @c: the UBIFS file-system description object
9853 * @spc: amount of free and dirty space in the LEB
9855 - * This function calculates amount of dark space in an LEB which has @spc bytes
9856 - * of free and dirty space. Returns the calculations result.
9857 + * This function calculates and returns amount of dark space in an LEB which
9858 + * has @spc bytes of free and dirty space.
9860 - * Dark space is the space which is not always usable - it depends on which
9861 - * nodes are written in which order. E.g., if an LEB has only 512 free bytes,
9862 - * it is dark space, because it cannot fit a large data node. So UBIFS cannot
9863 - * count on this LEB and treat these 512 bytes as usable because it is not true
9864 - * if, for example, only big chunks of uncompressible data will be written to
9866 + * UBIFS is trying to account the space which might not be usable, and this
9867 + * space is called "dark space". For example, if an LEB has only %512 free
9868 + * bytes, it is dark space, because it cannot fit a large data node.
9870 -static int calc_dark(struct ubifs_info *c, int spc)
9871 +int ubifs_calc_dark(const struct ubifs_info *c, int spc)
9873 ubifs_assert(!(spc & 7));
9875 @@ -518,7 +515,7 @@ static int is_lprops_dirty(struct ubifs_
9876 * @free: new free space amount
9877 * @dirty: new dirty space amount
9879 - * @idx_gc_cnt: change to the count of idx_gc list
9880 + * @idx_gc_cnt: change to the count of @idx_gc list
9882 * This function changes LEB properties (@free, @dirty or @flag). However, the
9883 * property which has the %LPROPS_NC value is not changed. Returns a pointer to
9884 @@ -535,7 +532,7 @@ const struct ubifs_lprops *ubifs_change_
9887 * This is the only function that is allowed to change lprops, so we
9888 - * discard the const qualifier.
9889 + * discard the "const" qualifier.
9891 struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;
9893 @@ -575,7 +572,7 @@ const struct ubifs_lprops *ubifs_change_
9894 if (old_spc < c->dead_wm)
9895 c->lst.total_dead -= old_spc;
9897 - c->lst.total_dark -= calc_dark(c, old_spc);
9898 + c->lst.total_dark -= ubifs_calc_dark(c, old_spc);
9900 c->lst.total_used -= c->leb_size - old_spc;
9902 @@ -616,7 +613,7 @@ const struct ubifs_lprops *ubifs_change_
9903 if (new_spc < c->dead_wm)
9904 c->lst.total_dead += new_spc;
9906 - c->lst.total_dark += calc_dark(c, new_spc);
9907 + c->lst.total_dark += ubifs_calc_dark(c, new_spc);
9909 c->lst.total_used += c->leb_size - new_spc;
9911 @@ -1010,21 +1007,11 @@ out:
9915 - * struct scan_check_data - data provided to scan callback function.
9916 - * @lst: LEB properties statistics
9917 - * @err: error code
9919 -struct scan_check_data {
9920 - struct ubifs_lp_stats lst;
9925 * scan_check_cb - scan callback.
9926 * @c: the UBIFS file-system description object
9927 * @lp: LEB properties to scan
9928 * @in_tree: whether the LEB properties are in main memory
9929 - * @data: information passed to and from the caller of the scan
9930 + * @lst: lprops statistics to update
9932 * This function returns a code that indicates whether the scan should continue
9933 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
9934 @@ -1033,12 +1020,12 @@ struct scan_check_data {
9936 static int scan_check_cb(struct ubifs_info *c,
9937 const struct ubifs_lprops *lp, int in_tree,
9938 - struct scan_check_data *data)
9939 + struct ubifs_lp_stats *lst)
9941 struct ubifs_scan_leb *sleb;
9942 struct ubifs_scan_node *snod;
9943 - struct ubifs_lp_stats *lst = &data->lst;
9944 - int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty;
9945 + int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
9948 cat = lp->flags & LPROPS_CAT_MASK;
9949 if (cat != LPROPS_UNCAT) {
9950 @@ -1046,7 +1033,7 @@ static int scan_check_cb(struct ubifs_in
9951 if (cat != (lp->flags & LPROPS_CAT_MASK)) {
9952 ubifs_err("bad LEB category %d expected %d",
9953 (lp->flags & LPROPS_CAT_MASK), cat);
9959 @@ -1080,7 +1067,7 @@ static int scan_check_cb(struct ubifs_in
9962 ubifs_err("bad LPT list (category %d)", cat);
9968 @@ -1092,36 +1079,40 @@ static int scan_check_cb(struct ubifs_in
9969 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
9970 lp != heap->arr[lp->hpos]) {
9971 ubifs_err("bad LPT heap (category %d)", cat);
9977 - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
9978 + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
9983 + * After an unclean unmount, empty and freeable LEBs
9984 + * may contain garbage - do not scan them.
9986 + if (lp->free == c->leb_size) {
9987 + lst->empty_lebs += 1;
9988 + lst->total_free += c->leb_size;
9989 + lst->total_dark += ubifs_calc_dark(c, c->leb_size);
9990 + return LPT_SCAN_CONTINUE;
9992 + if (lp->free + lp->dirty == c->leb_size &&
9993 + !(lp->flags & LPROPS_INDEX)) {
9994 + lst->total_free += lp->free;
9995 + lst->total_dirty += lp->dirty;
9996 + lst->total_dark += ubifs_calc_dark(c, c->leb_size);
9997 + return LPT_SCAN_CONTINUE;
10000 + sleb = ubifs_scan(c, lnum, 0, buf, 0);
10001 if (IS_ERR(sleb)) {
10003 - * After an unclean unmount, empty and freeable LEBs
10004 - * may contain garbage.
10006 - if (lp->free == c->leb_size) {
10007 - ubifs_err("scan errors were in empty LEB "
10008 - "- continuing checking");
10009 - lst->empty_lebs += 1;
10010 - lst->total_free += c->leb_size;
10011 - lst->total_dark += calc_dark(c, c->leb_size);
10012 - return LPT_SCAN_CONTINUE;
10015 - if (lp->free + lp->dirty == c->leb_size &&
10016 - !(lp->flags & LPROPS_INDEX)) {
10017 - ubifs_err("scan errors were in freeable LEB "
10018 - "- continuing checking");
10019 - lst->total_free += lp->free;
10020 - lst->total_dirty += lp->dirty;
10021 - lst->total_dark += calc_dark(c, c->leb_size);
10022 - return LPT_SCAN_CONTINUE;
10023 + ret = PTR_ERR(sleb);
10024 + if (ret == -EUCLEAN) {
10025 + dbg_dump_lprops(c);
10026 + dbg_dump_budg(c, &c->bi);
10028 - data->err = PTR_ERR(sleb);
10029 - return LPT_SCAN_STOP;
10034 @@ -1235,10 +1226,11 @@ static int scan_check_cb(struct ubifs_in
10035 if (spc < c->dead_wm)
10036 lst->total_dead += spc;
10038 - lst->total_dark += calc_dark(c, spc);
10039 + lst->total_dark += ubifs_calc_dark(c, spc);
10042 ubifs_scan_destroy(sleb);
10044 return LPT_SCAN_CONTINUE;
10047 @@ -1248,9 +1240,10 @@ out_print:
10048 dbg_dump_leb(c, lnum);
10050 ubifs_scan_destroy(sleb);
10053 - data->err = -EINVAL;
10054 - return LPT_SCAN_STOP;
10060 @@ -1267,8 +1260,7 @@ out:
10061 int dbg_check_lprops(struct ubifs_info *c)
10064 - struct scan_check_data data;
10065 - struct ubifs_lp_stats *lst = &data.lst;
10066 + struct ubifs_lp_stats lst;
10068 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
10070 @@ -1283,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *
10074 - memset(lst, 0, sizeof(struct ubifs_lp_stats));
10077 + memset(&lst, 0, sizeof(struct ubifs_lp_stats));
10078 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
10079 (ubifs_lpt_scan_callback)scan_check_cb,
10082 if (err && err != -ENOSPC)
10089 - if (lst->empty_lebs != c->lst.empty_lebs ||
10090 - lst->idx_lebs != c->lst.idx_lebs ||
10091 - lst->total_free != c->lst.total_free ||
10092 - lst->total_dirty != c->lst.total_dirty ||
10093 - lst->total_used != c->lst.total_used) {
10094 + if (lst.empty_lebs != c->lst.empty_lebs ||
10095 + lst.idx_lebs != c->lst.idx_lebs ||
10096 + lst.total_free != c->lst.total_free ||
10097 + lst.total_dirty != c->lst.total_dirty ||
10098 + lst.total_used != c->lst.total_used) {
10099 ubifs_err("bad overall accounting");
10100 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
10101 "total_free %lld, total_dirty %lld, total_used %lld",
10102 - lst->empty_lebs, lst->idx_lebs, lst->total_free,
10103 - lst->total_dirty, lst->total_used);
10104 + lst.empty_lebs, lst.idx_lebs, lst.total_free,
10105 + lst.total_dirty, lst.total_used);
10106 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
10107 "total_free %lld, total_dirty %lld, total_used %lld",
10108 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
10109 @@ -1314,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *
10113 - if (lst->total_dead != c->lst.total_dead ||
10114 - lst->total_dark != c->lst.total_dark) {
10115 + if (lst.total_dead != c->lst.total_dead ||
10116 + lst.total_dark != c->lst.total_dark) {
10117 ubifs_err("bad dead/dark space accounting");
10118 ubifs_err("calculated: total_dead %lld, total_dark %lld",
10119 - lst->total_dead, lst->total_dark);
10120 + lst.total_dead, lst.total_dark);
10121 ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
10122 c->lst.total_dead, c->lst.total_dark);
10124 diff -uprN linux-2.6.28/fs/ubifs/lpt.c ubifs-v2.6.28/fs/ubifs/lpt.c
10125 --- linux-2.6.28/fs/ubifs/lpt.c 2011-06-15 15:12:27.000000000 -0400
10126 +++ ubifs-v2.6.28/fs/ubifs/lpt.c 2011-06-15 14:22:09.000000000 -0400
10127 @@ -1269,10 +1269,9 @@ static int read_pnode(struct ubifs_info
10128 lnum = branch->lnum;
10129 offs = branch->offs;
10130 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
10140 * This pnode was not written which just means that the LEB
10141 @@ -1362,6 +1361,7 @@ static int read_lsave(struct ubifs_info
10143 for (i = 0; i < c->lsave_cnt; i++) {
10144 int lnum = c->lsave[i];
10145 + struct ubifs_lprops *lprops;
10148 * Due to automatic resizing, the values in the lsave table
10149 @@ -1369,7 +1369,11 @@ static int read_lsave(struct ubifs_info
10151 if (lnum >= c->leb_cnt)
10153 - ubifs_lpt_lookup(c, lnum);
10154 + lprops = ubifs_lpt_lookup(c, lnum);
10155 + if (IS_ERR(lprops)) {
10156 + err = PTR_ERR(lprops);
10162 @@ -1456,13 +1460,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(st
10163 shft -= UBIFS_LPT_FANOUT_SHIFT;
10164 nnode = ubifs_get_nnode(c, nnode, iip);
10166 - return ERR_PTR(PTR_ERR(nnode));
10167 + return ERR_CAST(nnode);
10169 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
10170 shft -= UBIFS_LPT_FANOUT_SHIFT;
10171 pnode = ubifs_get_pnode(c, nnode, iip);
10173 - return ERR_PTR(PTR_ERR(pnode));
10174 + return ERR_CAST(pnode);
10175 iip = (i & (UBIFS_LPT_FANOUT - 1));
10176 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
10177 pnode->lprops[iip].free, pnode->lprops[iip].dirty,
10178 @@ -1585,7 +1589,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_di
10180 nnode = dirty_cow_nnode(c, nnode);
10182 - return ERR_PTR(PTR_ERR(nnode));
10183 + return ERR_CAST(nnode);
10184 i = lnum - c->main_first;
10185 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
10186 for (h = 1; h < c->lpt_hght; h++) {
10187 @@ -1593,19 +1597,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_di
10188 shft -= UBIFS_LPT_FANOUT_SHIFT;
10189 nnode = ubifs_get_nnode(c, nnode, iip);
10191 - return ERR_PTR(PTR_ERR(nnode));
10192 + return ERR_CAST(nnode);
10193 nnode = dirty_cow_nnode(c, nnode);
10195 - return ERR_PTR(PTR_ERR(nnode));
10196 + return ERR_CAST(nnode);
10198 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
10199 shft -= UBIFS_LPT_FANOUT_SHIFT;
10200 pnode = ubifs_get_pnode(c, nnode, iip);
10202 - return ERR_PTR(PTR_ERR(pnode));
10203 + return ERR_CAST(pnode);
10204 pnode = dirty_cow_pnode(c, pnode);
10206 - return ERR_PTR(PTR_ERR(pnode));
10207 + return ERR_CAST(pnode);
10208 iip = (i & (UBIFS_LPT_FANOUT - 1));
10209 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
10210 pnode->lprops[iip].free, pnode->lprops[iip].dirty,
10211 diff -uprN linux-2.6.28/fs/ubifs/lpt_commit.c ubifs-v2.6.28/fs/ubifs/lpt_commit.c
10212 --- linux-2.6.28/fs/ubifs/lpt_commit.c 2011-06-15 15:12:27.000000000 -0400
10213 +++ ubifs-v2.6.28/fs/ubifs/lpt_commit.c 2011-06-15 14:22:09.000000000 -0400
10215 #include <linux/crc16.h>
10218 +#ifdef CONFIG_UBIFS_FS_DEBUG
10219 +static int dbg_populate_lsave(struct ubifs_info *c);
10221 +#define dbg_populate_lsave(c) 0
10225 * first_dirty_cnode - find first dirty cnode.
10226 * @c: UBIFS file-system description object
10227 @@ -585,7 +591,7 @@ static struct ubifs_pnode *next_pnode_to
10228 if (nnode->nbranch[iip].lnum)
10231 - } while (iip >= UBIFS_LPT_FANOUT);
10232 + } while (iip >= UBIFS_LPT_FANOUT);
10235 nnode = ubifs_get_nnode(c, nnode, iip);
10236 @@ -645,7 +651,7 @@ static struct ubifs_pnode *pnode_lookup(
10237 shft -= UBIFS_LPT_FANOUT_SHIFT;
10238 nnode = ubifs_get_nnode(c, nnode, iip);
10240 - return ERR_PTR(PTR_ERR(nnode));
10241 + return ERR_CAST(nnode);
10243 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
10244 return ubifs_get_pnode(c, nnode, iip);
10245 @@ -704,6 +710,9 @@ static int make_tree_dirty(struct ubifs_
10246 struct ubifs_pnode *pnode;
10248 pnode = pnode_lookup(c, 0);
10249 + if (IS_ERR(pnode))
10250 + return PTR_ERR(pnode);
10253 do_make_pnode_dirty(c, pnode);
10254 pnode = next_pnode_to_dirty(c, pnode);
10255 @@ -811,6 +820,10 @@ static void populate_lsave(struct ubifs_
10256 c->lpt_drty_flgs |= LSAVE_DIRTY;
10257 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
10260 + if (dbg_populate_lsave(c))
10263 list_for_each_entry(lprops, &c->empty_list, list) {
10264 c->lsave[cnt++] = lprops->lnum;
10265 if (cnt >= c->lsave_cnt)
10266 @@ -1624,29 +1637,35 @@ static int dbg_check_ltab_lnum(struct ub
10268 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
10270 - void *buf = c->dbg->buf;
10273 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
10276 + buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
10278 + ubifs_err("cannot allocate memory for ltab checking");
10282 dbg_lp("LEB %d", lnum);
10283 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
10285 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
10290 - if (!is_a_node(c, buf, len)) {
10291 + if (!is_a_node(c, p, len)) {
10294 - pad_len = get_pad_len(c, buf, len);
10295 + pad_len = get_pad_len(c, p, len);
10303 - if (!dbg_is_all_ff(buf, len)) {
10304 + if (!dbg_is_all_ff(p, len)) {
10305 dbg_msg("invalid empty space in LEB %d at %d",
10306 lnum, c->leb_size - len);
10308 @@ -1664,16 +1683,21 @@ static int dbg_check_ltab_lnum(struct ub
10309 lnum, dirty, c->ltab[i].dirty);
10315 - node_type = get_lpt_node_type(c, buf, &node_num);
10316 + node_type = get_lpt_node_type(c, p, &node_num);
10317 node_len = get_lpt_node_len(c, node_type);
10318 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
10333 @@ -1866,25 +1890,31 @@ int dbg_chk_lpt_sz(struct ubifs_info *c,
10334 static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
10336 int err, len = c->leb_size, node_type, node_num, node_len, offs;
10337 - void *buf = c->dbg->buf;
10340 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
10341 current->pid, lnum);
10342 + buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
10344 + ubifs_err("cannot allocate memory to dump LPT");
10348 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
10350 ubifs_err("cannot read LEB %d, error %d", lnum, err);
10355 offs = c->leb_size - len;
10356 - if (!is_a_node(c, buf, len)) {
10357 + if (!is_a_node(c, p, len)) {
10360 - pad_len = get_pad_len(c, buf, len);
10361 + pad_len = get_pad_len(c, p, len);
10363 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
10364 lnum, offs, pad_len);
10370 @@ -1894,7 +1924,7 @@ static void dump_lpt_leb(const struct ub
10374 - node_type = get_lpt_node_type(c, buf, &node_num);
10375 + node_type = get_lpt_node_type(c, p, &node_num);
10376 switch (node_type) {
10377 case UBIFS_LPT_PNODE:
10379 @@ -1919,14 +1949,14 @@ static void dump_lpt_leb(const struct ub
10381 printk(KERN_DEBUG "LEB %d:%d, nnode, ",
10383 - err = ubifs_unpack_nnode(c, buf, &nnode);
10384 + err = ubifs_unpack_nnode(c, p, &nnode);
10385 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
10386 - printk("%d:%d", nnode.nbranch[i].lnum,
10387 + printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
10388 nnode.nbranch[i].offs);
10389 if (i != UBIFS_LPT_FANOUT - 1)
10391 + printk(KERN_CONT ", ");
10394 + printk(KERN_CONT "\n");
10397 case UBIFS_LPT_LTAB:
10398 @@ -1940,15 +1970,18 @@ static void dump_lpt_leb(const struct ub
10401 ubifs_err("LPT node type %d not recognized", node_type);
10411 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
10412 current->pid, lnum);
10419 @@ -1970,4 +2003,47 @@ void dbg_dump_lpt_lebs(const struct ubif
10424 + * dbg_populate_lsave - debugging version of 'populate_lsave()'
10425 + * @c: UBIFS file-system description object
10427 + * This is a debugging version for 'populate_lsave()' which populates lsave
10428 + * with random LEBs instead of useful LEBs, which is good for test coverage.
10429 + * Returns zero if lsave has not been populated (this debugging feature is
10430 + * disabled) and non-zero if lsave has been populated.
10432 +static int dbg_populate_lsave(struct ubifs_info *c)
10434 + struct ubifs_lprops *lprops;
10435 + struct ubifs_lpt_heap *heap;
10438 + if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
10440 + if (random32() & 3)
10443 + for (i = 0; i < c->lsave_cnt; i++)
10444 + c->lsave[i] = c->main_first;
10446 + list_for_each_entry(lprops, &c->empty_list, list)
10447 + c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
10448 + list_for_each_entry(lprops, &c->freeable_list, list)
10449 + c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
10450 + list_for_each_entry(lprops, &c->frdi_idx_list, list)
10451 + c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
10453 + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
10454 + for (i = 0; i < heap->cnt; i++)
10455 + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
10456 + heap = &c->lpt_heap[LPROPS_DIRTY - 1];
10457 + for (i = 0; i < heap->cnt; i++)
10458 + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
10459 + heap = &c->lpt_heap[LPROPS_FREE - 1];
10460 + for (i = 0; i < heap->cnt; i++)
10461 + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
10466 #endif /* CONFIG_UBIFS_FS_DEBUG */
10467 diff -uprN linux-2.6.28/fs/ubifs/master.c ubifs-v2.6.28/fs/ubifs/master.c
10468 --- linux-2.6.28/fs/ubifs/master.c 2011-06-15 15:12:27.000000000 -0400
10469 +++ ubifs-v2.6.28/fs/ubifs/master.c 2011-06-15 14:22:09.000000000 -0400
10471 * @c: UBIFS file-system description object
10473 * This function scans the master node LEBs and search for the latest master
10474 - * node. Returns zero in case of success and a negative error code in case of
10475 + * node. Returns zero in case of success, %-EUCLEAN if the master area is
10476 + * corrupted and requires recovery, and a negative error code in case of
10479 static int scan_for_master(struct ubifs_info *c)
10480 @@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_
10482 lnum = UBIFS_MST_LNUM;
10484 - sleb = ubifs_scan(c, lnum, 0, c->sbuf);
10485 + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
10487 return PTR_ERR(sleb);
10488 nodes_cnt = sleb->nodes_cnt;
10489 @@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_
10490 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
10492 if (snod->type != UBIFS_MST_NODE)
10495 memcpy(c->mst_node, snod->node, snod->len);
10498 @@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_
10502 - sleb = ubifs_scan(c, lnum, 0, c->sbuf);
10503 + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
10505 return PTR_ERR(sleb);
10506 if (sleb->nodes_cnt != nodes_cnt)
10507 @@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_
10509 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
10510 if (snod->type != UBIFS_MST_NODE)
10513 if (snod->offs != offs)
10515 if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
10516 @@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_
10519 ubifs_scan_destroy(sleb);
10523 + ubifs_err("unexpected node type %d master LEB %d:%d",
10524 + snod->type, lnum, snod->offs);
10525 + ubifs_scan_destroy(sleb);
10529 @@ -141,7 +148,7 @@ static int validate_master(const struct
10532 main_sz = (long long)c->main_lebs * c->leb_size;
10533 - if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
10534 + if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
10538 @@ -211,7 +218,7 @@ static int validate_master(const struct
10541 if (c->lst.total_dead + c->lst.total_dark +
10542 - c->lst.total_used + c->old_idx_sz > main_sz) {
10543 + c->lst.total_used + c->bi.old_idx_sz > main_sz) {
10547 @@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info
10549 err = scan_for_master(c);
10551 - err = ubifs_recover_master_node(c);
10552 + if (err == -EUCLEAN)
10553 + err = ubifs_recover_master_node(c);
10556 * Note, we do not free 'c->mst_node' here because the
10557 @@ -278,7 +286,7 @@ int ubifs_read_master(struct ubifs_info
10558 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
10559 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
10560 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
10561 - c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
10562 + c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
10563 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
10564 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
10565 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
10566 @@ -297,7 +305,7 @@ int ubifs_read_master(struct ubifs_info
10567 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
10568 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
10570 - c->calc_idx_sz = c->old_idx_sz;
10571 + c->calc_idx_sz = c->bi.old_idx_sz;
10573 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
10575 @@ -353,7 +361,8 @@ int ubifs_write_master(struct ubifs_info
10577 int err, lnum, offs, len;
10580 + ubifs_assert(!c->ro_media && !c->ro_mount);
10584 lnum = UBIFS_MST_LNUM;
10585 diff -uprN linux-2.6.28/fs/ubifs/misc.h ubifs-v2.6.28/fs/ubifs/misc.h
10586 --- linux-2.6.28/fs/ubifs/misc.h 2008-12-24 18:26:37.000000000 -0500
10587 +++ ubifs-v2.6.28/fs/ubifs/misc.h 2011-06-15 14:22:09.000000000 -0400
10588 @@ -132,7 +132,8 @@ static inline int ubifs_leb_unmap(const
10593 + ubifs_assert(!c->ro_media && !c->ro_mount);
10596 err = ubi_leb_unmap(c->ubi, lnum);
10598 @@ -159,7 +160,8 @@ static inline int ubifs_leb_write(const
10603 + ubifs_assert(!c->ro_media && !c->ro_mount);
10606 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
10608 @@ -186,7 +188,8 @@ static inline int ubifs_leb_change(const
10613 + ubifs_assert(!c->ro_media && !c->ro_mount);
10616 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
10618 @@ -337,4 +340,21 @@ static inline void ubifs_release_lprops(
10619 mutex_unlock(&c->lp_mutex);
10623 + * ubifs_next_log_lnum - switch to the next log LEB.
10624 + * @c: UBIFS file-system description object
10625 + * @lnum: current log LEB
10627 + * This helper function returns the log LEB number which goes next after LEB
10630 +static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
10633 + if (lnum > c->log_last)
10634 + lnum = UBIFS_LOG_LNUM;
10639 #endif /* __UBIFS_MISC_H__ */
10640 diff -uprN linux-2.6.28/fs/ubifs/orphan.c ubifs-v2.6.28/fs/ubifs/orphan.c
10641 --- linux-2.6.28/fs/ubifs/orphan.c 2011-06-15 15:12:27.000000000 -0400
10642 +++ ubifs-v2.6.28/fs/ubifs/orphan.c 2011-06-15 14:22:09.000000000 -0400
10643 @@ -670,9 +670,11 @@ static int kill_orphans(struct ubifs_inf
10644 struct ubifs_scan_leb *sleb;
10646 dbg_rcvry("LEB %d", lnum);
10647 - sleb = ubifs_scan(c, lnum, 0, c->sbuf);
10648 + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
10649 if (IS_ERR(sleb)) {
10650 - sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
10651 + if (PTR_ERR(sleb) == -EUCLEAN)
10652 + sleb = ubifs_recover_leb(c, lnum, 0,
10654 if (IS_ERR(sleb)) {
10655 err = PTR_ERR(sleb);
10657 @@ -891,15 +893,22 @@ static int dbg_read_orphans(struct check
10658 static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
10663 /* Check no-orphans flag and skip this if no orphans */
10667 + buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
10669 + ubifs_err("cannot allocate memory to check orphans");
10673 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
10674 struct ubifs_scan_leb *sleb;
10676 - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
10677 + sleb = ubifs_scan(c, lnum, 0, buf, 0);
10678 if (IS_ERR(sleb)) {
10679 err = PTR_ERR(sleb);
10681 @@ -911,6 +920,7 @@ static int dbg_scan_orphans(struct ubifs
10689 diff -uprN linux-2.6.28/fs/ubifs/recovery.c ubifs-v2.6.28/fs/ubifs/recovery.c
10690 --- linux-2.6.28/fs/ubifs/recovery.c 2011-06-15 15:12:27.000000000 -0400
10691 +++ ubifs-v2.6.28/fs/ubifs/recovery.c 2011-06-15 14:22:09.000000000 -0400
10692 @@ -24,10 +24,27 @@
10693 * This file implements functions needed to recover from unclean un-mounts.
10694 * When UBIFS is mounted, it checks a flag on the master node to determine if
10695 * an un-mount was completed sucessfully. If not, the process of mounting
10696 - * incorparates additional checking and fixing of on-flash data structures.
10697 + * incorporates additional checking and fixing of on-flash data structures.
10698 * UBIFS always cleans away all remnants of an unclean un-mount, so that
10699 * errors do not accumulate. However UBIFS defers recovery if it is mounted
10700 * read-only, and the flash is not modified in that case.
10702 + * The general UBIFS approach to the recovery is that it recovers from
10703 + * corruptions which could be caused by power cuts, but it refuses to recover
10704 + * from corruption caused by other reasons. And UBIFS tries to distinguish
10705 + * between these 2 reasons of corruptions and silently recover in the former
10706 + * case and loudly complain in the latter case.
10708 + * UBIFS writes only to erased LEBs, so it writes only to the flash space
10709 + * containing only 0xFFs. UBIFS also always writes strictly from the beginning
10710 + * of the LEB to the end. And UBIFS assumes that the underlying flash media
10711 + * writes in @c->max_write_size bytes at a time.
10713 + * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
10714 + * I/O unit corresponding to offset X to contain corrupted data, all the
10715 + * following min. I/O units have to contain empty space (all 0xFFs). If this is
10716 + * not true, the corruption cannot be the result of a power cut, and UBIFS
10717 + * refuses to mount.
10720 #include <linux/crc32.h>
10721 @@ -53,6 +70,25 @@ static int is_empty(void *buf, int len)
10725 + * first_non_ff - find offset of the first non-0xff byte.
10726 + * @buf: buffer to search in
10727 + * @len: length of buffer
10729 + * This function returns offset of the first non-0xff byte in @buf or %-1 if
10730 + * the buffer contains only 0xff bytes.
10732 +static int first_non_ff(void *buf, int len)
10734 + uint8_t *p = buf;
10737 + for (i = 0; i < len; i++)
10738 + if (*p++ != 0xff)
10744 * get_master_node - get the last valid master node allowing for corruption.
10745 * @c: UBIFS file-system description object
10746 * @lnum: LEB number
10747 @@ -267,12 +303,12 @@ int ubifs_recover_master_node(struct ubi
10751 - dbg_rcvry("recovered master node from LEB %d",
10752 + ubifs_msg("recovered master node from LEB %d",
10753 (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
10755 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
10757 - if ((c->vfs_sb->s_flags & MS_RDONLY)) {
10758 + if (c->ro_mount) {
10759 /* Read-only mode. Keep a copy for switching to rw mode */
10760 c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
10761 if (!c->rcvrd_mst_node) {
10762 @@ -280,6 +316,32 @@ int ubifs_recover_master_node(struct ubi
10765 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
10768 + * We had to recover the master node, which means there was an
10769 + * unclean reboot. However, it is possible that the master node
10770 + * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
10771 + * E.g., consider the following chain of events:
10773 + * 1. UBIFS was cleanly unmounted, so the master node is clean
10774 + * 2. UBIFS is being mounted R/W and starts changing the master
10775 + * node in the first (%UBIFS_MST_LNUM). A power cut happens,
10776 + * so this LEB ends up with some amount of garbage at the
10778 + * 3. UBIFS is being mounted R/O. We reach this place and
10779 + * recover the master node from the second LEB
10780 + * (%UBIFS_MST_LNUM + 1). But we cannot update the media
10781 + * because we are being mounted R/O. We have to defer the
10783 + * 4. However, this master node (@c->mst_node) is marked as
10784 + * clean (since the step 1). And if we just return, the
10785 + * mount code will be confused and won't recover the master
10786 + * node when it is re-mounted R/W later.
10788 + * Thus, to force the recovery by marking the master node as
10791 + c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
10793 /* Write the recovered master node */
10794 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
10795 @@ -342,44 +404,23 @@ int ubifs_write_rcvrd_mst_node(struct ub
10796 * @offs: offset to check
10798 * This function returns %1 if @offs was in the last write to the LEB whose data
10799 - * is in @buf, otherwise %0 is returned. The determination is made by checking
10800 - * for subsequent empty space starting from the next min_io_size boundary (or a
10801 - * bit less than the common header size if min_io_size is one).
10802 + * is in @buf, otherwise %0 is returned. The determination is made by checking
10803 + * for subsequent empty space starting from the next @c->max_write_size
10806 static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
10810 + int empty_offs, check_len;
10813 - if (c->min_io_size == 1) {
10814 - check_len = c->leb_size - offs;
10815 - p = buf + check_len;
10816 - for (; check_len > 0; check_len--)
10817 - if (*--p != 0xff)
10820 - * 'check_len' is the size of the corruption which cannot be
10821 - * more than the size of 1 node if it was caused by an unclean
10824 - if (check_len > UBIFS_MAX_NODE_SZ)
10830 - * Round up to the next c->min_io_size boundary i.e. 'offs' is in the
10831 - * last wbuf written. After that should be empty space.
10832 + * Round up to the next @c->max_write_size boundary i.e. @offs is in
10833 + * the last wbuf written. After that should be empty space.
10835 - empty_offs = ALIGN(offs + 1, c->min_io_size);
10836 + empty_offs = ALIGN(offs + 1, c->max_write_size);
10837 check_len = c->leb_size - empty_offs;
10838 p = buf + empty_offs - offs;
10840 - for (; check_len > 0; check_len--)
10841 - if (*p++ != 0xff)
10844 + return is_empty(p, check_len);
10848 @@ -392,7 +433,7 @@ static int is_last_write(const struct ub
10850 * This function pads up to the next min_io_size boundary (if there is one) and
10851 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
10852 - * min_io_size boundary (if there is one).
10853 + * @c->min_io_size boundary.
10855 static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
10856 int *offs, int *len)
10857 @@ -402,11 +443,6 @@ static void clean_buf(const struct ubifs
10859 dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
10861 - if (c->min_io_size == 1) {
10862 - memset(*buf, 0xff, c->leb_size - *offs);
10866 ubifs_assert(!(*offs & 7));
10867 empty_offs = ALIGN(*offs, c->min_io_size);
10868 pad_len = empty_offs - *offs;
10869 @@ -436,7 +472,7 @@ static int no_more_nodes(const struct ub
10870 int skip, dlen = le32_to_cpu(ch->len);
10872 /* Check for empty space after the corrupt node's common header */
10873 - skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs;
10874 + skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
10875 if (is_empty(buf + skip, len - skip))
10878 @@ -448,7 +484,7 @@ static int no_more_nodes(const struct ub
10881 /* Now we know the corrupt node's length we can skip over it */
10882 - skip = ALIGN(offs + dlen, c->min_io_size) - offs;
10883 + skip = ALIGN(offs + dlen, c->max_write_size) - offs;
10884 /* After which there should be empty space */
10885 if (is_empty(buf + skip, len - skip))
10887 @@ -476,7 +512,7 @@ static int fix_unclean_leb(struct ubifs_
10888 endpt = snod->offs + snod->len;
10891 - if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
10892 + if (c->ro_mount && !c->remounting_rw) {
10893 /* Add to recovery list */
10894 struct ubifs_unclean_leb *ucleb;
10896 @@ -527,16 +563,15 @@ static int fix_unclean_leb(struct ubifs_
10900 - * drop_incomplete_group - drop nodes from an incomplete group.
10901 + * drop_last_group - drop the last group of nodes.
10902 * @sleb: scanned LEB information
10903 * @offs: offset of dropped nodes is returned here
10905 - * This function returns %1 if nodes are dropped and %0 otherwise.
10906 + * This is a helper function for 'ubifs_recover_leb()' which drops the last
10907 + * group of nodes of the scanned LEB.
10909 -static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
10910 +static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
10914 while (!list_empty(&sleb->nodes)) {
10915 struct ubifs_scan_node *snod;
10916 struct ubifs_ch *ch;
10917 @@ -545,15 +580,40 @@ static int drop_incomplete_group(struct
10920 if (ch->group_type != UBIFS_IN_NODE_GROUP)
10922 - dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
10925 + dbg_rcvry("dropping grouped node at %d:%d",
10926 + sleb->lnum, snod->offs);
10927 + *offs = snod->offs;
10928 + list_del(&snod->list);
10930 + sleb->nodes_cnt -= 1;
10935 + * drop_last_node - drop the last node.
10936 + * @sleb: scanned LEB information
10937 + * @offs: offset of dropped nodes is returned here
10938 + * @grouped: non-zero if whole group of nodes have to be dropped
10940 + * This is a helper function for 'ubifs_recover_leb()' which drops the last
10941 + * node of the scanned LEB.
10943 +static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
10945 + struct ubifs_scan_node *snod;
10947 + if (!list_empty(&sleb->nodes)) {
10948 + snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
10951 + dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs);
10952 *offs = snod->offs;
10953 list_del(&snod->list);
10955 sleb->nodes_cnt -= 1;
10962 @@ -562,33 +622,30 @@ static int drop_incomplete_group(struct
10963 * @lnum: LEB number
10965 * @sbuf: LEB-sized buffer to use
10966 - * @grouped: nodes may be grouped for recovery
10967 + * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
10968 + * belong to any journal head)
10970 * This function does a scan of a LEB, but caters for errors that might have
10971 * been caused by the unclean unmount from which we are attempting to recover.
10973 - * This function returns %0 on success and a negative error code on failure.
10974 + * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
10975 + * found, and a negative error code in case of failure.
10977 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
10978 - int offs, void *sbuf, int grouped)
10979 + int offs, void *sbuf, int jhead)
10981 - int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
10982 - int empty_chkd = 0, start = offs;
10983 + int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
10984 + int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
10985 struct ubifs_scan_leb *sleb;
10986 void *buf = sbuf + offs;
10988 - dbg_rcvry("%d:%d", lnum, offs);
10989 + dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
10991 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
10998 + ubifs_assert(len >= 8);
11002 dbg_scan("look at LEB %d:%d (%d bytes left)",
11005 @@ -598,8 +655,7 @@ struct ubifs_scan_leb *ubifs_recover_leb
11006 * Scan quietly until there is an error from which we cannot
11009 - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
11011 + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
11012 if (ret == SCANNED_A_NODE) {
11013 /* A valid node, and not a padding node */
11014 struct ubifs_ch *ch = buf;
11015 @@ -612,98 +668,126 @@ struct ubifs_scan_leb *ubifs_recover_leb
11023 + } else if (ret > 0) {
11024 /* Padding bytes or a valid padding node */
11031 - if (ret == SCANNED_EMPTY_SPACE) {
11032 - if (!is_empty(buf, len)) {
11033 - if (!is_last_write(c, buf, offs))
11035 - clean_buf(c, &buf, lnum, &offs, &len);
11039 + } else if (ret == SCANNED_EMPTY_SPACE ||
11040 + ret == SCANNED_GARBAGE ||
11041 + ret == SCANNED_A_BAD_PAD_NODE ||
11042 + ret == SCANNED_A_CORRUPT_NODE) {
11043 + dbg_rcvry("found corruption - %d", ret);
11046 + dbg_err("unexpected return value %d", ret);
11052 - if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
11053 - if (is_last_write(c, buf, offs)) {
11054 - clean_buf(c, &buf, lnum, &offs, &len);
11060 - if (ret == SCANNED_A_CORRUPT_NODE)
11061 - if (no_more_nodes(c, buf, len, lnum, offs)) {
11062 - clean_buf(c, &buf, lnum, &offs, &len);
11069 - /* Redo the last scan but noisily */
11073 + if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
11074 + if (!is_last_write(c, buf, offs))
11075 + goto corrupted_rescan;
11076 + } else if (ret == SCANNED_A_CORRUPT_NODE) {
11077 + if (!no_more_nodes(c, buf, len, lnum, offs))
11078 + goto corrupted_rescan;
11079 + } else if (!is_empty(buf, len)) {
11080 + if (!is_last_write(c, buf, offs)) {
11081 + int corruption = first_non_ff(buf, len);
11084 - case SCANNED_GARBAGE:
11085 - dbg_err("garbage");
11087 - case SCANNED_A_CORRUPT_NODE:
11088 - case SCANNED_A_BAD_PAD_NODE:
11089 - dbg_err("bad node");
11092 - dbg_err("unknown");
11094 + * See header comment for this file for more
11095 + * explanations about the reasons we have this check.
11097 + ubifs_err("corrupt empty space LEB %d:%d, corruption "
11098 + "starts at %d", lnum, offs, corruption);
11099 + /* Make sure we dump interesting non-0xFF data */
11100 + offs += corruption;
11101 + buf += corruption;
11106 - if (!empty_chkd && !is_empty(buf, len)) {
11107 - if (is_last_write(c, buf, offs)) {
11108 - clean_buf(c, &buf, lnum, &offs, &len);
11111 - ubifs_err("corrupt empty space at LEB %d:%d",
11116 + min_io_unit = round_down(offs, c->min_io_size);
11119 + * If nodes are grouped, always drop the incomplete group at
11122 + drop_last_group(sleb, &offs);
11124 - /* Drop nodes from incomplete group */
11125 - if (grouped && drop_incomplete_group(sleb, &offs)) {
11126 - buf = sbuf + offs;
11127 - len = c->leb_size - offs;
11128 - clean_buf(c, &buf, lnum, &offs, &len);
11130 + if (jhead == GCHD) {
11132 + * If this LEB belongs to the GC head then while we are in the
11133 + * middle of the same min. I/O unit keep dropping nodes. So
11134 + * basically, what we want is to make sure that the last min.
11135 + * I/O unit where we saw the corruption is dropped completely
11136 + * with all the uncorrupted nodes which may possibly sit there.
11138 + * In other words, let's name the min. I/O unit where the
11139 + * corruption starts B, and the previous min. I/O unit A. The
11140 + * below code tries to deal with a situation when half of B
11141 + * contains valid nodes or the end of a valid node, and the
11142 + * second half of B contains corrupted data or garbage. This
11143 + * means that UBIFS had been writing to B just before the power
11144 + * cut happened. I do not know how realistic is this scenario
11145 + * that half of the min. I/O unit had been written successfully
11146 + * and the other half not, but this is possible in our 'failure
11147 + * mode emulation' infrastructure at least.
11149 + * So what is the problem, why we need to drop those nodes? Why
11150 + * can't we just clean-up the second half of B by putting a
11151 + * padding node there? We can, and this works fine with one
11152 + * exception which was reproduced with power cut emulation
11153 + * testing and happens extremely rarely.
11155 + * Imagine the file-system is full, we run GC which starts
11156 + * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
11157 + * the current GC head LEB). The @c->gc_lnum is -1, which means
11158 + * that GC will retain LEB X and will try to continue. Imagine
11159 + * that LEB X is currently the dirtiest LEB, and the amount of
11160 + * used space in LEB Y is exactly the same as amount of free
11161 + * space in LEB X.
11163 + * And a power cut happens when nodes are moved from LEB X to
11164 + * LEB Y. We are here trying to recover LEB Y which is the GC
11165 + * head LEB. We find the min. I/O unit B as described above.
11166 + * Then we clean-up LEB Y by padding min. I/O unit. And later
11167 + * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
11168 + * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
11169 + * does not match because the amount of valid nodes there does
11170 + * not fit the free space in LEB Y any more! And this is
11171 + * because of the padding node which we added to LEB Y. The
11172 + * user-visible effect of this which I once observed and
11173 + * analysed is that we cannot mount the file-system with
11176 + * So obviously, to make sure that situation does not happen we
11177 + * should free min. I/O unit B in LEB Y completely and the last
11178 + * used min. I/O unit in LEB Y should be A. This is basically
11179 + * what the below code tries to do.
11181 + while (offs > min_io_unit)
11182 + drop_last_node(sleb, &offs);
11185 - if (offs % c->min_io_size) {
11186 - clean_buf(c, &buf, lnum, &offs, &len);
11189 + buf = sbuf + offs;
11190 + len = c->leb_size - offs;
11192 + clean_buf(c, &buf, lnum, &offs, &len);
11193 ubifs_end_scan(c, sleb, lnum, offs);
11195 - if (need_clean) {
11196 - err = fix_unclean_leb(c, sleb, start);
11200 + err = fix_unclean_leb(c, sleb, start);
11207 + /* Re-scan the corrupted data with verbose messages */
11208 + dbg_err("corruptio %d", ret);
11209 + ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
11211 ubifs_scanned_corruption(c, lnum, offs, buf);
11213 @@ -773,7 +857,8 @@ out_free:
11214 * @sbuf: LEB-sized buffer to use
11216 * This function does a scan of a LEB, but caters for errors that might have
11217 - * been caused by the unclean unmount from which we are attempting to recover.
11218 + * been caused by unclean reboots from which we are attempting to recover
11219 + * (assume that only the last log LEB can be corrupted by an unclean reboot).
11221 * This function returns %0 on success and a negative error code on failure.
11223 @@ -792,7 +877,7 @@ struct ubifs_scan_leb *ubifs_recover_log
11224 * We can only recover at the end of the log, so check that the
11225 * next log LEB is empty or out of date.
11227 - sleb = ubifs_scan(c, next_lnum, 0, sbuf);
11228 + sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
11231 if (sleb->nodes_cnt) {
11232 @@ -819,7 +904,7 @@ struct ubifs_scan_leb *ubifs_recover_log
11234 ubifs_scan_destroy(sleb);
11236 - return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
11237 + return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
11241 @@ -836,12 +921,8 @@ struct ubifs_scan_leb *ubifs_recover_log
11242 static int recover_head(const struct ubifs_info *c, int lnum, int offs,
11245 - int len, err, need_clean = 0;
11246 + int len = c->max_write_size, err;
11248 - if (c->min_io_size > 1)
11249 - len = c->min_io_size;
11252 if (offs + len > c->leb_size)
11253 len = c->leb_size - offs;
11255 @@ -850,19 +931,7 @@ static int recover_head(const struct ubi
11257 /* Read at the head location and check it is empty flash */
11258 err = ubi_read(c->ubi, lnum, sbuf, offs, len);
11262 - uint8_t *p = sbuf;
11265 - if (*p++ != 0xff) {
11271 - if (need_clean) {
11272 + if (err || !is_empty(sbuf, len)) {
11273 dbg_rcvry("cleaning head at %d:%d", lnum, offs);
11275 return ubifs_leb_unmap(c, lnum);
11276 @@ -896,7 +965,7 @@ int ubifs_recover_inl_heads(const struct
11280 - ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
11281 + ubifs_assert(!c->ro_mount || c->remounting_rw);
11283 dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
11284 err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
11285 @@ -1038,6 +1107,53 @@ int ubifs_clean_lebs(const struct ubifs_
11289 + * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
11290 + * @c: UBIFS file-system description object
11292 + * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
11293 + * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
11294 + * zero in case of success and a negative error code in case of failure.
11296 +static int grab_empty_leb(struct ubifs_info *c)
11301 + * Note, it is very important to first search for an empty LEB and then
11302 + * run the commit, not vice-versa. The reason is that there might be
11303 + * only one empty LEB at the moment, the one which has been the
11304 + * @c->gc_lnum just before the power cut happened. During the regular
11305 + * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
11306 + * one but GC can grab it. But at this moment this single empty LEB is
11307 + * not marked as taken, so if we run commit - what happens? Right, the
11308 + * commit will grab it and write the index there. Remember that the
11309 + * index always expands as long as there is free space, and it only
11310 + * starts consolidating when we run out of space.
11312 + * IOW, if we run commit now, we might not be able to find a free LEB
11315 + lnum = ubifs_find_free_leb_for_idx(c);
11317 + dbg_err("could not find an empty LEB");
11318 + dbg_dump_lprops(c);
11319 + dbg_dump_budg(c, &c->bi);
11323 + /* Reset the index flag */
11324 + err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
11325 + LPROPS_INDEX, 0);
11329 + c->gc_lnum = lnum;
11330 + dbg_rcvry("found empty LEB %d, run commit", lnum);
11332 + return ubifs_run_commit(c);
11336 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
11337 * @c: UBIFS file-system description object
11339 @@ -1059,58 +1175,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_i
11341 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
11342 struct ubifs_lprops lp;
11346 + dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
11349 - if (wbuf->lnum == -1) {
11350 - dbg_rcvry("no GC head LEB");
11354 - * See whether the used space in the dirtiest LEB fits in the GC head
11357 - if (wbuf->offs == c->leb_size) {
11358 - dbg_rcvry("no room in GC head LEB");
11361 + if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
11362 + return grab_empty_leb(c);
11364 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
11366 - if (err == -ENOSPC)
11367 - dbg_err("could not find a dirty LEB");
11370 - ubifs_assert(!(lp.flags & LPROPS_INDEX));
11372 - if (lp.free + lp.dirty == c->leb_size) {
11373 - /* An empty LEB was returned */
11374 - if (lp.free != c->leb_size) {
11375 - err = ubifs_change_one_lp(c, lnum, c->leb_size,
11380 - err = ubifs_leb_unmap(c, lnum);
11382 + if (err != -ENOSPC)
11384 - c->gc_lnum = lnum;
11385 - dbg_rcvry("allocated LEB %d for GC", lnum);
11386 - /* Run the commit */
11387 - dbg_rcvry("committing");
11388 - return ubifs_run_commit(c);
11391 - * There was no empty LEB so the used space in the dirtiest LEB must fit
11392 - * in the GC head LEB.
11394 - if (lp.free + lp.dirty < wbuf->offs) {
11395 - dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
11396 - lnum, wbuf->lnum, wbuf->offs);
11397 - err = ubifs_return_leb(c, lnum);
11402 + dbg_rcvry("could not find a dirty LEB");
11403 + return grab_empty_leb(c);
11406 + ubifs_assert(!(lp.flags & LPROPS_INDEX));
11407 + ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
11410 * We run the commit before garbage collection otherwise subsequent
11411 * mounts will see the GC and orphan deletion in a different order.
11412 @@ -1119,11 +1203,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_i
11413 err = ubifs_run_commit(c);
11417 - * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
11418 - * - use locking to keep 'ubifs_assert()' happy.
11420 - dbg_rcvry("GC'ing LEB %d", lnum);
11422 + dbg_rcvry("GC'ing LEB %d", lp.lnum);
11423 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
11424 err = ubifs_garbage_collect_leb(c, &lp);
11426 @@ -1139,37 +1220,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_i
11430 - if (err != LEB_RETAINED) {
11431 - dbg_err("GC returned %d", err);
11433 + ubifs_assert(err == LEB_RETAINED);
11434 + if (err != LEB_RETAINED)
11438 err = ubifs_leb_unmap(c, c->gc_lnum);
11441 - dbg_rcvry("allocated LEB %d for GC", lnum);
11446 - * There is no GC head LEB or the free space in the GC head LEB is too
11447 - * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
11450 - lnum = ubifs_find_free_leb_for_idx(c);
11452 - dbg_err("could not find an empty LEB");
11455 - /* And reset the index flag */
11456 - err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
11457 - LPROPS_INDEX, 0);
11460 - c->gc_lnum = lnum;
11461 - dbg_rcvry("allocated LEB %d for GC", lnum);
11462 - /* Run the commit */
11463 - dbg_rcvry("committing");
11464 - return ubifs_run_commit(c);
11465 + dbg_rcvry("allocated LEB %d for GC", lp.lnum);
11470 @@ -1411,7 +1472,7 @@ static int fix_size_in_place(struct ubif
11471 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
11474 - dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ",
11475 + dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
11476 (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
11479 @@ -1460,20 +1521,27 @@ int ubifs_recover_size(struct ubifs_info
11480 e->i_size = le64_to_cpu(ino->size);
11484 if (e->exists && e->i_size < e->d_size) {
11485 - if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
11486 + if (c->ro_mount) {
11487 /* Fix the inode size and pin it in memory */
11488 struct inode *inode;
11489 + struct ubifs_inode *ui;
11491 + ubifs_assert(!e->inode);
11493 inode = ubifs_iget(c->vfs_sb, e->inum);
11495 return PTR_ERR(inode);
11497 + ui = ubifs_inode(inode);
11498 if (inode->i_size < e->d_size) {
11499 dbg_rcvry("ino %lu size %lld -> %lld",
11500 (unsigned long)e->inum,
11501 - e->d_size, inode->i_size);
11502 + inode->i_size, e->d_size);
11503 inode->i_size = e->d_size;
11504 - ubifs_inode(inode)->ui_size = e->d_size;
11505 + ui->ui_size = e->d_size;
11506 + ui->synced_i_size = e->d_size;
11508 this = rb_next(this);
11510 @@ -1488,9 +1556,11 @@ int ubifs_recover_size(struct ubifs_info
11515 this = rb_next(this);
11516 rb_erase(&e->rb, &c->size_tree);
11522 diff -uprN linux-2.6.28/fs/ubifs/replay.c ubifs-v2.6.28/fs/ubifs/replay.c
11523 --- linux-2.6.28/fs/ubifs/replay.c 2011-06-15 15:12:27.000000000 -0400
11524 +++ ubifs-v2.6.28/fs/ubifs/replay.c 2011-06-15 14:22:09.000000000 -0400
11525 @@ -33,43 +33,32 @@
11533 - * REPLAY_DELETION: node was deleted
11534 - * REPLAY_REF: node is a reference node
11537 - REPLAY_DELETION = 1,
11540 +#include <linux/list_sort.h>
11543 - * struct replay_entry - replay tree entry.
11544 + * struct replay_entry - replay list entry.
11545 * @lnum: logical eraseblock number of the node
11546 * @offs: node offset
11547 * @len: node length
11548 + * @deletion: non-zero if this entry corresponds to a node deletion
11549 * @sqnum: node sequence number
11550 - * @flags: replay flags
11551 - * @rb: links the replay tree
11552 + * @list: links the replay list
11554 * @nm: directory entry name
11555 * @old_size: truncation old size
11556 * @new_size: truncation new size
11557 - * @free: amount of free space in a bud
11558 - * @dirty: amount of dirty space in a bud from padding and deletion nodes
11560 - * UBIFS journal replay must compare node sequence numbers, which means it must
11561 - * build a tree of node information to insert into the TNC.
11562 + * The replay process first scans all buds and builds the replay list, then
11563 + * sorts the replay list in nodes sequence number order, and then inserts all
11564 + * the replay entries to the TNC.
11566 struct replay_entry {
11570 + unsigned int deletion:1;
11571 unsigned long long sqnum;
11573 - struct rb_node rb;
11574 + struct list_head list;
11575 union ubifs_key key;
11578 @@ -77,10 +66,6 @@ struct replay_entry {
11589 @@ -88,83 +73,117 @@ struct replay_entry {
11590 * struct bud_entry - entry in the list of buds to replay.
11591 * @list: next bud in the list
11592 * @bud: bud description object
11593 - * @free: free bytes in the bud
11594 * @sqnum: reference node sequence number
11595 + * @free: free bytes in the bud
11596 + * @dirty: dirty bytes in the bud
11599 struct list_head list;
11600 struct ubifs_bud *bud;
11602 unsigned long long sqnum;
11608 * set_bud_lprops - set free and dirty space used by a bud.
11609 * @c: UBIFS file-system description object
11610 - * @r: replay entry of bud
11611 + * @b: bud entry which describes the bud
11613 + * This function makes sure the LEB properties of bud @b are set correctly
11614 + * after the replay. Returns zero in case of success and a negative error code
11615 + * in case of failure.
11617 -static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
11618 +static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
11620 const struct ubifs_lprops *lp;
11621 int err = 0, dirty;
11623 ubifs_get_lprops(c);
11625 - lp = ubifs_lpt_lookup_dirty(c, r->lnum);
11626 + lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
11633 - if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
11634 + if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
11636 * The LEB was added to the journal with a starting offset of
11637 * zero which means the LEB must have been empty. The LEB
11638 - * property values should be lp->free == c->leb_size and
11639 - * lp->dirty == 0, but that is not the case. The reason is that
11640 - * the LEB was garbage collected. The garbage collector resets
11641 - * the free and dirty space without recording it anywhere except
11642 - * lprops, so if there is not a commit then lprops does not have
11643 - * that information next time the file system is mounted.
11644 + * property values should be @lp->free == @c->leb_size and
11645 + * @lp->dirty == 0, but that is not the case. The reason is that
11646 + * the LEB had been garbage collected before it became the bud,
11647 + * and there was not commit inbetween. The garbage collector
11648 + * resets the free and dirty space without recording it
11649 + * anywhere except lprops, so if there was no commit then
11650 + * lprops does not have that information.
11652 * We do not need to adjust free space because the scan has told
11653 * us the exact value which is recorded in the replay entry as
11657 * However we do need to subtract from the dirty space the
11658 * amount of space that the garbage collector reclaimed, which
11659 * is the whole LEB minus the amount of space that was free.
11661 - dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
11662 + dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
11663 lp->free, lp->dirty);
11664 - dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
11665 + dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
11666 lp->free, lp->dirty);
11667 dirty -= c->leb_size - lp->free;
11669 * If the replay order was perfect the dirty space would now be
11670 - * zero. The order is not perfect because the the journal heads
11671 + * zero. The order is not perfect because the journal heads
11672 * race with each other. This is not a problem but is does mean
11673 * that the dirty space may temporarily exceed c->leb_size
11674 * during the replay.
11677 dbg_msg("LEB %d lp: %d free %d dirty "
11678 - "replay: %d free %d dirty", r->lnum, lp->free,
11679 - lp->dirty, r->free, r->dirty);
11680 + "replay: %d free %d dirty", b->bud->lnum,
11681 + lp->free, lp->dirty, b->free, b->dirty);
11683 - lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
11684 + lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
11685 lp->flags | LPROPS_TAKEN, 0);
11691 + /* Make sure the journal head points to the latest bud */
11692 + err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
11693 + b->bud->lnum, c->leb_size - b->free,
11697 ubifs_release_lprops(c);
11702 + * set_buds_lprops - set free and dirty space for all replayed buds.
11703 + * @c: UBIFS file-system description object
11705 + * This function sets LEB properties for all replayed buds. Returns zero in
11706 + * case of success and a negative error code in case of failure.
11708 +static int set_buds_lprops(struct ubifs_info *c)
11710 + struct bud_entry *b;
11713 + list_for_each_entry(b, &c->replay_buds, list) {
11714 + err = set_bud_lprops(c, b);
11723 * trun_remove_range - apply a replay entry for a truncation to the TNC.
11724 * @c: UBIFS file-system description object
11725 * @r: replay entry of truncation
11726 @@ -200,24 +219,22 @@ static int trun_remove_range(struct ubif
11728 static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
11730 - int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
11733 - dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
11734 - r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
11735 + dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
11736 + r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
11738 /* Set c->replay_sqnum to help deal with dangling branches. */
11739 c->replay_sqnum = r->sqnum;
11741 - if (r->flags & REPLAY_REF)
11742 - err = set_bud_lprops(c, r);
11743 - else if (is_hash_key(c, &r->key)) {
11745 + if (is_hash_key(c, &r->key)) {
11747 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
11749 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
11754 switch (key_type(c, &r->key)) {
11755 case UBIFS_INO_KEY:
11757 @@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubi
11760 if (c->need_recovery)
11761 - err = ubifs_recover_size_accum(c, &r->key, deletion,
11762 + err = ubifs_recover_size_accum(c, &r->key, r->deletion,
11766 @@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubi
11770 - * destroy_replay_tree - destroy the replay.
11771 - * @c: UBIFS file-system description object
11772 + * replay_entries_cmp - compare 2 replay entries.
11773 + * @priv: UBIFS file-system description object
11774 + * @a: first replay entry
11775 + * @a: second replay entry
11777 - * Destroy the replay tree.
11778 + * This is a comparios function for 'list_sort()' which compares 2 replay
11779 + * entries @a and @b by comparing their sequence numer. Returns %1 if @a has
11780 + * greater sequence number and %-1 otherwise.
11782 -static void destroy_replay_tree(struct ubifs_info *c)
11783 +static int replay_entries_cmp(void *priv, struct list_head *a,
11784 + struct list_head *b)
11786 - struct rb_node *this = c->replay_tree.rb_node;
11787 - struct replay_entry *r;
11788 + struct replay_entry *ra, *rb;
11791 - if (this->rb_left) {
11792 - this = this->rb_left;
11794 - } else if (this->rb_right) {
11795 - this = this->rb_right;
11798 - r = rb_entry(this, struct replay_entry, rb);
11799 - this = rb_parent(this);
11801 - if (this->rb_left == &r->rb)
11802 - this->rb_left = NULL;
11804 - this->rb_right = NULL;
11806 - if (is_hash_key(c, &r->key))
11807 - kfree(r->nm.name);
11810 - c->replay_tree = RB_ROOT;
11815 + ra = list_entry(a, struct replay_entry, list);
11816 + rb = list_entry(b, struct replay_entry, list);
11817 + ubifs_assert(ra->sqnum != rb->sqnum);
11818 + if (ra->sqnum > rb->sqnum)
11824 - * apply_replay_tree - apply the replay tree to the TNC.
11825 + * apply_replay_list - apply the replay list to the TNC.
11826 * @c: UBIFS file-system description object
11828 - * Apply the replay tree.
11829 - * Returns zero in case of success and a negative error code in case of
11831 + * Apply all entries in the replay list to the TNC. Returns zero in case of
11832 + * success and a negative error code in case of failure.
11834 -static int apply_replay_tree(struct ubifs_info *c)
11835 +static int apply_replay_list(struct ubifs_info *c)
11837 - struct rb_node *this = rb_first(&c->replay_tree);
11838 + struct replay_entry *r;
11842 - struct replay_entry *r;
11844 + list_sort(c, &c->replay_list, &replay_entries_cmp);
11846 + list_for_each_entry(r, &c->replay_list, list) {
11849 - r = rb_entry(this, struct replay_entry, rb);
11850 err = apply_replay_entry(c, r);
11853 - this = rb_next(this);
11860 - * insert_node - insert a node to the replay tree.
11861 + * destroy_replay_list - destroy the replay.
11862 + * @c: UBIFS file-system description object
11864 + * Destroy the replay list.
11866 +static void destroy_replay_list(struct ubifs_info *c)
11868 + struct replay_entry *r, *tmp;
11870 + list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
11871 + if (is_hash_key(c, &r->key))
11872 + kfree(r->nm.name);
11873 + list_del(&r->list);
11879 + * insert_node - insert a node to the replay list
11880 * @c: UBIFS file-system description object
11881 * @lnum: node logical eraseblock number
11882 * @offs: node offset
11883 @@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubif
11884 * @old_size: truncation old size
11885 * @new_size: truncation new size
11887 - * This function inserts a scanned non-direntry node to the replay tree. The
11888 - * replay tree is an RB-tree containing @struct replay_entry elements which are
11889 - * indexed by the sequence number. The replay tree is applied at the very end
11890 - * of the replay process. Since the tree is sorted in sequence number order,
11891 - * the older modifications are applied first. This function returns zero in
11892 - * case of success and a negative error code in case of failure.
11893 + * This function inserts a scanned non-direntry node to the replay list. The
11894 + * replay list contains @struct replay_entry elements, and we sort this list in
11895 + * sequence number order before applying it. The replay list is applied at the
11896 + * very end of the replay process. Since the list is sorted in sequence number
11897 + * order, the older modifications are applied first. This function returns zero
11898 + * in case of success and a negative error code in case of failure.
11900 static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
11901 union ubifs_key *key, unsigned long long sqnum,
11902 int deletion, int *used, loff_t old_size,
11905 - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
11906 struct replay_entry *r;
11908 + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
11910 if (key_inum(c, key) >= c->highest_inum)
11911 c->highest_inum = key_inum(c, key);
11913 - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
11916 - r = rb_entry(parent, struct replay_entry, rb);
11917 - if (sqnum < r->sqnum) {
11918 - p = &(*p)->rb_left;
11920 - } else if (sqnum > r->sqnum) {
11921 - p = &(*p)->rb_right;
11924 - ubifs_err("duplicate sqnum in replay");
11928 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
11931 @@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info
11935 + r->deletion = !!deletion;
11937 - r->flags = (deletion ? REPLAY_DELETION : 0);
11938 + key_copy(c, key, &r->key);
11939 r->old_size = old_size;
11940 r->new_size = new_size;
11941 - key_copy(c, key, &r->key);
11943 - rb_link_node(&r->rb, parent, p);
11944 - rb_insert_color(&r->rb, &c->replay_tree);
11945 + list_add_tail(&r->list, &c->replay_list);
11950 - * insert_dent - insert a directory entry node into the replay tree.
11951 + * insert_dent - insert a directory entry node into the replay list.
11952 * @c: UBIFS file-system description object
11953 * @lnum: node logical eraseblock number
11954 * @offs: node offset
11955 @@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info
11956 * @deletion: non-zero if this is a deletion
11957 * @used: number of bytes in use in a LEB
11959 - * This function inserts a scanned directory entry node to the replay tree.
11960 - * Returns zero in case of success and a negative error code in case of
11963 - * This function is also used for extended attribute entries because they are
11964 - * implemented as directory entry nodes.
11965 + * This function inserts a scanned directory entry node or an extended
11966 + * attribute entry to the replay list. Returns zero in case of success and a
11967 + * negative error code in case of failure.
11969 static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
11970 union ubifs_key *key, const char *name, int nlen,
11971 unsigned long long sqnum, int deletion, int *used)
11973 - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
11974 struct replay_entry *r;
11977 + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
11978 if (key_inum(c, key) >= c->highest_inum)
11979 c->highest_inum = key_inum(c, key);
11981 - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
11984 - r = rb_entry(parent, struct replay_entry, rb);
11985 - if (sqnum < r->sqnum) {
11986 - p = &(*p)->rb_left;
11989 - if (sqnum > r->sqnum) {
11990 - p = &(*p)->rb_right;
11993 - ubifs_err("duplicate sqnum in replay");
11997 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
12001 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
12004 @@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info
12008 + r->deletion = !!deletion;
12010 + key_copy(c, key, &r->key);
12012 memcpy(nbuf, name, nlen);
12015 - r->flags = (deletion ? REPLAY_DELETION : 0);
12016 - key_copy(c, key, &r->key);
12018 - ubifs_assert(!*p);
12019 - rb_link_node(&r->rb, parent, p);
12020 - rb_insert_color(&r->rb, &c->replay_tree);
12021 + list_add_tail(&r->list, &c->replay_list);
12025 @@ -482,31 +473,93 @@ int ubifs_validate_entry(struct ubifs_in
12029 + * is_last_bud - check if the bud is the last in the journal head.
12030 + * @c: UBIFS file-system description object
12031 + * @bud: bud description object
12033 + * This function checks if bud @bud is the last bud in its journal head. This
12034 + * information is then used by 'replay_bud()' to decide whether the bud can
12035 + * have corruptions or not. Indeed, only last buds can be corrupted by power
12036 + * cuts. Returns %1 if this is the last bud, and %0 if not.
12038 +static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
12040 + struct ubifs_jhead *jh = &c->jheads[bud->jhead];
12041 + struct ubifs_bud *next;
12045 + if (list_is_last(&bud->list, &jh->buds_list))
12049 + * The following is a quirk to make sure we work correctly with UBIFS
12050 + * images used with older UBIFS.
12052 + * Normally, the last bud will be the last in the journal head's list
12053 + * of bud. However, there is one exception if the UBIFS image belongs
12054 + * to older UBIFS. This is fairly unlikely: one would need to use old
12055 + * UBIFS, then have a power cut exactly at the right point, and then
12056 + * try to mount this image with new UBIFS.
12058 + * The exception is: it is possible to have 2 buds A and B, A goes
12059 + * before B, and B is the last, bud B is contains no data, and bud A is
12060 + * corrupted at the end. The reason is that in older versions when the
12061 + * journal code switched the next bud (from A to B), it first added a
12062 + * log reference node for the new bud (B), and only after this it
12063 + * synchronized the write-buffer of current bud (A). But later this was
12064 + * changed and UBIFS started to always synchronize the write-buffer of
12065 + * the bud (A) before writing the log reference for the new bud (B).
12067 + * But because older UBIFS always synchronized A's write-buffer before
12068 + * writing to B, we can recognize this exceptional situation but
12069 + * checking the contents of bud B - if it is empty, then A can be
12070 + * treated as the last and we can recover it.
12072 + * TODO: remove this piece of code in a couple of years (today it is
12075 + next = list_entry(bud->list.next, struct ubifs_bud, list);
12076 + if (!list_is_last(&next->list, &jh->buds_list))
12079 + err = ubi_read(c->ubi, next->lnum, (char *)&data,
12084 + return data == 0xFFFFFFFF;
12088 * replay_bud - replay a bud logical eraseblock.
12089 * @c: UBIFS file-system description object
12090 - * @lnum: bud logical eraseblock number to replay
12091 - * @offs: bud start offset
12092 - * @jhead: journal head to which this bud belongs
12093 - * @free: amount of free space in the bud is returned here
12094 - * @dirty: amount of dirty space from padding and deletion nodes is returned
12096 + * @b: bud entry which describes the bud
12098 - * This function returns zero in case of success and a negative error code in
12099 - * case of failure.
12100 + * This function replays bud @bud, recovers it if needed, and adds all nodes
12101 + * from this bud to the replay list. Returns zero in case of success and a
12102 + * negative error code in case of failure.
12104 -static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
12105 - int *free, int *dirty)
12106 +static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
12108 - int err = 0, used = 0;
12109 + int is_last = is_last_bud(c, b->bud);
12110 + int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
12111 struct ubifs_scan_leb *sleb;
12112 struct ubifs_scan_node *snod;
12113 - struct ubifs_bud *bud;
12115 - dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
12116 - if (c->need_recovery)
12117 - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
12118 + dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
12119 + lnum, b->bud->jhead, offs, is_last);
12121 + if (c->need_recovery && is_last)
12123 + * Recover only last LEBs in the journal heads, because power
12124 + * cuts may cause corruptions only in these LEBs, because only
12125 + * these LEBs could possibly be written to at the power cut
12128 + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
12130 - sleb = ubifs_scan(c, lnum, offs, c->sbuf);
12131 + sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
12133 return PTR_ERR(sleb);
12135 @@ -620,20 +673,13 @@ static int replay_bud(struct ubifs_info
12139 - bud = ubifs_search_bud(c, lnum);
12143 + ubifs_assert(ubifs_search_bud(c, lnum));
12144 ubifs_assert(sleb->endpt - offs >= used);
12145 ubifs_assert(sleb->endpt % c->min_io_size == 0);
12147 - if (sleb->endpt + c->min_io_size <= c->leb_size &&
12148 - !(c->vfs_sb->s_flags & MS_RDONLY))
12149 - err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
12150 - sleb->endpt, UBI_SHORTTERM);
12152 - *dirty = sleb->endpt - offs - used;
12153 - *free = c->leb_size - sleb->endpt;
12154 + b->dirty = sleb->endpt - offs - used;
12155 + b->free = c->leb_size - sleb->endpt;
12156 + dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free);
12159 ubifs_scan_destroy(sleb);
12160 @@ -647,55 +693,6 @@ out_dump:
12164 - * insert_ref_node - insert a reference node to the replay tree.
12165 - * @c: UBIFS file-system description object
12166 - * @lnum: node logical eraseblock number
12167 - * @offs: node offset
12168 - * @sqnum: sequence number
12169 - * @free: amount of free space in bud
12170 - * @dirty: amount of dirty space from padding and deletion nodes
12172 - * This function inserts a reference node to the replay tree and returns zero
12173 - * in case of success or a negative error code in case of failure.
12175 -static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
12176 - unsigned long long sqnum, int free, int dirty)
12178 - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
12179 - struct replay_entry *r;
12181 - dbg_mnt("add ref LEB %d:%d", lnum, offs);
12184 - r = rb_entry(parent, struct replay_entry, rb);
12185 - if (sqnum < r->sqnum) {
12186 - p = &(*p)->rb_left;
12188 - } else if (sqnum > r->sqnum) {
12189 - p = &(*p)->rb_right;
12192 - ubifs_err("duplicate sqnum in replay tree");
12196 - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
12202 - r->sqnum = sqnum;
12203 - r->flags = REPLAY_REF;
12205 - r->dirty = dirty;
12207 - rb_link_node(&r->rb, parent, p);
12208 - rb_insert_color(&r->rb, &c->replay_tree);
12213 * replay_buds - replay all buds.
12214 * @c: UBIFS file-system description object
12216 @@ -705,17 +702,16 @@ static int insert_ref_node(struct ubifs_
12217 static int replay_buds(struct ubifs_info *c)
12219 struct bud_entry *b;
12220 - int err, uninitialized_var(free), uninitialized_var(dirty);
12222 + unsigned long long prev_sqnum = 0;
12224 list_for_each_entry(b, &c->replay_buds, list) {
12225 - err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
12229 - err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
12231 + err = replay_bud(c, b);
12235 + ubifs_assert(b->sqnum > prev_sqnum);
12236 + prev_sqnum = b->sqnum;
12240 @@ -836,10 +832,16 @@ static int replay_log_leb(struct ubifs_i
12241 const struct ubifs_cs_node *node;
12243 dbg_mnt("replay log LEB %d:%d", lnum, offs);
12244 - sleb = ubifs_scan(c, lnum, offs, sbuf);
12245 + sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
12246 if (IS_ERR(sleb)) {
12247 - if (c->need_recovery)
12248 - sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
12249 + if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
12250 + return PTR_ERR(sleb);
12252 + * Note, the below function will recover this log LEB only if
12253 + * it is the last, because unclean reboots can possibly corrupt
12254 + * only the tail of the log.
12256 + sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
12258 return PTR_ERR(sleb);
12260 @@ -850,7 +852,6 @@ static int replay_log_leb(struct ubifs_i
12265 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
12266 if (c->cs_sqnum == 0) {
12268 @@ -897,7 +898,6 @@ static int replay_log_leb(struct ubifs_i
12271 list_for_each_entry(snod, &sleb->nodes, list) {
12275 if (snod->sqnum >= SQNUM_WATERMARK) {
12276 @@ -1010,7 +1010,6 @@ out:
12277 int ubifs_replay_journal(struct ubifs_info *c)
12279 int err, i, lnum, offs, free;
12280 - void *sbuf = NULL;
12282 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
12284 @@ -1025,14 +1024,8 @@ int ubifs_replay_journal(struct ubifs_in
12288 - sbuf = vmalloc(c->leb_size);
12292 dbg_mnt("start replaying the journal");
12296 lnum = c->ltail_lnum = c->lhead_lnum;
12297 offs = c->lhead_offs;
12299 @@ -1045,7 +1038,7 @@ int ubifs_replay_journal(struct ubifs_in
12300 lnum = UBIFS_LOG_LNUM;
12303 - err = replay_log_leb(c, lnum, offs, sbuf);
12304 + err = replay_log_leb(c, lnum, offs, c->sbuf);
12306 /* We hit the end of the log */
12308 @@ -1058,27 +1051,30 @@ int ubifs_replay_journal(struct ubifs_in
12312 - err = apply_replay_tree(c);
12313 + err = apply_replay_list(c);
12317 + err = set_buds_lprops(c);
12322 - * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable
12323 - * to roughly estimate index growth. Things like @c->min_idx_lebs
12324 + * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
12325 + * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
12326 * depend on it. This means we have to initialize it to make sure
12327 * budgeting works properly.
12329 - c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
12330 - c->budg_uncommitted_idx *= c->max_idx_node_sz;
12331 + c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
12332 + c->bi.uncommitted_idx *= c->max_idx_node_sz;
12334 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
12335 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
12336 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
12337 (unsigned long)c->highest_inum);
12339 - destroy_replay_tree(c);
12340 + destroy_replay_list(c);
12341 destroy_bud_list(c);
12346 diff -uprN linux-2.6.28/fs/ubifs/sb.c ubifs-v2.6.28/fs/ubifs/sb.c
12347 --- linux-2.6.28/fs/ubifs/sb.c 2011-06-15 15:12:27.000000000 -0400
12348 +++ ubifs-v2.6.28/fs/ubifs/sb.c 2011-06-15 14:22:09.000000000 -0400
12349 @@ -181,12 +181,9 @@ static int create_default_filesystem(str
12350 sup->lsave_cnt = cpu_to_le32(c->lsave_cnt);
12351 sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION);
12352 sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN);
12353 - if (c->mount_opts.override_compr) {
12354 - if (c->mount_opts.compr_type == UBIFS_COMPR_LZO999)
12355 - sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
12357 - sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
12359 + if (c->mount_opts.override_compr)
12360 + sup->default_compr = cpu_to_le16(c->mount_opts.compr_type);
12362 sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
12364 generate_random_uuid(sup->uuid);
12365 @@ -196,6 +193,7 @@ static int create_default_filesystem(str
12366 if (tmp64 > DEFAULT_MAX_RP_SIZE)
12367 tmp64 = DEFAULT_MAX_RP_SIZE;
12368 sup->rp_size = cpu_to_le64(tmp64);
12369 + sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);
12371 err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
12373 @@ -476,7 +474,8 @@ failed:
12374 * @c: UBIFS file-system description object
12376 * This function returns a pointer to the superblock node or a negative error
12378 + * code. Note, the user of this function is responsible for kfree()'ing the
12379 + * returned superblock buffer.
12381 struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
12383 @@ -535,17 +534,36 @@ int ubifs_read_superblock(struct ubifs_i
12385 return PTR_ERR(sup);
12387 + c->fmt_version = le32_to_cpu(sup->fmt_version);
12388 + c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
12391 * The software supports all previous versions but not future versions,
12392 * due to the unavailability of time-travelling equipment.
12394 - c->fmt_version = le32_to_cpu(sup->fmt_version);
12395 if (c->fmt_version > UBIFS_FORMAT_VERSION) {
12396 - ubifs_err("on-flash format version is %d, but software only "
12397 - "supports up to version %d", c->fmt_version,
12398 - UBIFS_FORMAT_VERSION);
12401 + ubifs_assert(!c->ro_media || c->ro_mount);
12402 + if (!c->ro_mount ||
12403 + c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
12404 + ubifs_err("on-flash format version is w%d/r%d, but "
12405 + "software only supports up to version "
12406 + "w%d/r%d", c->fmt_version,
12407 + c->ro_compat_version, UBIFS_FORMAT_VERSION,
12408 + UBIFS_RO_COMPAT_VERSION);
12409 + if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
12410 + ubifs_msg("only R/O mounting is possible");
12418 + * The FS is mounted R/O, and the media format is
12419 + * R/O-compatible with the UBIFS implementation, so we can
12422 + c->rw_incompat = 1;
12425 if (c->fmt_version < 3) {
12426 @@ -598,12 +616,13 @@ int ubifs_read_superblock(struct ubifs_i
12427 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
12428 memcpy(&c->uuid, &sup->uuid, 16);
12429 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
12430 + c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
12432 /* Automatically increase file system size to the maximum size */
12433 c->old_leb_cnt = c->leb_cnt;
12434 if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
12435 c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
12436 - if (c->vfs_sb->s_flags & MS_RDONLY)
12438 dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
12439 c->old_leb_cnt, c->leb_cnt);
12441 @@ -626,10 +645,158 @@ int ubifs_read_superblock(struct ubifs_i
12442 c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
12443 c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
12444 c->main_first = c->leb_cnt - c->main_lebs;
12445 - c->report_rp_size = ubifs_reported_space(c, c->rp_size);
12447 err = validate_sb(c, sup);
12454 + * fixup_leb - fixup/unmap an LEB containing free space.
12455 + * @c: UBIFS file-system description object
12456 + * @lnum: the LEB number to fix up
12457 + * @len: number of used bytes in LEB (starting at offset 0)
12459 + * This function reads the contents of the given LEB number @lnum, then fixes
12460 + * it up, so that empty min. I/O units in the end of LEB are actually erased on
12461 + * flash (rather than being just all-0xff real data). If the LEB is completely
12462 + * empty, it is simply unmapped.
12464 +static int fixup_leb(struct ubifs_info *c, int lnum, int len)
12468 + ubifs_assert(len >= 0);
12469 + ubifs_assert(len % c->min_io_size == 0);
12470 + ubifs_assert(len < c->leb_size);
12473 + dbg_mnt("unmap empty LEB %d", lnum);
12474 + return ubi_leb_unmap(c->ubi, lnum);
12477 + dbg_mnt("fixup LEB %d, data len %d", lnum, len);
12478 + err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
12482 + return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
12486 + * fixup_free_space - find & remap all LEBs containing free space.
12487 + * @c: UBIFS file-system description object
12489 + * This function walks through all LEBs in the filesystem and fixes up those
12490 + * containing free/empty space.
12492 +static int fixup_free_space(struct ubifs_info *c)
12494 + int lnum, err = 0;
12495 + struct ubifs_lprops *lprops;
12497 + ubifs_get_lprops(c);
12499 + /* Fixup LEBs in the master area */
12500 + for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
12501 + err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
12506 + /* Unmap unused log LEBs */
12507 + lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
12508 + while (lnum != c->ltail_lnum) {
12509 + err = fixup_leb(c, lnum, 0);
12512 + lnum = ubifs_next_log_lnum(c, lnum);
12515 + /* Fixup the current log head */
12516 + err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
12520 + /* Fixup LEBs in the LPT area */
12521 + for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
12522 + int free = c->ltab[lnum - c->lpt_first].free;
12525 + err = fixup_leb(c, lnum, c->leb_size - free);
12531 + /* Unmap LEBs in the orphans area */
12532 + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
12533 + err = fixup_leb(c, lnum, 0);
12538 + /* Fixup LEBs in the main area */
12539 + for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
12540 + lprops = ubifs_lpt_lookup(c, lnum);
12541 + if (IS_ERR(lprops)) {
12542 + err = PTR_ERR(lprops);
12546 + if (lprops->free > 0) {
12547 + err = fixup_leb(c, lnum, c->leb_size - lprops->free);
12554 + ubifs_release_lprops(c);
12559 + * ubifs_fixup_free_space - find & fix all LEBs with free space.
12560 + * @c: UBIFS file-system description object
12562 + * This function fixes up LEBs containing free space on first mount, if the
12563 + * appropriate flag was set when the FS was created. Each LEB with one or more
12564 + * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
12565 + * the free space is actually erased. E.g., this is necessary for some NAND
12566 + * chips, since the free space may have been programmed like real "0xff" data
12567 + * (generating a non-0xff ECC), causing future writes to the not-really-erased
12568 + * NAND pages to behave badly. After the space is fixed up, the superblock flag
12569 + * is cleared, so that this is skipped for all future mounts.
12571 +int ubifs_fixup_free_space(struct ubifs_info *c)
12574 + struct ubifs_sb_node *sup;
12576 + ubifs_assert(c->space_fixup);
12577 + ubifs_assert(!c->ro_mount);
12579 + ubifs_msg("start fixing up free space");
12581 + err = fixup_free_space(c);
12585 + sup = ubifs_read_sb_node(c);
12587 + return PTR_ERR(sup);
12589 + /* Free-space fixup is no longer required */
12590 + c->space_fixup = 0;
12591 + sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
12593 + err = ubifs_write_sb_node(c, sup);
12598 + ubifs_msg("free space fixup complete");
12601 diff -uprN linux-2.6.28/fs/ubifs/scan.c ubifs-v2.6.28/fs/ubifs/scan.c
12602 --- linux-2.6.28/fs/ubifs/scan.c 2008-12-24 18:26:37.000000000 -0500
12603 +++ ubifs-v2.6.28/fs/ubifs/scan.c 2011-06-15 14:22:09.000000000 -0400
12604 @@ -108,10 +108,9 @@ int ubifs_scan_a_node(const struct ubifs
12606 /* Make the node pads to 8-byte boundary */
12607 if ((node_len + pad_len) & 7) {
12610 dbg_err("bad padding length %d - %d",
12611 offs, offs + node_len + pad_len);
12613 return SCANNED_A_BAD_PAD_NODE;
12616 @@ -198,7 +197,7 @@ int ubifs_add_snod(const struct ubifs_in
12617 struct ubifs_ino_node *ino = buf;
12618 struct ubifs_scan_node *snod;
12620 - snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
12621 + snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
12625 @@ -213,13 +212,15 @@ int ubifs_add_snod(const struct ubifs_in
12626 case UBIFS_DENT_NODE:
12627 case UBIFS_XENT_NODE:
12628 case UBIFS_DATA_NODE:
12629 - case UBIFS_TRUN_NODE:
12631 * The key is in the same place in all keyed
12634 key_read(c, &ino->key, &snod->key);
12637 + invalid_key_init(c, &snod->key);
12640 list_add_tail(&snod->list, &sleb->nodes);
12641 sleb->nodes_cnt += 1;
12642 @@ -238,12 +239,12 @@ void ubifs_scanned_corruption(const stru
12646 - ubifs_err("corrupted data at LEB %d:%d", lnum, offs);
12647 + ubifs_err("corruption at LEB %d:%d", lnum, offs);
12648 if (dbg_failure_mode)
12650 len = c->leb_size - offs;
12655 dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
12656 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
12658 @@ -253,13 +254,19 @@ void ubifs_scanned_corruption(const stru
12659 * @c: UBIFS file-system description object
12660 * @lnum: logical eraseblock number
12661 * @offs: offset to start at (usually zero)
12662 - * @sbuf: scan buffer (must be c->leb_size)
12663 + * @sbuf: scan buffer (must be of @c->leb_size bytes in size)
12664 + * @quiet: print no messages
12666 * This function scans LEB number @lnum and returns complete information about
12667 - * its contents. Returns an error code in case of failure.
12668 + * its contents. Returns the scanned information in case of success,
12669 + * %-EUCLEAN if the LEB needs recovery, and other negative error codes in case
12672 + * If @quiet is non-zero, this function does not print large and scary
12673 + * error messages and flash dumps in case of errors.
12675 struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
12676 - int offs, void *sbuf)
12677 + int offs, void *sbuf, int quiet)
12679 void *buf = sbuf + offs;
12680 int err, len = c->leb_size - offs;
12681 @@ -278,8 +285,7 @@ struct ubifs_scan_leb *ubifs_scan(const
12685 - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
12687 + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
12689 /* Padding bytes or a valid padding node */
12691 @@ -304,7 +310,8 @@ struct ubifs_scan_leb *ubifs_scan(const
12694 dbg_err("unknown");
12700 err = ubifs_add_snod(c, sleb, buf, offs);
12701 @@ -317,8 +324,12 @@ struct ubifs_scan_leb *ubifs_scan(const
12705 - if (offs % c->min_io_size)
12706 + if (offs % c->min_io_size) {
12708 + ubifs_err("empty space starts at non-aligned offset %d",
12713 ubifs_end_scan(c, sleb, lnum, offs);
12715 @@ -327,18 +338,25 @@ struct ubifs_scan_leb *ubifs_scan(const
12717 for (; len; offs++, buf++, len--)
12718 if (*(uint8_t *)buf != 0xff) {
12719 - ubifs_err("corrupt empty space at LEB %d:%d",
12722 + ubifs_err("corrupt empty space at LEB %d:%d",
12730 - ubifs_scanned_corruption(c, lnum, offs, buf);
12732 + ubifs_scanned_corruption(c, lnum, offs, buf);
12733 + ubifs_err("LEB %d scanning failed", lnum);
12736 + ubifs_scan_destroy(sleb);
12737 + return ERR_PTR(err);
12740 - ubifs_err("LEB %d scanning failed", lnum);
12741 + ubifs_err("LEB %d scanning failed, error %d", lnum, err);
12742 ubifs_scan_destroy(sleb);
12743 return ERR_PTR(err);
12745 diff -uprN linux-2.6.28/fs/ubifs/shrinker.c ubifs-v2.6.28/fs/ubifs/shrinker.c
12746 --- linux-2.6.28/fs/ubifs/shrinker.c 2008-12-24 18:26:37.000000000 -0500
12747 +++ ubifs-v2.6.28/fs/ubifs/shrinker.c 2011-06-15 14:22:09.000000000 -0400
12748 @@ -151,7 +151,7 @@ static int shrink_tnc(struct ubifs_info
12749 * @contention: if any contention, this is set to %1
12751 * This function walks the list of mounted UBIFS file-systems and frees clean
12752 - * znodes which are older then @age, until at least @nr znodes are freed.
12753 + * znodes which are older than @age, until at least @nr znodes are freed.
12754 * Returns the number of freed znodes.
12756 static int shrink_tnc_trees(int nr, int age, int *contention)
12757 @@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int
12758 * Move this one to the end of the list to provide some
12761 - list_del(&c->infos_list);
12762 - list_add_tail(&c->infos_list, &ubifs_infos);
12763 + list_move_tail(&c->infos_list, &ubifs_infos);
12764 mutex_unlock(&c->umount_mutex);
12767 @@ -251,7 +250,7 @@ static int kick_a_thread(void)
12768 dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
12770 if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
12772 + c->ro_mount || c->ro_error) {
12773 mutex_unlock(&c->umount_mutex);
12776 @@ -263,8 +262,7 @@ static int kick_a_thread(void)
12780 - list_del(&c->infos_list);
12781 - list_add_tail(&c->infos_list, &ubifs_infos);
12782 + list_move_tail(&c->infos_list, &ubifs_infos);
12783 spin_unlock(&ubifs_infos_lock);
12785 ubifs_request_bg_commit(c);
12786 @@ -285,7 +283,11 @@ int ubifs_shrinker(int nr, gfp_t gfp_mas
12787 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
12790 - return clean_zn_cnt;
12792 + * Due to the way UBIFS updates the clean znode counter it may
12793 + * temporarily be negative.
12795 + return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
12797 if (!clean_zn_cnt) {
12799 diff -uprN linux-2.6.28/fs/ubifs/super.c ubifs-v2.6.28/fs/ubifs/super.c
12800 --- linux-2.6.28/fs/ubifs/super.c 2011-06-15 15:12:27.000000000 -0400
12801 +++ ubifs-v2.6.28/fs/ubifs/super.c 2011-06-15 14:22:09.000000000 -0400
12802 @@ -317,6 +317,8 @@ static int ubifs_write_inode(struct inod
12804 ubifs_err("can't write inode %lu, error %d",
12805 inode->i_ino, err);
12807 + err = dbg_check_inode_size(c, inode, ui->ui_size);
12811 @@ -362,7 +364,7 @@ out:
12812 ubifs_release_dirty_inode_budget(c, ui);
12814 /* We've deleted something - clean the "no space" flags */
12815 - c->nospace = c->nospace_rp = 0;
12816 + c->bi.nospace = c->bi.nospace_rp = 0;
12819 clear_inode(inode);
12820 @@ -426,8 +428,8 @@ static int ubifs_show_options(struct seq
12821 seq_printf(s, ",no_chk_data_crc");
12823 if (c->mount_opts.override_compr) {
12824 - seq_printf(s, ",compr=");
12825 - seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type));
12826 + seq_printf(s, ",compr=%s",
12827 + ubifs_compr_name(c->mount_opts.compr_type));
12831 @@ -511,9 +513,12 @@ static int init_constants_early(struct u
12833 c->leb_cnt = c->vi.size;
12834 c->leb_size = c->vi.usable_leb_size;
12835 + c->leb_start = c->di.leb_start;
12836 c->half_leb_size = c->leb_size / 2;
12837 c->min_io_size = c->di.min_io_size;
12838 c->min_io_shift = fls(c->min_io_size) - 1;
12839 + c->max_write_size = c->di.max_write_size;
12840 + c->max_write_shift = fls(c->max_write_size) - 1;
12842 if (c->leb_size < UBIFS_MIN_LEB_SZ) {
12843 ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
12844 @@ -533,6 +538,18 @@ static int init_constants_early(struct u
12848 + * Maximum write size has to be greater or equivalent to min. I/O
12849 + * size, and be multiple of min. I/O size.
12851 + if (c->max_write_size < c->min_io_size ||
12852 + c->max_write_size % c->min_io_size ||
12853 + !is_power_of_2(c->max_write_size)) {
12854 + ubifs_err("bad write buffer size %d for %d min. I/O unit",
12855 + c->max_write_size, c->min_io_size);
12860 * UBIFS aligns all node to 8-byte boundary, so to make function in
12861 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
12863 @@ -540,6 +557,10 @@ static int init_constants_early(struct u
12864 if (c->min_io_size < 8) {
12865 c->min_io_size = 8;
12866 c->min_io_shift = 3;
12867 + if (c->max_write_size < c->min_io_size) {
12868 + c->max_write_size = c->min_io_size;
12869 + c->max_write_shift = c->min_io_shift;
12873 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
12874 @@ -674,11 +695,11 @@ static int init_constants_sb(struct ubif
12875 * be compressed and direntries are of the maximum size.
12877 * Note, data, which may be stored in inodes is budgeted separately, so
12878 - * it is not included into 'c->inode_budget'.
12879 + * it is not included into 'c->bi.inode_budget'.
12881 - c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
12882 - c->inode_budget = UBIFS_INO_NODE_SZ;
12883 - c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
12884 + c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
12885 + c->bi.inode_budget = UBIFS_INO_NODE_SZ;
12886 + c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
12889 * When the amount of flash space used by buds becomes
12890 @@ -705,6 +726,8 @@ static int init_constants_sb(struct ubif
12894 + /* Initialize effective LEB size used in budgeting calculations */
12895 + c->idx_leb_size = c->leb_size - c->max_idx_node_sz;
12899 @@ -720,7 +743,8 @@ static void init_constants_master(struct
12903 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
12904 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
12905 + c->report_rp_size = ubifs_reported_space(c, c->rp_size);
12908 * Calculate total amount of FS blocks. This number is not used
12909 @@ -788,15 +812,18 @@ static int alloc_wbufs(struct ubifs_info
12911 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
12912 c->jheads[i].wbuf.jhead = i;
12913 + c->jheads[i].grouped = 1;
12916 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
12918 * Garbage Collector head likely contains long-term data and
12919 - * does not need to be synchronized by timer.
12920 + * does not need to be synchronized by timer. Also GC head nodes are
12923 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
12924 c->jheads[GCHD].wbuf.no_timer = 1;
12925 + c->jheads[GCHD].grouped = 0;
12929 @@ -937,6 +964,27 @@ static const match_table_t tokens = {
12933 + * parse_standard_option - parse a standard mount option.
12934 + * @option: the option to parse
12936 + * Normally, standard mount options like "sync" are passed to file-systems as
12937 + * flags. However, when a "rootflags=" kernel boot parameter is used, they may
12938 + * be present in the options string. This function tries to deal with this
12939 + * situation and parse standard options. Returns 0 if the option was not
12940 + * recognized, and the corresponding integer flag if it was.
12942 + * UBIFS is only interested in the "sync" option, so do not check for anything
12945 +static int parse_standard_option(const char *option)
12947 + ubifs_msg("parse %s", option);
12948 + if (!strcmp(option, "sync"))
12949 + return MS_SYNCHRONOUS;
12954 * ubifs_parse_options - parse mount parameters.
12955 * @c: UBIFS file-system description object
12956 * @options: parameters to parse
12957 @@ -1001,8 +1049,6 @@ static int ubifs_parse_options(struct ub
12958 c->mount_opts.compr_type = UBIFS_COMPR_LZO;
12959 else if (!strcmp(name, "zlib"))
12960 c->mount_opts.compr_type = UBIFS_COMPR_ZLIB;
12961 - else if (!strcmp(name, "lzo999"))
12962 - c->mount_opts.compr_type = UBIFS_COMPR_LZO999;
12964 ubifs_err("unknown compressor \"%s\"", name);
12966 @@ -1014,9 +1060,19 @@ static int ubifs_parse_options(struct ub
12970 - ubifs_err("unrecognized mount option \"%s\" "
12971 - "or missing value", p);
12974 + unsigned long flag;
12975 + struct super_block *sb = c->vfs_sb;
12977 + flag = parse_standard_option(p);
12979 + ubifs_err("unrecognized mount option \"%s\" "
12980 + "or missing value", p);
12983 + sb->s_flags |= flag;
12989 @@ -1092,8 +1148,8 @@ static int check_free_space(struct ubifs
12991 ubifs_assert(c->dark_wm > 0);
12992 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
12993 - ubifs_err("insufficient free space to mount in read/write mode");
12994 - dbg_dump_budg(c);
12995 + ubifs_err("insufficient free space to mount in R/W mode");
12996 + dbg_dump_budg(c, &c->bi);
12997 dbg_dump_lprops(c);
13000 @@ -1112,11 +1168,11 @@ static int check_free_space(struct ubifs
13002 static int mount_ubifs(struct ubifs_info *c)
13004 - struct super_block *sb = c->vfs_sb;
13005 - int err, mounted_read_only = (sb->s_flags & MS_RDONLY);
13010 + c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY);
13011 err = init_constants_early(c);
13014 @@ -1129,7 +1185,7 @@ static int mount_ubifs(struct ubifs_info
13018 - if (c->empty && (mounted_read_only || c->ro_media)) {
13019 + if (c->empty && (c->ro_mount || c->ro_media)) {
13021 * This UBI volume is empty, and read-only, or the file system
13022 * is mounted read-only - we cannot format it.
13023 @@ -1140,7 +1196,7 @@ static int mount_ubifs(struct ubifs_info
13027 - if (c->ro_media && !mounted_read_only) {
13028 + if (c->ro_media && !c->ro_mount) {
13029 ubifs_err("cannot mount read-write - read-only media");
13032 @@ -1160,7 +1216,7 @@ static int mount_ubifs(struct ubifs_info
13036 - if (!mounted_read_only) {
13037 + if (!c->ro_mount) {
13038 c->ileb_buf = vmalloc(c->leb_size);
13041 @@ -1169,11 +1225,14 @@ static int mount_ubifs(struct ubifs_info
13042 if (c->bulk_read == 1)
13046 - * We have to check all CRCs, even for data nodes, when we mount the FS
13047 - * (specifically, when we are replaying).
13049 - c->always_chk_crc = 1;
13050 + if (!c->ro_mount) {
13051 + c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
13053 + if (!c->write_reserve_buf)
13059 err = ubifs_read_superblock(c);
13061 @@ -1186,6 +1245,7 @@ static int mount_ubifs(struct ubifs_info
13062 if (!ubifs_compr_present(c->default_compr)) {
13063 ubifs_err("'compressor \"%s\" is not compiled in",
13064 ubifs_compr_name(c->default_compr));
13069 @@ -1201,14 +1261,14 @@ static int mount_ubifs(struct ubifs_info
13073 - sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
13074 - if (!mounted_read_only) {
13075 - err = alloc_wbufs(c);
13078 + err = alloc_wbufs(c);
13082 + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
13083 + if (!c->ro_mount) {
13084 /* Create background thread */
13085 - c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
13086 + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
13087 if (IS_ERR(c->bgt)) {
13088 err = PTR_ERR(c->bgt);
13090 @@ -1228,12 +1288,25 @@ static int mount_ubifs(struct ubifs_info
13091 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
13092 ubifs_msg("recovery needed");
13093 c->need_recovery = 1;
13094 - if (!mounted_read_only) {
13095 - err = ubifs_recover_inl_heads(c, c->sbuf);
13099 - } else if (!mounted_read_only) {
13102 + if (c->need_recovery && !c->ro_mount) {
13103 + err = ubifs_recover_inl_heads(c, c->sbuf);
13108 + err = ubifs_lpt_init(c, 1, !c->ro_mount);
13112 + if (!c->ro_mount && c->space_fixup) {
13113 + err = ubifs_fixup_free_space(c);
13118 + if (!c->ro_mount) {
13120 * Set the "dirty" flag so that if we reboot uncleanly we
13121 * will notice this immediately on the next mount.
13122 @@ -1241,14 +1314,10 @@ static int mount_ubifs(struct ubifs_info
13123 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
13124 err = ubifs_write_master(c);
13130 - err = ubifs_lpt_init(c, 1, !mounted_read_only);
13134 - err = dbg_check_idx_size(c, c->old_idx_sz);
13135 + err = dbg_check_idx_size(c, c->bi.old_idx_sz);
13139 @@ -1256,11 +1325,14 @@ static int mount_ubifs(struct ubifs_info
13143 - err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
13144 + /* Calculate 'min_idx_lebs' after journal replay */
13145 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
13147 + err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
13151 - if (!mounted_read_only) {
13152 + if (!c->ro_mount) {
13155 err = check_free_space(c);
13156 @@ -1282,6 +1354,8 @@ static int mount_ubifs(struct ubifs_info
13159 err = ubifs_rcvry_gc_commit(c);
13161 + goto out_orphans;
13163 err = take_gc_lnum(c);
13165 @@ -1293,7 +1367,7 @@ static int mount_ubifs(struct ubifs_info
13167 err = ubifs_leb_unmap(c, c->gc_lnum);
13170 + goto out_orphans;
13173 err = dbg_check_lprops(c);
13174 @@ -1320,16 +1394,20 @@ static int mount_ubifs(struct ubifs_info
13175 spin_unlock(&ubifs_infos_lock);
13177 if (c->need_recovery) {
13178 - if (mounted_read_only)
13180 ubifs_msg("recovery deferred");
13182 c->need_recovery = 0;
13183 ubifs_msg("recovery completed");
13184 - /* GC LEB has to be empty and taken at this point */
13185 - ubifs_assert(c->lst.taken_empty_lebs == 1);
13187 + * GC LEB has to be empty and taken at this point. But
13188 + * the journal head LEBs may also be accounted as
13189 + * "empty taken" if they are empty.
13191 + ubifs_assert(c->lst.taken_empty_lebs > 0);
13194 - ubifs_assert(c->lst.taken_empty_lebs == 1);
13195 + ubifs_assert(c->lst.taken_empty_lebs > 0);
13197 err = dbg_check_filesystem(c);
13199 @@ -1339,11 +1417,11 @@ static int mount_ubifs(struct ubifs_info
13203 - c->always_chk_crc = 0;
13206 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
13207 c->vi.ubi_num, c->vi.vol_id, c->vi.name);
13208 - if (mounted_read_only)
13210 ubifs_msg("mounted read-only");
13211 x = (long long)c->main_lebs * c->leb_size;
13212 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d "
13213 @@ -1351,14 +1429,16 @@ static int mount_ubifs(struct ubifs_info
13214 x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
13215 ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d "
13216 "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
13217 - ubifs_msg("media format: %d (latest is %d)",
13218 - c->fmt_version, UBIFS_FORMAT_VERSION);
13219 + ubifs_msg("media format: w%d/r%d (latest is w%d/r%d)",
13220 + c->fmt_version, c->ro_compat_version,
13221 + UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
13222 ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
13223 ubifs_msg("reserved for root: %llu bytes (%llu KiB)",
13224 c->report_rp_size, c->report_rp_size >> 10);
13226 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
13227 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
13228 + dbg_msg("max. write size: %d bytes", c->max_write_size);
13229 dbg_msg("LEB size: %d bytes (%d KiB)",
13230 c->leb_size, c->leb_size >> 10);
13231 dbg_msg("data journal heads: %d",
13232 @@ -1380,7 +1460,8 @@ static int mount_ubifs(struct ubifs_info
13233 c->main_lebs, c->main_first, c->leb_cnt - 1);
13234 dbg_msg("index LEBs: %d", c->lst.idx_lebs);
13235 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
13236 - c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
13237 + c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
13238 + c->bi.old_idx_sz >> 20);
13239 dbg_msg("key hash type: %d", c->key_hash_type);
13240 dbg_msg("tree fanout: %d", c->fanout);
13241 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
13242 @@ -1393,9 +1474,9 @@ static int mount_ubifs(struct ubifs_info
13243 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
13244 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
13245 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
13246 - dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu",
13247 - UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
13248 - UBIFS_MAX_DENT_NODE_SZ);
13249 + dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
13250 + UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
13251 + UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
13252 dbg_msg("dead watermark: %d", c->dead_wm);
13253 dbg_msg("dark watermark: %d", c->dark_wm);
13254 dbg_msg("LEB overhead: %d", c->leb_overhead);
13255 @@ -1435,6 +1516,7 @@ out_wbufs:
13259 + kfree(c->write_reserve_buf);
13261 vfree(c->ileb_buf);
13263 @@ -1473,6 +1555,7 @@ static void ubifs_umount(struct ubifs_in
13265 kfree(c->rcvrd_mst_node);
13266 kfree(c->mst_node);
13267 + kfree(c->write_reserve_buf);
13269 vfree(c->ileb_buf);
13271 @@ -1492,10 +1575,19 @@ static int ubifs_remount_rw(struct ubifs
13275 + if (c->rw_incompat) {
13276 + ubifs_err("the file-system is not R/W-compatible");
13277 + ubifs_msg("on-flash format version is w%d/r%d, but software "
13278 + "only supports up to version w%d/r%d", c->fmt_version,
13279 + c->ro_compat_version, UBIFS_FORMAT_VERSION,
13280 + UBIFS_RO_COMPAT_VERSION);
13284 mutex_lock(&c->umount_mutex);
13285 dbg_save_space_info(c);
13286 c->remounting_rw = 1;
13287 - c->always_chk_crc = 1;
13290 err = check_free_space(c);
13292 @@ -1511,6 +1603,7 @@ static int ubifs_remount_rw(struct ubifs
13294 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
13295 err = ubifs_write_sb_node(c, sup);
13300 @@ -1550,18 +1643,16 @@ static int ubifs_remount_rw(struct ubifs
13304 - err = ubifs_lpt_init(c, 0, 1);
13306 + c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
13307 + if (!c->write_reserve_buf)
13310 - err = alloc_wbufs(c);
13311 + err = ubifs_lpt_init(c, 0, 1);
13315 - ubifs_create_buds_lists(c);
13317 /* Create background thread */
13318 - c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
13319 + c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
13320 if (IS_ERR(c->bgt)) {
13321 err = PTR_ERR(c->bgt);
13323 @@ -1594,20 +1685,37 @@ static int ubifs_remount_rw(struct ubifs
13327 + dbg_gen("re-mounted read-write");
13328 + c->remounting_rw = 0;
13330 if (c->need_recovery) {
13331 c->need_recovery = 0;
13332 ubifs_msg("deferred recovery completed");
13335 + * Do not run the debugging space check if we were doing
13336 + * recovery, because when we saved the information we had the
13337 + * file-system in a state where the TNC and lprops have been
13338 + * modified in memory, but all the I/O operations (including a
13339 + * commit) were deferred. So the file-system was in
13340 + * "non-committed" state. Now the file-system is in committed
13341 + * state, and of course the amount of free space will change
13342 + * because, for example, the old index size was imprecise.
13344 + err = dbg_check_space_info(c);
13347 + if (c->space_fixup) {
13348 + err = ubifs_fixup_free_space(c);
13353 - dbg_gen("re-mounted read-write");
13354 - c->vfs_sb->s_flags &= ~MS_RDONLY;
13355 - c->remounting_rw = 0;
13356 - c->always_chk_crc = 0;
13357 - err = dbg_check_space_info(c);
13358 mutex_unlock(&c->umount_mutex);
13363 vfree(c->orph_buf);
13364 c->orph_buf = NULL;
13366 @@ -1615,11 +1723,12 @@ out:
13370 + kfree(c->write_reserve_buf);
13371 + c->write_reserve_buf = NULL;
13372 vfree(c->ileb_buf);
13373 c->ileb_buf = NULL;
13374 ubifs_lpt_free(c, 1);
13375 c->remounting_rw = 0;
13376 - c->always_chk_crc = 0;
13377 mutex_unlock(&c->umount_mutex);
13380 @@ -1636,7 +1745,7 @@ static void ubifs_remount_ro(struct ubif
13383 ubifs_assert(!c->need_recovery);
13384 - ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
13385 + ubifs_assert(!c->ro_mount);
13387 mutex_lock(&c->umount_mutex);
13389 @@ -1646,10 +1755,8 @@ static void ubifs_remount_ro(struct ubif
13391 dbg_save_space_info(c);
13393 - for (i = 0; i < c->jhead_cnt; i++) {
13394 + for (i = 0; i < c->jhead_cnt; i++)
13395 ubifs_wbuf_sync(&c->jheads[i].wbuf);
13396 - hrtimer_cancel(&c->jheads[i].wbuf.timer);
13399 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
13400 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
13401 @@ -1658,12 +1765,14 @@ static void ubifs_remount_ro(struct ubif
13403 ubifs_ro_mode(c, err);
13406 vfree(c->orph_buf);
13407 c->orph_buf = NULL;
13408 + kfree(c->write_reserve_buf);
13409 + c->write_reserve_buf = NULL;
13410 vfree(c->ileb_buf);
13411 c->ileb_buf = NULL;
13412 ubifs_lpt_free(c, 1);
13414 err = dbg_check_space_info(c);
13416 ubifs_ro_mode(c, err);
13417 @@ -1682,10 +1791,11 @@ static void ubifs_put_super(struct super
13418 * of the media. For example, there will be dirty inodes if we failed
13419 * to write them back because of I/O errors.
13421 - ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
13422 - ubifs_assert(c->budg_idx_growth == 0);
13423 - ubifs_assert(c->budg_dd_growth == 0);
13424 - ubifs_assert(c->budg_data_growth == 0);
13425 + if (!c->ro_error) {
13426 + ubifs_assert(c->bi.idx_growth == 0);
13427 + ubifs_assert(c->bi.dd_growth == 0);
13428 + ubifs_assert(c->bi.data_growth == 0);
13432 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
13433 @@ -1694,7 +1804,7 @@ static void ubifs_put_super(struct super
13434 * the mutex is locked.
13436 mutex_lock(&c->umount_mutex);
13437 - if (!(c->vfs_sb->s_flags & MS_RDONLY)) {
13438 + if (!c->ro_mount) {
13440 * First of all kill the background thread to make sure it does
13441 * not interfere with un-mounting and freeing resources.
13442 @@ -1704,23 +1814,22 @@ static void ubifs_put_super(struct super
13446 - /* Synchronize write-buffers */
13448 - for (i = 0; i < c->jhead_cnt; i++)
13449 - ubifs_wbuf_sync(&c->jheads[i].wbuf);
13452 - * On fatal errors c->ro_media is set to 1, in which case we do
13453 + * On fatal errors c->ro_error is set to 1, in which case we do
13454 * not write the master node.
13456 - if (!c->ro_media) {
13457 + if (!c->ro_error) {
13460 + /* Synchronize write-buffers */
13461 + for (i = 0; i < c->jhead_cnt; i++)
13462 + ubifs_wbuf_sync(&c->jheads[i].wbuf);
13465 * We are being cleanly unmounted which means the
13466 * orphans were killed - indicate this in the master
13467 * node. Also save the reserved GC LEB number.
13471 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
13472 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
13473 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
13474 @@ -1733,6 +1842,10 @@ static void ubifs_put_super(struct super
13476 ubifs_err("failed to write master node, "
13479 + for (i = 0; i < c->jhead_cnt; i++)
13480 + /* Make sure write-buffer timers are canceled */
13481 + hrtimer_cancel(&c->jheads[i].wbuf.timer);
13485 @@ -1756,17 +1869,21 @@ static int ubifs_remount_fs(struct super
13489 - if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
13490 + if (c->ro_mount && !(*flags & MS_RDONLY)) {
13491 + if (c->ro_error) {
13492 + ubifs_msg("cannot re-mount R/W due to prior errors");
13496 - ubifs_msg("cannot re-mount due to prior errors");
13497 + ubifs_msg("cannot re-mount R/W - UBI volume is R/O");
13500 err = ubifs_remount_rw(c);
13503 - } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
13504 - if (c->ro_media) {
13505 - ubifs_msg("cannot re-mount due to prior errors");
13506 + } else if (!c->ro_mount && (*flags & MS_RDONLY)) {
13507 + if (c->ro_error) {
13508 + ubifs_msg("cannot re-mount R/O due to prior errors");
13511 ubifs_remount_ro(c);
13512 @@ -1780,7 +1897,7 @@ static int ubifs_remount_fs(struct super
13516 - ubifs_assert(c->lst.taken_empty_lebs == 1);
13517 + ubifs_assert(c->lst.taken_empty_lebs > 0);
13521 @@ -1802,22 +1919,32 @@ const struct super_operations ubifs_supe
13522 * @name: UBI volume name
13523 * @mode: UBI volume open mode
13525 - * There are several ways to specify UBI volumes when mounting UBIFS:
13526 - * o ubiX_Y - UBI device number X, volume Y;
13527 - * o ubiY - UBI device number 0, volume Y;
13528 + * The primary method of mounting UBIFS is by specifying the UBI volume
13529 + * character device node path. However, UBIFS may also be mounted without any
13530 + * character device node using one of the following methods:
13532 + * o ubiX_Y - mount UBI device number X, volume Y;
13533 + * o ubiY - mount UBI device number 0, volume Y;
13534 * o ubiX:NAME - mount UBI device X, volume with name NAME;
13535 * o ubi:NAME - mount UBI device 0, volume with name NAME.
13537 * Alternative '!' separator may be used instead of ':' (because some shells
13538 * like busybox may interpret ':' as an NFS host name separator). This function
13539 - * returns ubi volume object in case of success and a negative error code in
13540 - * case of failure.
13541 + * returns UBI volume description object in case of success and a negative
13542 + * error code in case of failure.
13544 static struct ubi_volume_desc *open_ubi(const char *name, int mode)
13546 + struct ubi_volume_desc *ubi;
13550 + /* First, try to open using the device node path method */
13551 + ubi = ubi_open_volume_path(name, mode);
13552 + if (!IS_ERR(ubi))
13555 + /* Try the "nodev" method */
13556 if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
13557 return ERR_PTR(-EINVAL);
13559 @@ -1872,6 +1999,7 @@ static int ubifs_fill_super(struct super
13560 mutex_init(&c->mst_mutex);
13561 mutex_init(&c->umount_mutex);
13562 mutex_init(&c->bu_mutex);
13563 + mutex_init(&c->write_reserve_mutex);
13564 init_waitqueue_head(&c->cmt_wq);
13566 c->old_idx = RB_ROOT;
13567 @@ -1889,7 +2017,9 @@ static int ubifs_fill_super(struct super
13568 INIT_LIST_HEAD(&c->old_buds);
13569 INIT_LIST_HEAD(&c->orph_list);
13570 INIT_LIST_HEAD(&c->orph_new);
13571 + c->no_chk_data_crc = 1;
13574 c->highest_inum = UBIFS_FIRST_INO;
13575 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
13577 @@ -1916,18 +2046,19 @@ static int ubifs_fill_super(struct super
13578 err = bdi_init(&c->bdi);
13581 + err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d",
13582 + c->vi.ubi_num, c->vi.vol_id);
13586 err = ubifs_parse_options(c, data, 0);
13593 sb->s_magic = UBIFS_SUPER_MAGIC;
13594 sb->s_blocksize = UBIFS_BLOCK_SIZE;
13595 sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT;
13596 - sb->s_dev = c->vi.cdev;
13597 sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c);
13598 if (c->max_inode_sz > MAX_LFS_FILESIZE)
13599 sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
13600 @@ -1972,16 +2103,9 @@ out_free:
13601 static int sb_test(struct super_block *sb, void *data)
13604 + struct ubifs_info *c = sb->s_fs_info;
13606 - return sb->s_dev == *dev;
13609 -static int sb_set(struct super_block *sb, void *data)
13611 - dev_t *dev = data;
13613 - sb->s_dev = *dev;
13615 + return c->vi.cdev == *dev;
13618 static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
13619 @@ -2001,24 +2125,26 @@ static int ubifs_get_sb(struct file_syst
13621 ubi = open_ubi(name, UBI_READONLY);
13623 - ubifs_err("cannot open \"%s\", error %d",
13624 - name, (int)PTR_ERR(ubi));
13625 + dbg_err("cannot open \"%s\", error %d",
13626 + name, (int)PTR_ERR(ubi));
13627 return PTR_ERR(ubi);
13629 ubi_get_volume_info(ubi, &vi);
13631 dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
13633 - sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev);
13634 + sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev);
13641 + struct ubifs_info *c1 = sb->s_fs_info;
13643 /* A new mount point for already mounted UBIFS */
13644 dbg_gen("this ubi volume is already mounted");
13645 - if ((flags ^ sb->s_flags) & MS_RDONLY) {
13646 + if (!!(flags & MS_RDONLY) != c1->ro_mount) {
13650 @@ -2049,16 +2175,11 @@ out_close:
13654 -static void ubifs_kill_sb(struct super_block *sb)
13656 - generic_shutdown_super(sb);
13659 static struct file_system_type ubifs_fs_type = {
13661 .owner = THIS_MODULE,
13662 .get_sb = ubifs_get_sb,
13663 - .kill_sb = ubifs_kill_sb
13664 + .kill_sb = kill_anon_super,
13668 diff -uprN linux-2.6.28/fs/ubifs/tnc.c ubifs-v2.6.28/fs/ubifs/tnc.c
13669 --- linux-2.6.28/fs/ubifs/tnc.c 2011-06-15 15:12:27.000000000 -0400
13670 +++ ubifs-v2.6.28/fs/ubifs/tnc.c 2011-06-15 14:22:09.000000000 -0400
13671 @@ -446,8 +446,11 @@ static int tnc_read_node_nm(struct ubifs
13673 * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
13674 * is true (it is controlled by corresponding mount option). However, if
13675 - * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always
13677 + * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to
13678 + * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is
13679 + * because during mounting or re-mounting from R/O mode to R/W mode we may read
13680 + * journal nodes (when replaying the journal or doing the recovery) and the
13681 + * journal nodes may potentially be corrupted, so checking is required.
13683 static int try_read_node(const struct ubifs_info *c, void *buf, int type,
13684 int len, int lnum, int offs)
13685 @@ -475,7 +478,8 @@ static int try_read_node(const struct ub
13686 if (node_len != len)
13689 - if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc)
13690 + if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
13691 + !c->remounting_rw)
13694 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
13695 @@ -1159,8 +1163,8 @@ static struct ubifs_znode *dirty_cow_bot
13696 * o exact match, i.e. the found zero-level znode contains key @key, then %1
13697 * is returned and slot number of the matched branch is stored in @n;
13698 * o not exact match, which means that zero-level znode does not contain
13699 - * @key, then %0 is returned and slot number of the closed branch is stored
13701 + * @key, then %0 is returned and slot number of the closest branch is stored
13703 * o @key is so small that it is even less than the lowest key of the
13704 * leftmost zero-level node, then %0 is returned and %0 is stored in @n.
13706 @@ -1176,6 +1180,7 @@ int ubifs_lookup_level0(struct ubifs_inf
13707 unsigned long time = get_seconds();
13709 dbg_tnc("search key %s", DBGKEY(key));
13710 + ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
13712 znode = c->zroot.znode;
13713 if (unlikely(!znode)) {
13714 @@ -1252,7 +1257,7 @@ int ubifs_lookup_level0(struct ubifs_inf
13715 * splitting in the middle of the colliding sequence. Also, when
13716 * removing the leftmost key, we would have to correct the key of the
13717 * parent node, which would introduce additional complications. Namely,
13718 - * if we changed the the leftmost key of the parent znode, the garbage
13719 + * if we changed the leftmost key of the parent znode, the garbage
13720 * collector would be unable to find it (GC is doing this when GC'ing
13721 * indexing LEBs). Although we already have an additional RB-tree where
13722 * we save such changed znodes (see 'ins_clr_old_idx_znode()') until
13723 @@ -1433,7 +1438,7 @@ static int maybe_leb_gced(struct ubifs_i
13724 * @lnum: LEB number is returned here
13725 * @offs: offset is returned here
13727 - * This function look up and reads node with key @key. The caller has to make
13728 + * This function looks up and reads node with key @key. The caller has to make
13729 * sure the @node buffer is large enough to fit the node. Returns zero in case
13730 * of success, %-ENOENT if the node was not found, and a negative error code in
13731 * case of failure. The node location can be returned in @lnum and @offs.
13732 @@ -2551,11 +2556,11 @@ int ubifs_tnc_remove_nm(struct ubifs_inf
13734 /* Ensure the znode is dirtied */
13735 if (znode->cnext || !ubifs_zn_dirty(znode)) {
13736 - znode = dirty_cow_bottom_up(c, znode);
13737 - if (IS_ERR(znode)) {
13738 - err = PTR_ERR(znode);
13741 + znode = dirty_cow_bottom_up(c, znode);
13742 + if (IS_ERR(znode)) {
13743 + err = PTR_ERR(znode);
13747 err = tnc_delete(c, znode, n);
13749 @@ -2870,12 +2875,13 @@ static void tnc_destroy_cnext(struct ubi
13751 void ubifs_tnc_close(struct ubifs_info *c)
13753 - long clean_freed;
13755 tnc_destroy_cnext(c);
13756 if (c->zroot.znode) {
13757 - clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
13758 - atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt);
13761 + ubifs_destroy_tnc_subtree(c->zroot.znode);
13762 + n = atomic_long_read(&c->clean_zn_cnt);
13763 + atomic_long_sub(n, &ubifs_clean_zn_cnt);
13765 kfree(c->gap_lebs);
13767 @@ -2965,7 +2971,7 @@ static struct ubifs_znode *right_znode(s
13769 * This function searches an indexing node by its first key @key and its
13770 * address @lnum:@offs. It looks up the indexing tree by pulling all indexing
13771 - * nodes it traverses to TNC. This function is called fro indexing nodes which
13772 + * nodes it traverses to TNC. This function is called for indexing nodes which
13773 * were found on the media by scanning, for example when garbage-collecting or
13774 * when doing in-the-gaps commit. This means that the indexing node which is
13775 * looked for does not have to have exactly the same leftmost key @key, because
13776 @@ -2987,6 +2993,8 @@ static struct ubifs_znode *lookup_znode(
13777 struct ubifs_znode *znode, *zn;
13780 + ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
13783 * The arguments have probably been read off flash, so don't assume
13785 @@ -3268,3 +3276,73 @@ out_unlock:
13786 mutex_unlock(&c->tnc_mutex);
13790 +#ifdef CONFIG_UBIFS_FS_DEBUG
13793 + * dbg_check_inode_size - check if inode size is correct.
13794 + * @c: UBIFS file-system description object
13795 + * @inum: inode number
13796 + * @size: inode size
13798 + * This function makes sure that the inode size (@size) is correct and it does
13799 + * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL
13800 + * if it has a data page beyond @size, and other negative error code in case of
13803 +int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
13807 + union ubifs_key from_key, to_key, *key;
13808 + struct ubifs_znode *znode;
13809 + unsigned int block;
13811 + if (!S_ISREG(inode->i_mode))
13813 + if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
13816 + block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
13817 + data_key_init(c, &from_key, inode->i_ino, block);
13818 + highest_data_key(c, &to_key, inode->i_ino);
13820 + mutex_lock(&c->tnc_mutex);
13821 + err = ubifs_lookup_level0(c, &from_key, &znode, &n);
13831 + err = tnc_next(c, &znode, &n);
13832 + if (err == -ENOENT) {
13839 + ubifs_assert(err == 0);
13840 + key = &znode->zbranch[n].key;
13841 + if (!key_in_range(c, key, &from_key, &to_key))
13845 + block = key_block(c, key);
13846 + ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
13847 + "(data key %s)", (unsigned long)inode->i_ino, size,
13848 + ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
13849 + dbg_dump_inode(c, inode);
13850 + dbg_dump_stack();
13854 + mutex_unlock(&c->tnc_mutex);
13858 +#endif /* CONFIG_UBIFS_FS_DEBUG */
13859 diff -uprN linux-2.6.28/fs/ubifs/tnc_commit.c ubifs-v2.6.28/fs/ubifs/tnc_commit.c
13860 --- linux-2.6.28/fs/ubifs/tnc_commit.c 2011-06-15 15:12:27.000000000 -0400
13861 +++ ubifs-v2.6.28/fs/ubifs/tnc_commit.c 2011-06-15 14:22:09.000000000 -0400
13862 @@ -245,7 +245,7 @@ static int layout_leb_in_gaps(struct ubi
13863 * it is more comprehensive and less efficient than is needed for this
13866 - sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
13867 + sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0);
13870 return PTR_ERR(sleb);
13871 @@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_i
13872 c->gap_lebs = NULL;
13875 - if (!dbg_force_in_the_gaps_enabled) {
13876 + if (dbg_force_in_the_gaps_enabled()) {
13878 * Do not print scary warnings if the debugging
13879 * option which forces in-the-gaps is enabled.
13881 - ubifs_err("out of space");
13882 - spin_lock(&c->space_lock);
13883 - dbg_dump_budg(c);
13884 - spin_unlock(&c->space_lock);
13885 + ubifs_warn("out of space");
13886 + dbg_dump_budg(c, &c->bi);
13887 dbg_dump_lprops(c);
13889 /* Try to commit anyway */
13890 @@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_
13891 spin_lock(&c->space_lock);
13893 * Although we have not finished committing yet, update size of the
13894 - * committed index ('c->old_idx_sz') and zero out the index growth
13895 + * committed index ('c->bi.old_idx_sz') and zero out the index growth
13896 * budget. It is OK to do this now, because we've reserved all the
13897 * space which is needed to commit the index, and it is save for the
13898 * budgeting subsystem to assume the index is already committed,
13899 * even though it is not.
13901 - ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
13902 - c->old_idx_sz = c->calc_idx_sz;
13903 - c->budg_uncommitted_idx = 0;
13904 - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
13905 + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
13906 + c->bi.old_idx_sz = c->calc_idx_sz;
13907 + c->bi.uncommitted_idx = 0;
13908 + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
13909 spin_unlock(&c->space_lock);
13910 mutex_unlock(&c->tnc_mutex);
13912 diff -uprN linux-2.6.28/fs/ubifs/ubifs.h ubifs-v2.6.28/fs/ubifs/ubifs.h
13913 --- linux-2.6.28/fs/ubifs/ubifs.h 2011-06-15 15:12:27.000000000 -0400
13914 +++ ubifs-v2.6.28/fs/ubifs/ubifs.h 2011-06-15 14:22:09.000000000 -0400
13915 @@ -105,12 +105,10 @@
13916 /* Number of non-data journal heads */
13917 #define NONDATA_JHEADS_CNT 2
13919 -/* Garbage collector head */
13921 -/* Base journal head number */
13923 -/* First "general purpose" journal head */
13925 +/* Shorter names for journal head numbers for internal usage */
13926 +#define GCHD UBIFS_GC_HEAD
13927 +#define BASEHD UBIFS_BASE_HEAD
13928 +#define DATAHD UBIFS_DATA_HEAD
13930 /* 'No change' value for 'ubifs_change_lp()' */
13931 #define LPROPS_NC 0x80000001
13932 @@ -120,8 +118,12 @@
13933 * in TNC. However, when replaying, it is handy to introduce fake "truncation"
13934 * keys for truncation nodes because the code becomes simpler. So we define
13935 * %UBIFS_TRUN_KEY type.
13937 + * But otherwise, out of the journal replay scope, the truncation keys are
13940 -#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
13941 +#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
13942 +#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT
13945 * How much a directory entry/extended attribute entry adds to the parent/host
13946 @@ -148,6 +150,12 @@
13948 #define WORST_COMPR_FACTOR 2
13951 + * How much memory is needed for a buffer where we compress a data node.
13953 +#define COMPRESSED_DATA_NODE_BUF_SZ \
13954 + (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
13956 /* Maximum expected tree height for use by bottom_up_buf */
13957 #define BOTTOM_UP_HEIGHT 64
13959 @@ -643,6 +651,7 @@ typedef int (*ubifs_lpt_scan_callback)(s
13960 * @offs: write-buffer offset in this logical eraseblock
13961 * @avail: number of bytes available in the write-buffer
13962 * @used: number of used bytes in the write-buffer
13963 + * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range)
13964 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
13966 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
13967 @@ -677,6 +686,7 @@ struct ubifs_wbuf {
13974 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
13975 @@ -711,12 +721,14 @@ struct ubifs_bud {
13976 * struct ubifs_jhead - journal head.
13977 * @wbuf: head's write-buffer
13978 * @buds_list: list of bud LEBs belonging to this journal head
13979 + * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
13981 * Note, the @buds list is protected by the @c->buds_lock.
13983 struct ubifs_jhead {
13984 struct ubifs_wbuf wbuf;
13985 struct list_head buds_list;
13986 + unsigned int grouped:1;
13990 @@ -926,6 +938,40 @@ struct ubifs_mount_opts {
13991 unsigned int compr_type:2;
13995 + * struct ubifs_budg_info - UBIFS budgeting information.
13996 + * @idx_growth: amount of bytes budgeted for index growth
13997 + * @data_growth: amount of bytes budgeted for cached data
13998 + * @dd_growth: amount of bytes budgeted for cached data that will make
13999 + * other data dirty
14000 + * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but
14001 + * which still have to be taken into account because the index
14002 + * has not been committed so far
14003 + * @old_idx_sz: size of index on flash
14004 + * @min_idx_lebs: minimum number of LEBs required for the index
14005 + * @nospace: non-zero if the file-system does not have flash space (used as
14007 + * @nospace_rp: the same as @nospace, but additionally means that even reserved
14009 + * @page_budget: budget for a page (constant, never changed after mount)
14010 + * @inode_budget: budget for an inode (constant, never changed after mount)
14011 + * @dent_budget: budget for a directory entry (constant, never changed after
14014 +struct ubifs_budg_info {
14015 + long long idx_growth;
14016 + long long data_growth;
14017 + long long dd_growth;
14018 + long long uncommitted_idx;
14019 + unsigned long long old_idx_sz;
14020 + int min_idx_lebs;
14021 + unsigned int nospace:1;
14022 + unsigned int nospace_rp:1;
14024 + int inode_budget;
14028 struct ubifs_debug_info;
14031 @@ -940,6 +986,7 @@ struct ubifs_debug_info;
14033 * @cnt_lock: protects @highest_inum and @max_sqnum counters
14034 * @fmt_version: UBIFS on-flash format version
14035 + * @ro_compat_version: R/O compatibility version
14036 * @uuid: UUID from super block
14038 * @lhead_lnum: log head logical eraseblock number
14039 @@ -968,10 +1015,12 @@ struct ubifs_debug_info;
14040 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
14042 * @big_lpt: flag that LPT is too big to write whole during commit
14043 + * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
14044 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
14046 * @bulk_read: enable bulk-reads
14047 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
14048 + * @rw_incompat: the media is not R/W compatible
14050 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
14052 @@ -998,6 +1047,11 @@ struct ubifs_debug_info;
14053 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
14054 * @bu: pre-allocated bulk-read information
14056 + * @write_reserve_mutex: protects @write_reserve_buf
14057 + * @write_reserve_buf: on the write path we allocate memory, which might
14058 + * sometimes be unavailable, in which case we use this
14059 + * write reserve buffer
14061 * @log_lebs: number of logical eraseblocks in the log
14062 * @log_bytes: log size in bytes
14063 * @log_last: last LEB of the log
14064 @@ -1019,43 +1073,34 @@ struct ubifs_debug_info;
14066 * @min_io_size: minimal input/output unit size
14067 * @min_io_shift: number of bits in @min_io_size minus one
14068 + * @max_write_size: maximum amount of bytes the underlying flash can write at a
14069 + * time (MTD write buffer size)
14070 + * @max_write_shift: number of bits in @max_write_size minus one
14071 * @leb_size: logical eraseblock size in bytes
14072 + * @leb_start: starting offset of logical eraseblocks within physical
14074 * @half_leb_size: half LEB size
14075 + * @idx_leb_size: how many bytes of an LEB are effectively available when it is
14076 + * used to store indexing nodes (@leb_size - @max_idx_node_sz)
14077 * @leb_cnt: count of logical eraseblocks
14078 * @max_leb_cnt: maximum count of logical eraseblocks
14079 * @old_leb_cnt: count of logical eraseblocks before re-size
14080 * @ro_media: the underlying UBI volume is read-only
14081 + * @ro_mount: the file-system was mounted as read-only
14082 + * @ro_error: UBIFS switched to R/O mode because an error happened
14084 * @dirty_pg_cnt: number of dirty pages (not used)
14085 * @dirty_zn_cnt: number of dirty znodes
14086 * @clean_zn_cnt: number of clean znodes
14088 - * @budg_idx_growth: amount of bytes budgeted for index growth
14089 - * @budg_data_growth: amount of bytes budgeted for cached data
14090 - * @budg_dd_growth: amount of bytes budgeted for cached data that will make
14091 - * other data dirty
14092 - * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
14093 - * but which still have to be taken into account because
14094 - * the index has not been committed so far
14095 - * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
14096 - * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
14097 - * @nospace, and @nospace_rp;
14098 - * @min_idx_lebs: minimum number of LEBs required for the index
14099 - * @old_idx_sz: size of index on flash
14100 + * @space_lock: protects @bi and @lst
14101 + * @lst: lprops statistics
14102 + * @bi: budgeting information
14103 * @calc_idx_sz: temporary variable which is used to calculate new index size
14104 * (contains accurate new index size at end of TNC commit start)
14105 - * @lst: lprops statistics
14106 - * @nospace: non-zero if the file-system does not have flash space (used as
14108 - * @nospace_rp: the same as @nospace, but additionally means that even reserved
14111 - * @page_budget: budget for a page
14112 - * @inode_budget: budget for an inode
14113 - * @dent_budget: budget for a directory entry
14115 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
14118 * @mst_node_alsz: master node aligned size
14119 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
14120 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
14121 @@ -1138,8 +1183,8 @@ struct ubifs_debug_info;
14122 * previous commit start
14123 * @uncat_list: list of un-categorized LEBs
14124 * @empty_list: list of empty LEBs
14125 - * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size)
14126 - * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size)
14127 + * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
14128 + * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
14129 * @freeable_cnt: number of freeable LEBs in @freeable_list
14131 * @ltab_lnum: LEB number of LPT's own lprops table
14132 @@ -1157,19 +1202,20 @@ struct ubifs_debug_info;
14133 * @rp_uid: reserved pool user ID
14134 * @rp_gid: reserved pool group ID
14136 - * @empty: if the UBI device is empty
14137 - * @replay_tree: temporary tree used during journal replay
14138 + * @empty: %1 if the UBI device is empty
14139 + * @need_recovery: %1 if the file-system needs recovery
14140 + * @replaying: %1 during journal replay
14141 + * @mounting: %1 while mounting
14142 + * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
14143 * @replay_list: temporary list used during journal replay
14144 * @replay_buds: list of buds to replay
14145 * @cs_sqnum: sequence number of first node in the log (commit start node)
14146 * @replay_sqnum: sequence number of node currently being replayed
14147 - * @need_recovery: file-system needs recovery
14148 - * @replaying: set to %1 during journal replay
14149 - * @unclean_leb_list: LEBs to recover when mounting ro to rw
14150 - * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
14151 + * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
14153 + * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
14155 * @size_tree: inode size information for recovery
14156 - * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
14157 - * @always_chk_crc: always check CRCs (while mounting and remounting rw)
14158 * @mount_opts: UBIFS-specific mount options
14160 * @dbg: debugging-related information
14161 @@ -1183,6 +1229,7 @@ struct ubifs_info {
14162 unsigned long long cmt_no;
14163 spinlock_t cnt_lock;
14165 + int ro_compat_version;
14166 unsigned char uuid[16];
14169 @@ -1208,9 +1255,11 @@ struct ubifs_info {
14170 wait_queue_head_t cmt_wq;
14172 unsigned int big_lpt:1;
14173 + unsigned int space_fixup:1;
14174 unsigned int no_chk_data_crc:1;
14175 unsigned int bulk_read:1;
14176 unsigned int default_compr:2;
14177 + unsigned int rw_incompat:1;
14179 struct mutex tnc_mutex;
14180 struct ubifs_zbranch zroot;
14181 @@ -1236,6 +1285,9 @@ struct ubifs_info {
14182 struct mutex bu_mutex;
14185 + struct mutex write_reserve_mutex;
14186 + void *write_reserve_buf;
14189 long long log_bytes;
14191 @@ -1257,32 +1309,27 @@ struct ubifs_info {
14195 + int max_write_size;
14196 + int max_write_shift;
14200 + int idx_leb_size;
14205 + unsigned int ro_media:1;
14206 + unsigned int ro_mount:1;
14207 + unsigned int ro_error:1;
14209 atomic_long_t dirty_pg_cnt;
14210 atomic_long_t dirty_zn_cnt;
14211 atomic_long_t clean_zn_cnt;
14213 - long long budg_idx_growth;
14214 - long long budg_data_growth;
14215 - long long budg_dd_growth;
14216 - long long budg_uncommitted_idx;
14217 spinlock_t space_lock;
14218 - int min_idx_lebs;
14219 - unsigned long long old_idx_sz;
14220 - unsigned long long calc_idx_sz;
14221 struct ubifs_lp_stats lst;
14222 - unsigned int nospace:1;
14223 - unsigned int nospace_rp:1;
14226 - int inode_budget;
14228 + struct ubifs_budg_info bi;
14229 + unsigned long long calc_idx_sz;
14233 @@ -1385,19 +1432,18 @@ struct ubifs_info {
14236 /* The below fields are used only during mounting and re-mounting */
14238 - struct rb_root replay_tree;
14239 + unsigned int empty:1;
14240 + unsigned int need_recovery:1;
14241 + unsigned int replaying:1;
14242 + unsigned int mounting:1;
14243 + unsigned int remounting_rw:1;
14244 struct list_head replay_list;
14245 struct list_head replay_buds;
14246 unsigned long long cs_sqnum;
14247 unsigned long long replay_sqnum;
14248 - int need_recovery;
14250 struct list_head unclean_leb_list;
14251 struct ubifs_mst_node *rcvrd_mst_node;
14252 struct rb_root size_tree;
14253 - int remounting_rw;
14254 - int always_chk_crc;
14255 struct ubifs_mount_opts mount_opts;
14257 #ifdef CONFIG_UBIFS_FS_DEBUG
14258 @@ -1444,7 +1490,7 @@ int ubifs_sync_wbufs_by_inode(struct ubi
14261 struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
14262 - int offs, void *sbuf);
14263 + int offs, void *sbuf, int quiet);
14264 void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
14265 int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
14266 int offs, int quiet);
14267 @@ -1506,7 +1552,7 @@ long long ubifs_reported_space(const str
14268 long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
14271 -int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
14272 +int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
14274 int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
14275 int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
14276 @@ -1588,6 +1634,7 @@ int ubifs_write_master(struct ubifs_info
14277 int ubifs_read_superblock(struct ubifs_info *c);
14278 struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
14279 int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
14280 +int ubifs_fixup_free_space(struct ubifs_info *c);
14283 int ubifs_validate_entry(struct ubifs_info *c,
14284 @@ -1669,6 +1716,7 @@ const struct ubifs_lprops *ubifs_fast_fi
14285 const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
14286 const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
14287 const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
14288 +int ubifs_calc_dark(const struct ubifs_info *c, int spc);
14291 int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
14292 @@ -1695,7 +1743,7 @@ struct inode *ubifs_iget(struct super_bl
14293 int ubifs_recover_master_node(struct ubifs_info *c);
14294 int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
14295 struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
14296 - int offs, void *sbuf, int grouped);
14297 + int offs, void *sbuf, int jhead);
14298 struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
14299 int offs, void *sbuf);
14300 int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
14301 diff -uprN linux-2.6.28/fs/ubifs/ubifs-media.h ubifs-v2.6.28/fs/ubifs/ubifs-media.h
14302 --- linux-2.6.28/fs/ubifs/ubifs-media.h 2011-06-15 15:12:27.000000000 -0400
14303 +++ ubifs-v2.6.28/fs/ubifs/ubifs-media.h 2011-06-15 14:22:09.000000000 -0400
14305 /* UBIFS node magic number (must not have the padding byte first or last) */
14306 #define UBIFS_NODE_MAGIC 0x06101831
14308 -/* UBIFS on-flash format version */
14310 + * UBIFS on-flash format version. This version is increased when the on-flash
14311 + * format is changing. If this happens, UBIFS will support older versions as
14312 + * well. But older UBIFS code will not support newer formats. Format changes
14313 + * will be rare and only when absolutely necessary, e.g. to fix a bug or to add
14316 + * UBIFS went into mainline kernel with format version 4. The older formats
14317 + * were development formats.
14319 #define UBIFS_FORMAT_VERSION 4
14322 + * Read-only compatibility version. If the UBIFS format is changed, older UBIFS
14323 + * implementations will not be able to mount newer formats in read-write mode.
14324 + * However, depending on the change, it may be possible to mount newer formats
14325 + * in R/O mode. This is indicated by the R/O compatibility version which is
14326 + * stored in the super-block.
14328 + * This is needed to support boot-loaders which only need R/O mounting. With
14329 + * this flag it is possible to do UBIFS format changes without a need to update
14332 +#define UBIFS_RO_COMPAT_VERSION 0
14334 /* Minimum logical eraseblock size in bytes */
14335 #define UBIFS_MIN_LEB_SZ (15*1024)
14340 * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
14341 - * shorter than uncompressed data length, UBIFS preferes to leave this data
14342 + * shorter than uncompressed data length, UBIFS prefers to leave this data
14343 * node uncompress, because it'll be read faster.
14345 #define UBIFS_MIN_COMPRESS_DIFF 64
14346 @@ -113,6 +135,13 @@
14347 /* The key is always at the same position in all keyed nodes */
14348 #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
14350 +/* Garbage collector journal head number */
14351 +#define UBIFS_GC_HEAD 0
14352 +/* Base journal head number */
14353 +#define UBIFS_BASE_HEAD 1
14354 +/* Data journal head number */
14355 +#define UBIFS_DATA_HEAD 2
14358 * LEB Properties Tree node types.
14360 @@ -303,14 +332,12 @@ enum {
14361 * UBIFS_COMPR_NONE: no compression
14362 * UBIFS_COMPR_LZO: LZO compression
14363 * UBIFS_COMPR_ZLIB: ZLIB compression
14364 - * UBIFS_COMPR_LZO999: LZO999 compression
14365 * UBIFS_COMPR_TYPES_CNT: count of supported compression types
14371 - UBIFS_COMPR_LZO999,
14372 UBIFS_COMPR_TYPES_CNT,
14375 @@ -381,9 +408,11 @@ enum {
14376 * Superblock flags.
14378 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
14379 + * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
14382 UBIFS_FLG_BIGLPT = 0x02,
14383 + UBIFS_FLG_SPACE_FIXUP = 0x04,
14387 @@ -407,7 +436,7 @@ struct ubifs_ch {
14391 -} __attribute__ ((packed));
14395 * union ubifs_dev_desc - device node descriptor.
14396 @@ -421,7 +450,7 @@ struct ubifs_ch {
14397 union ubifs_dev_desc {
14400 -} __attribute__ ((packed));
14404 * struct ubifs_ino_node - inode node.
14405 @@ -482,7 +511,7 @@ struct ubifs_ino_node {
14407 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
14409 -} __attribute__ ((packed));
14413 * struct ubifs_dent_node - directory entry node.
14414 @@ -507,7 +536,7 @@ struct ubifs_dent_node {
14416 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
14418 -} __attribute__ ((packed));
14422 * struct ubifs_data_node - data node.
14423 @@ -528,7 +557,7 @@ struct ubifs_data_node {
14425 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
14427 -} __attribute__ ((packed));
14431 * struct ubifs_trun_node - truncation node.
14432 @@ -548,7 +577,7 @@ struct ubifs_trun_node {
14433 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
14436 -} __attribute__ ((packed));
14440 * struct ubifs_pad_node - padding node.
14441 @@ -559,7 +588,7 @@ struct ubifs_trun_node {
14442 struct ubifs_pad_node {
14443 struct ubifs_ch ch;
14445 -} __attribute__ ((packed));
14449 * struct ubifs_sb_node - superblock node.
14450 @@ -588,6 +617,7 @@ struct ubifs_pad_node {
14451 * @padding2: reserved for future, zeroes
14452 * @time_gran: time granularity in nanoseconds
14453 * @uuid: UUID generated when the file system image was created
14454 + * @ro_compat_version: UBIFS R/O compatibility version
14456 struct ubifs_sb_node {
14457 struct ubifs_ch ch;
14458 @@ -614,8 +644,9 @@ struct ubifs_sb_node {
14462 - __u8 padding2[3972];
14463 -} __attribute__ ((packed));
14464 + __le32 ro_compat_version;
14465 + __u8 padding2[3968];
14469 * struct ubifs_mst_node - master node.
14470 @@ -682,7 +713,7 @@ struct ubifs_mst_node {
14474 -} __attribute__ ((packed));
14478 * struct ubifs_ref_node - logical eraseblock reference node.
14479 @@ -698,7 +729,7 @@ struct ubifs_ref_node {
14483 -} __attribute__ ((packed));
14487 * struct ubifs_branch - key/reference/length branch
14488 @@ -712,7 +743,7 @@ struct ubifs_branch {
14492 -} __attribute__ ((packed));
14496 * struct ubifs_idx_node - indexing node.
14497 @@ -726,7 +757,7 @@ struct ubifs_idx_node {
14501 -} __attribute__ ((packed));
14505 * struct ubifs_cs_node - commit start node.
14506 @@ -736,7 +767,7 @@ struct ubifs_idx_node {
14507 struct ubifs_cs_node {
14508 struct ubifs_ch ch;
14510 -} __attribute__ ((packed));
14514 * struct ubifs_orph_node - orphan node.
14515 @@ -748,6 +779,6 @@ struct ubifs_orph_node {
14516 struct ubifs_ch ch;
14519 -} __attribute__ ((packed));
14522 #endif /* __UBIFS_MEDIA_H__ */
14523 diff -uprN linux-2.6.28/fs/ubifs/xattr.c ubifs-v2.6.28/fs/ubifs/xattr.c
14524 --- linux-2.6.28/fs/ubifs/xattr.c 2008-12-24 18:26:37.000000000 -0500
14525 +++ ubifs-v2.6.28/fs/ubifs/xattr.c 2011-06-15 14:22:09.000000000 -0400
14526 @@ -78,9 +78,9 @@ enum {
14530 -static struct inode_operations none_inode_operations;
14531 -static struct address_space_operations none_address_operations;
14532 -static struct file_operations none_file_operations;
14533 +static const struct inode_operations empty_iops;
14534 +static const struct file_operations empty_fops;
14535 +static struct address_space_operations empty_aops;
14538 * create_xattr - create an extended attribute.
14539 @@ -129,9 +129,9 @@ static int create_xattr(struct ubifs_inf
14542 /* Re-define all operations to be "nothing" */
14543 - inode->i_mapping->a_ops = &none_address_operations;
14544 - inode->i_op = &none_inode_operations;
14545 - inode->i_fop = &none_file_operations;
14546 + inode->i_mapping->a_ops = &empty_aops;
14547 + inode->i_op = &empty_iops;
14548 + inode->i_fop = &empty_fops;
14550 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
14551 ui = ubifs_inode(inode);
14552 diff -uprN linux-2.6.28/include/linux/kernel.h ubifs-v2.6.28/include/linux/kernel.h
14553 --- linux-2.6.28/include/linux/kernel.h 2008-12-24 18:26:37.000000000 -0500
14554 +++ ubifs-v2.6.28/include/linux/kernel.h 2011-06-15 14:22:09.000000000 -0400
14555 @@ -45,6 +45,16 @@ extern const char linux_proc_banner[];
14557 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
14560 + * This looks more complex than it should be. But we need to
14561 + * get the type for the ~ right in round_down (it needs to be
14562 + * as wide as the result!), and we want to evaluate the macro
14563 + * arguments just once each.
14565 +#define __round_mask(x, y) ((__typeof__(x))((y)-1))
14566 +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
14567 +#define round_down(x, y) ((x) & ~__round_mask(x, y))
14569 #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
14570 #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
14571 #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
14572 diff -uprN linux-2.6.28/include/linux/list_sort.h ubifs-v2.6.28/include/linux/list_sort.h
14573 --- linux-2.6.28/include/linux/list_sort.h 1969-12-31 19:00:00.000000000 -0500
14574 +++ ubifs-v2.6.28/include/linux/list_sort.h 2011-06-15 14:22:09.000000000 -0400
14576 +#ifndef _LINUX_LIST_SORT_H
14577 +#define _LINUX_LIST_SORT_H
14579 +#include <linux/types.h>
14583 +void list_sort(void *priv, struct list_head *head,
14584 + int (*cmp)(void *priv, struct list_head *a,
14585 + struct list_head *b));
14587 diff -uprN linux-2.6.28/include/linux/mtd/mtd.h ubifs-v2.6.28/include/linux/mtd/mtd.h
14588 --- linux-2.6.28/include/linux/mtd/mtd.h 2011-06-15 15:12:27.000000000 -0400
14589 +++ ubifs-v2.6.28/include/linux/mtd/mtd.h 2011-06-15 15:16:03.000000000 -0400
14590 @@ -117,6 +117,17 @@ struct mtd_info {
14592 u_int32_t writesize;
14595 + * Size of the write buffer used by the MTD. MTD devices having a write
14596 + * buffer can write multiple writesize chunks at a time. E.g. while
14597 + * writing 4 * writesize bytes to a device with 2 * writesize bytes
14598 + * buffer the MTD driver can (but doesn't have to) do 2 writesize
14599 + * operations, but not 4. Currently, all NANDs have writebufsize
14600 + * equivalent to writesize (NAND page size). Some NOR flashes do have
14601 + * writebufsize greater than writesize.
14603 + uint32_t writebufsize;
14605 u_int32_t oobsize; // Amount of OOB data per block (e.g. 16)
14606 u_int32_t oobavail; // Available OOB bytes per block
14608 diff -uprN linux-2.6.28/include/linux/mtd/ubi.h ubifs-v2.6.28/include/linux/mtd/ubi.h
14609 --- linux-2.6.28/include/linux/mtd/ubi.h 2008-12-24 18:26:37.000000000 -0500
14610 +++ ubifs-v2.6.28/include/linux/mtd/ubi.h 2011-06-15 14:22:09.000000000 -0400
14612 #ifndef __LINUX_UBI_H__
14613 #define __LINUX_UBI_H__
14615 -#include <asm/ioctl.h>
14616 +#include <linux/ioctl.h>
14617 #include <linux/types.h>
14618 #include <mtd/ubi-user.h>
14620 @@ -87,7 +87,7 @@ enum {
14621 * physical eraseblock size and on how much bytes UBI headers consume. But
14622 * because of the volume alignment (@alignment), the usable size of logical
14623 * eraseblocks if a volume may be less. The following equation is true:
14624 - * @usable_leb_size = LEB size - (LEB size mod @alignment),
14625 + * @usable_leb_size = LEB size - (LEB size mod @alignment),
14626 * where LEB size is the logical eraseblock size defined by the UBI device.
14628 * The alignment is multiple to the minimal flash input/output unit size or %1
14629 @@ -116,22 +116,77 @@ struct ubi_volume_info {
14630 * struct ubi_device_info - UBI device description data structure.
14631 * @ubi_num: ubi device number
14632 * @leb_size: logical eraseblock size on this UBI device
14633 + * @leb_start: starting offset of logical eraseblocks within physical
14635 * @min_io_size: minimal I/O unit size
14636 + * @max_write_size: maximum amount of bytes the underlying flash can write at a
14637 + * time (MTD write buffer size)
14638 * @ro_mode: if this device is in read-only mode
14639 * @cdev: UBI character device major and minor numbers
14641 * Note, @leb_size is the logical eraseblock size offered by the UBI device.
14642 * Volumes of this UBI device may have smaller logical eraseblock size if their
14643 * alignment is not equivalent to %1.
14645 + * The @max_write_size field describes flash write maximum write unit. For
14646 + * example, NOR flash allows for changing individual bytes, so @min_io_size is
14647 + * %1. However, it does not mean that NOR flash has to write data byte-by-byte.
14648 + * Instead, CFI NOR flashes have a write-buffer of, e.g., 64 bytes, and when
14649 + * writing large chunks of data, they write 64-bytes at a time. Obviously, this
14650 + * improves write throughput.
14652 + * Also, the MTD device may have N interleaved (striped) flash chips
14653 + * underneath, in which case @min_io_size can be physical min. I/O size of
14654 + * single flash chip, while @max_write_size can be N * @min_io_size.
14656 + * The @max_write_size field is always greater or equivalent to @min_io_size.
14657 + * E.g., some NOR flashes may have (@min_io_size = 1, @max_write_size = 64). In
14658 + * contrast, NAND flashes usually have @min_io_size = @max_write_size = NAND
14661 struct ubi_device_info {
14666 + int max_write_size;
14672 + * enum - volume notification types.
14673 + * @UBI_VOLUME_ADDED: volume has been added
14674 + * @UBI_VOLUME_REMOVED: volume has been removed
14675 + * @UBI_VOLUME_RESIZED: volume size has been re-sized
14676 + * @UBI_VOLUME_RENAMED: volume name has been re-named
14677 + * @UBI_VOLUME_UPDATED: volume data has been updated
14679 + * These constants define which type of event has happened when a volume
14680 + * notification function is invoked.
14683 + UBI_VOLUME_ADDED,
14684 + UBI_VOLUME_REMOVED,
14685 + UBI_VOLUME_RESIZED,
14686 + UBI_VOLUME_RENAMED,
14687 + UBI_VOLUME_UPDATED,
14691 + * struct ubi_notification - UBI notification description structure.
14692 + * @di: UBI device description object
14693 + * @vi: UBI volume description object
14695 + * UBI notifiers are called with a pointer to an object of this type. The
14696 + * object describes the notification. Namely, it provides a description of the
14697 + * UBI device and UBI volume the notification informs about.
14699 +struct ubi_notification {
14700 + struct ubi_device_info di;
14701 + struct ubi_volume_info vi;
14704 /* UBI descriptor given to users when they open UBI volumes */
14705 struct ubi_volume_desc;
14707 @@ -141,6 +196,12 @@ void ubi_get_volume_info(struct ubi_volu
14708 struct ubi_volume_desc *ubi_open_volume(int ubi_num, int vol_id, int mode);
14709 struct ubi_volume_desc *ubi_open_volume_nm(int ubi_num, const char *name,
14711 +struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode);
14713 +int ubi_register_volume_notifier(struct notifier_block *nb,
14714 + int ignore_existing);
14715 +int ubi_unregister_volume_notifier(struct notifier_block *nb);
14717 void ubi_close_volume(struct ubi_volume_desc *desc);
14718 int ubi_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
14719 int len, int check);
14720 diff -uprN linux-2.6.28/include/mtd/ubi-user.h ubifs-v2.6.28/include/mtd/ubi-user.h
14721 --- linux-2.6.28/include/mtd/ubi-user.h 2008-12-24 18:26:37.000000000 -0500
14722 +++ ubifs-v2.6.28/include/mtd/ubi-user.h 2011-06-15 14:22:09.000000000 -0400
14723 @@ -40,37 +40,37 @@
14724 * UBI volume creation
14725 * ~~~~~~~~~~~~~~~~~~~
14727 - * UBI volumes are created via the %UBI_IOCMKVOL IOCTL command of UBI character
14728 + * UBI volumes are created via the %UBI_IOCMKVOL ioctl command of UBI character
14729 * device. A &struct ubi_mkvol_req object has to be properly filled and a
14730 - * pointer to it has to be passed to the IOCTL.
14731 + * pointer to it has to be passed to the ioctl.
14733 * UBI volume deletion
14734 * ~~~~~~~~~~~~~~~~~~~
14736 - * To delete a volume, the %UBI_IOCRMVOL IOCTL command of the UBI character
14737 + * To delete a volume, the %UBI_IOCRMVOL ioctl command of the UBI character
14738 * device should be used. A pointer to the 32-bit volume ID hast to be passed
14742 * UBI volume re-size
14743 * ~~~~~~~~~~~~~~~~~~
14745 - * To re-size a volume, the %UBI_IOCRSVOL IOCTL command of the UBI character
14746 + * To re-size a volume, the %UBI_IOCRSVOL ioctl command of the UBI character
14747 * device should be used. A &struct ubi_rsvol_req object has to be properly
14748 - * filled and a pointer to it has to be passed to the IOCTL.
14749 + * filled and a pointer to it has to be passed to the ioctl.
14751 * UBI volumes re-name
14752 * ~~~~~~~~~~~~~~~~~~~
14754 * To re-name several volumes atomically at one go, the %UBI_IOCRNVOL command
14755 * of the UBI character device should be used. A &struct ubi_rnvol_req object
14756 - * has to be properly filled and a pointer to it has to be passed to the IOCTL.
14757 + * has to be properly filled and a pointer to it has to be passed to the ioctl.
14759 * UBI volume update
14760 * ~~~~~~~~~~~~~~~~~
14762 - * Volume update should be done via the %UBI_IOCVOLUP IOCTL command of the
14763 + * Volume update should be done via the %UBI_IOCVOLUP ioctl command of the
14764 * corresponding UBI volume character device. A pointer to a 64-bit update
14765 - * size should be passed to the IOCTL. After this, UBI expects user to write
14766 + * size should be passed to the ioctl. After this, UBI expects user to write
14767 * this number of bytes to the volume character device. The update is finished
14768 * when the claimed number of bytes is passed. So, the volume update sequence
14769 * is something like:
14770 @@ -80,14 +80,58 @@
14771 * write(fd, buf, image_size);
14774 - * Atomic eraseblock change
14775 + * Logical eraseblock erase
14776 * ~~~~~~~~~~~~~~~~~~~~~~~~
14778 - * Atomic eraseblock change operation is done via the %UBI_IOCEBCH IOCTL
14779 - * command of the corresponding UBI volume character device. A pointer to
14780 - * &struct ubi_leb_change_req has to be passed to the IOCTL. Then the user is
14781 - * expected to write the requested amount of bytes. This is similar to the
14782 - * "volume update" IOCTL.
14783 + * To erase a logical eraseblock, the %UBI_IOCEBER ioctl command of the
14784 + * corresponding UBI volume character device should be used. This command
14785 + * unmaps the requested logical eraseblock, makes sure the corresponding
14786 + * physical eraseblock is successfully erased, and returns.
14788 + * Atomic logical eraseblock change
14789 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14791 + * Atomic logical eraseblock change operation is called using the %UBI_IOCEBCH
14792 + * ioctl command of the corresponding UBI volume character device. A pointer to
14793 + * a &struct ubi_leb_change_req object has to be passed to the ioctl. Then the
14794 + * user is expected to write the requested amount of bytes (similarly to what
14795 + * should be done in case of the "volume update" ioctl).
14797 + * Logical eraseblock map
14798 + * ~~~~~~~~~~~~~~~~~~~~~
14800 + * To map a logical eraseblock to a physical eraseblock, the %UBI_IOCEBMAP
14801 + * ioctl command should be used. A pointer to a &struct ubi_map_req object is
14802 + * expected to be passed. The ioctl maps the requested logical eraseblock to
14803 + * a physical eraseblock and returns. Only non-mapped logical eraseblocks can
14804 + * be mapped. If the logical eraseblock specified in the request is already
14805 + * mapped to a physical eraseblock, the ioctl fails and returns error.
14807 + * Logical eraseblock unmap
14808 + * ~~~~~~~~~~~~~~~~~~~~~~~~
14810 + * To unmap a logical eraseblock to a physical eraseblock, the %UBI_IOCEBUNMAP
14811 + * ioctl command should be used. The ioctl unmaps the logical eraseblocks,
14812 + * schedules corresponding physical eraseblock for erasure, and returns. Unlike
14813 + * the "LEB erase" command, it does not wait for the physical eraseblock being
14814 + * erased. Note, the side effect of this is that if an unclean reboot happens
14815 + * after the unmap ioctl returns, you may find the LEB mapped again to the same
14816 + * physical eraseblock after the UBI is run again.
14818 + * Check if logical eraseblock is mapped
14819 + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
14821 + * To check if a logical eraseblock is mapped to a physical eraseblock, the
14822 + * %UBI_IOCEBISMAP ioctl command should be used. It returns %0 if the LEB is
14823 + * not mapped, and %1 if it is mapped.
14825 + * Set an UBI volume property
14826 + * ~~~~~~~~~~~~~~~~~~~~~~~~~
14828 + * To set an UBI volume property the %UBI_IOCSETPROP ioctl command should be
14829 + * used. A pointer to a &struct ubi_set_vol_prop_req object is expected to be
14830 + * passed. The object describes which property should be set, and to which value
14831 + * it should be set.
14835 @@ -101,7 +145,7 @@
14836 /* Maximum volume name length */
14837 #define UBI_MAX_VOLUME_NAME 127
14839 -/* IOCTL commands of UBI character devices */
14840 +/* ioctl commands of UBI character devices */
14842 #define UBI_IOC_MAGIC 'o'
14844 @@ -114,7 +158,7 @@
14845 /* Re-name volumes */
14846 #define UBI_IOCRNVOL _IOW(UBI_IOC_MAGIC, 3, struct ubi_rnvol_req)
14848 -/* IOCTL commands of the UBI control character device */
14849 +/* ioctl commands of the UBI control character device */
14851 #define UBI_CTRL_IOC_MAGIC 'o'
14853 @@ -123,16 +167,25 @@
14854 /* Detach an MTD device */
14855 #define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t)
14857 -/* IOCTL commands of UBI volume character devices */
14858 +/* ioctl commands of UBI volume character devices */
14860 #define UBI_VOL_IOC_MAGIC 'O'
14862 /* Start UBI volume update */
14863 #define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t)
14864 -/* An eraseblock erasure command, used for debugging, disabled by default */
14865 +/* LEB erasure command, used for debugging, disabled by default */
14866 #define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t)
14867 -/* An atomic eraseblock change command */
14868 +/* Atomic LEB change command */
14869 #define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t)
14870 +/* Map LEB command */
14871 +#define UBI_IOCEBMAP _IOW(UBI_VOL_IOC_MAGIC, 3, struct ubi_map_req)
14872 +/* Unmap LEB command */
14873 +#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, int32_t)
14874 +/* Check if LEB is mapped command */
14875 +#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, int32_t)
14876 +/* Set an UBI volume property */
14877 +#define UBI_IOCSETVOLPROP _IOW(UBI_VOL_IOC_MAGIC, 6, \
14878 + struct ubi_set_vol_prop_req)
14880 /* Maximum MTD device name length supported by UBI */
14881 #define MAX_UBI_MTD_NAME_LEN 127
14882 @@ -168,6 +221,17 @@ enum {
14883 UBI_STATIC_VOLUME = 4,
14887 + * UBI set volume property ioctl constants.
14889 + * @UBI_VOL_PROP_DIRECT_WRITE: allow (any non-zero value) or disallow (value 0)
14890 + * user to directly write and erase individual
14891 + * eraseblocks on dynamic volumes
14894 + UBI_VOL_PROP_DIRECT_WRITE = 1,
14898 * struct ubi_attach_req - attach MTD device request.
14899 * @ubi_num: UBI device number to create
14900 @@ -244,7 +308,7 @@ struct ubi_mkvol_req {
14902 int8_t padding2[4];
14903 char name[UBI_MAX_VOLUME_NAME + 1];
14904 -} __attribute__ ((packed));
14908 * struct ubi_rsvol_req - a data structure used in volume re-size requests.
14909 @@ -260,7 +324,7 @@ struct ubi_mkvol_req {
14910 struct ubi_rsvol_req {
14913 -} __attribute__ ((packed));
14917 * struct ubi_rnvol_req - volumes re-name request.
14918 @@ -302,11 +366,11 @@ struct ubi_rnvol_req {
14919 int8_t padding2[2];
14920 char name[UBI_MAX_VOLUME_NAME + 1];
14921 } ents[UBI_MAX_RNVOL];
14922 -} __attribute__ ((packed));
14926 - * struct ubi_leb_change_req - a data structure used in atomic logical
14927 - * eraseblock change requests.
14928 + * struct ubi_leb_change_req - a data structure used in atomic LEB change
14930 * @lnum: logical eraseblock number to change
14931 * @bytes: how many bytes will be written to the logical eraseblock
14932 * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
14933 @@ -317,6 +381,32 @@ struct ubi_leb_change_req {
14937 -} __attribute__ ((packed));
14941 + * struct ubi_map_req - a data structure used in map LEB requests.
14942 + * @lnum: logical eraseblock number to unmap
14943 + * @dtype: data type (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
14944 + * @padding: reserved for future, not used, has to be zeroed
14946 +struct ubi_map_req {
14949 + int8_t padding[3];
14954 + * struct ubi_set_vol_prop_req - a data structure used to set an UBI volume
14956 + * @property: property to set (%UBI_VOL_PROP_DIRECT_WRITE)
14957 + * @padding: reserved for future, not used, has to be zeroed
14958 + * @value: value to set
14960 +struct ubi_set_vol_prop_req {
14961 + uint8_t property;
14962 + uint8_t padding[7];
14966 #endif /* __UBI_USER_H__ */
14967 diff -uprN linux-2.6.28/lib/list_sort.c ubifs-v2.6.28/lib/list_sort.c
14968 --- linux-2.6.28/lib/list_sort.c 1969-12-31 19:00:00.000000000 -0500
14969 +++ ubifs-v2.6.28/lib/list_sort.c 2011-06-15 14:22:09.000000000 -0400
14971 +#include <linux/kernel.h>
14972 +#include <linux/module.h>
14973 +#include <linux/list_sort.h>
14974 +#include <linux/slab.h>
14975 +#include <linux/list.h>
14977 +#define MAX_LIST_LENGTH_BITS 20
14980 + * Returns a list organized in an intermediate format suited
14981 + * to chaining of merge() calls: null-terminated, no reserved or
14982 + * sentinel head node, "prev" links not maintained.
14984 +static struct list_head *merge(void *priv,
14985 + int (*cmp)(void *priv, struct list_head *a,
14986 + struct list_head *b),
14987 + struct list_head *a, struct list_head *b)
14989 + struct list_head head, *tail = &head;
14992 + /* if equal, take 'a' -- important for sort stability */
14993 + if ((*cmp)(priv, a, b) <= 0) {
15000 + tail = tail->next;
15002 + tail->next = a?:b;
15003 + return head.next;
15007 + * Combine final list merge with restoration of standard doubly-linked
15008 + * list structure. This approach duplicates code from merge(), but
15009 + * runs faster than the tidier alternatives of either a separate final
15010 + * prev-link restoration pass, or maintaining the prev links
15013 +static void merge_and_restore_back_links(void *priv,
15014 + int (*cmp)(void *priv, struct list_head *a,
15015 + struct list_head *b),
15016 + struct list_head *head,
15017 + struct list_head *a, struct list_head *b)
15019 + struct list_head *tail = head;
15022 + /* if equal, take 'a' -- important for sort stability */
15023 + if ((*cmp)(priv, a, b) <= 0) {
15032 + tail = tail->next;
15034 + tail->next = a ? : b;
15038 + * In worst cases this loop may run many iterations.
15039 + * Continue callbacks to the client even though no
15040 + * element comparison is needed, so the client's cmp()
15041 + * routine can invoke cond_resched() periodically.
15043 + (*cmp)(priv, tail->next, tail->next);
15045 + tail->next->prev = tail;
15046 + tail = tail->next;
15047 + } while (tail->next);
15049 + tail->next = head;
15050 + head->prev = tail;
15054 + * list_sort - sort a list
15055 + * @priv: private data, opaque to list_sort(), passed to @cmp
15056 + * @head: the list to sort
15057 + * @cmp: the elements comparison function
15059 + * This function implements "merge sort", which has O(nlog(n))
15062 + * The comparison function @cmp must return a negative value if @a
15063 + * should sort before @b, and a positive value if @a should sort after
15064 + * @b. If @a and @b are equivalent, and their original relative
15065 + * ordering is to be preserved, @cmp must return 0.
15067 +void list_sort(void *priv, struct list_head *head,
15068 + int (*cmp)(void *priv, struct list_head *a,
15069 + struct list_head *b))
15071 + struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
15072 + -- last slot is a sentinel */
15073 + int lev; /* index into part[] */
15075 + struct list_head *list;
15077 + if (list_empty(head))
15080 + memset(part, 0, sizeof(part));
15082 + head->prev->next = NULL;
15083 + list = head->next;
15086 + struct list_head *cur = list;
15087 + list = list->next;
15088 + cur->next = NULL;
15090 + for (lev = 0; part[lev]; lev++) {
15091 + cur = merge(priv, cmp, part[lev], cur);
15092 + part[lev] = NULL;
15094 + if (lev > max_lev) {
15095 + if (unlikely(lev >= ARRAY_SIZE(part)-1)) {
15096 + printk(KERN_DEBUG "list passed to"
15097 + " list_sort() too long for"
15098 + " efficiency\n");
15106 + for (lev = 0; lev < max_lev; lev++)
15108 + list = merge(priv, cmp, part[lev], list);
15110 + merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
15112 +EXPORT_SYMBOL(list_sort);
15114 +#ifdef CONFIG_TEST_LIST_SORT
15116 +#include <linux/random.h>
15119 + * The pattern of set bits in the list length determines which cases
15120 + * are hit in list_sort().
15122 +#define TEST_LIST_LEN (512+128+2) /* not including head */
15124 +#define TEST_POISON1 0xDEADBEEF
15125 +#define TEST_POISON2 0xA324354C
15128 + unsigned int poison1;
15129 + struct list_head list;
15130 + unsigned int poison2;
15135 +/* Array, containing pointers to all elements in the test list */
15136 +static struct debug_el **elts __initdata;
15138 +static int __init check(struct debug_el *ela, struct debug_el *elb)
15140 + if (ela->serial >= TEST_LIST_LEN) {
15141 + printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
15145 + if (elb->serial >= TEST_LIST_LEN) {
15146 + printk(KERN_ERR "list_sort_test: error: incorrect serial %d\n",
15150 + if (elts[ela->serial] != ela || elts[elb->serial] != elb) {
15151 + printk(KERN_ERR "list_sort_test: error: phantom element\n");
15154 + if (ela->poison1 != TEST_POISON1 || ela->poison2 != TEST_POISON2) {
15155 + printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
15156 + ela->poison1, ela->poison2);
15159 + if (elb->poison1 != TEST_POISON1 || elb->poison2 != TEST_POISON2) {
15160 + printk(KERN_ERR "list_sort_test: error: bad poison: %#x/%#x\n",
15161 + elb->poison1, elb->poison2);
15167 +static int __init cmp(void *priv, struct list_head *a, struct list_head *b)
15169 + struct debug_el *ela, *elb;
15171 + ela = container_of(a, struct debug_el, list);
15172 + elb = container_of(b, struct debug_el, list);
15175 + return ela->value - elb->value;
15178 +static int __init list_sort_test(void)
15180 + int i, count = 1, err = -EINVAL;
15181 + struct debug_el *el;
15182 + struct list_head *cur, *tmp;
15185 + printk(KERN_DEBUG "list_sort_test: start testing list_sort()\n");
15187 + elts = kmalloc(sizeof(void *) * TEST_LIST_LEN, GFP_KERNEL);
15189 + printk(KERN_ERR "list_sort_test: error: cannot allocate "
15194 + for (i = 0; i < TEST_LIST_LEN; i++) {
15195 + el = kmalloc(sizeof(*el), GFP_KERNEL);
15197 + printk(KERN_ERR "list_sort_test: error: cannot "
15198 + "allocate memory\n");
15201 + /* force some equivalencies */
15202 + el->value = random32() % (TEST_LIST_LEN/3);
15204 + el->poison1 = TEST_POISON1;
15205 + el->poison2 = TEST_POISON2;
15207 + list_add_tail(&el->list, &head);
15210 + list_sort(NULL, &head, cmp);
15212 + for (cur = head.next; cur->next != &head; cur = cur->next) {
15213 + struct debug_el *el1;
15216 + if (cur->next->prev != cur) {
15217 + printk(KERN_ERR "list_sort_test: error: list is "
15222 + cmp_result = cmp(NULL, cur, cur->next);
15223 + if (cmp_result > 0) {
15224 + printk(KERN_ERR "list_sort_test: error: list is not "
15229 + el = container_of(cur, struct debug_el, list);
15230 + el1 = container_of(cur->next, struct debug_el, list);
15231 + if (cmp_result == 0 && el->serial >= el1->serial) {
15232 + printk(KERN_ERR "list_sort_test: error: order of "
15233 + "equivalent elements not preserved\n");
15237 + if (check(el, el1)) {
15238 + printk(KERN_ERR "list_sort_test: error: element check "
15245 + if (count != TEST_LIST_LEN) {
15246 + printk(KERN_ERR "list_sort_test: error: bad list length %d",
15254 + list_for_each_safe(cur, tmp, &head) {
15256 + kfree(container_of(cur, struct debug_el, list));
15260 +module_init(list_sort_test);
15261 +#endif /* CONFIG_TEST_LIST_SORT */
15262 diff -uprN linux-2.6.28/lib/Makefile ubifs-v2.6.28/lib/Makefile
15263 --- linux-2.6.28/lib/Makefile 2008-12-24 18:26:37.000000000 -0500
15264 +++ ubifs-v2.6.28/lib/Makefile 2011-06-15 14:22:09.000000000 -0400
15265 @@ -20,7 +20,7 @@ lib-y += kobject.o kref.o klist.o
15267 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
15268 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
15270 + string_helpers.o list_sort.o
15272 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
15273 CFLAGS_kobject.o += -DDEBUG
15274 diff -uprN linux-2.6.28/MAINTAINERS ubifs-v2.6.28/MAINTAINERS
15275 --- linux-2.6.28/MAINTAINERS 2011-06-15 15:12:26.000000000 -0400
15276 +++ ubifs-v2.6.28/MAINTAINERS 2011-06-15 15:16:03.000000000 -0400
15277 @@ -4242,9 +4242,9 @@ S: Maintained
15279 UBI FILE SYSTEM (UBIFS)
15280 P: Artem Bityutskiy
15281 -M: dedekind@infradead.org
15282 +M: dedekind1@gmail.com
15284 -M: ext-adrian.hunter@nokia.com
15285 +M: adrian.hunter@nokia.com
15286 L: linux-mtd@lists.infradead.org
15287 T: git git://git.infradead.org/ubifs-2.6.git
15288 W: http://www.linux-mtd.infradead.org/doc/ubifs.html
15289 @@ -4297,7 +4297,7 @@ S: Maintained
15291 UNSORTED BLOCK IMAGES (UBI)
15292 P: Artem Bityutskiy
15293 -M: dedekind@infradead.org
15294 +M: dedekind1@gmail.com
15295 W: http://www.linux-mtd.infradead.org/
15296 L: linux-mtd@lists.infradead.org
15297 T: git git://git.infradead.org/ubi-2.6.git
15298 diff -uprN linux-2.6.28/scripts/unifdef.c ubifs-v2.6.28/scripts/unifdef.c
15299 --- linux-2.6.28/scripts/unifdef.c 2008-12-24 18:26:37.000000000 -0500
15300 +++ ubifs-v2.6.28/scripts/unifdef.c 2011-06-15 14:22:10.000000000 -0400
15301 @@ -206,7 +206,7 @@ static void done(void);
15302 static void error(const char *);
15303 static int findsym(const char *);
15304 static void flushline(bool);
15305 -static Linetype getline(void);
15306 +static Linetype get_line(void);
15307 static Linetype ifeval(const char **);
15308 static void ignoreoff(void);
15309 static void ignoreon(void);
15310 @@ -512,7 +512,7 @@ process(void)
15314 - lineval = getline();
15315 + lineval = get_line();
15316 trans_table[ifstate[depth]][lineval]();
15317 debug("process %s -> %s depth %d",
15318 linetype_name[lineval],
15319 @@ -526,7 +526,7 @@ process(void)
15320 * help from skipcomment().