vcs.maemo.org Git - kernel-power/blob - kernel-power-2.6.28/debian/patches/ext4-data-corruption.diff

   1 diff -uprN linux-2.6.28.orig/Documentation/filesystems/ext4.txt linux-2.6.28/Documentation/filesystems/ext4.txt
   2 --- linux-2.6.28.orig/Documentation/filesystems/ext4.txt        2009-05-02 20:54:43.000000000 +0200
   3 +++ linux-2.6.28/Documentation/filesystems/ext4.txt     2009-05-23 16:05:41.000000000 +0200
   4 @@ -76,7 +76,7 @@ Note: More extensive information for get
   5  * extent format more robust in face of on-disk corruption due to magics,
   6  * internal redunancy in tree
   7  * improved file allocation (multi-block alloc)
   8 -* fix 32000 subdirectory limit
   9 +* lift 32000 subdirectory limit imposed by i_links_count[1]
  10  * nsec timestamps for mtime, atime, ctime, create time
  11  * inode version field on disk (NFSv4, Lustre)
  12  * reduced e2fsck time via uninit_bg feature
  13 @@ -91,6 +91,9 @@ Note: More extensive information for get
  14  * efficent new ordered mode in JBD2 and ext4(avoid using buffer head to force
  15    the ordering)
  16
  17 +[1] Filesystems with a block size of 1k may see a limit imposed by the
  18 +directory hash tree having a maximum depth of two.
  19 +
  20  2.2 Candidate features for future inclusion
  21
  22  * Online defrag (patches available but not well tested)
  23 diff -uprN linux-2.6.28.orig/fs/ext4/balloc.c linux-2.6.28/fs/ext4/balloc.c
  24 --- linux-2.6.28.orig/fs/ext4/balloc.c  2009-05-02 20:54:43.000000000 +0200
  25 +++ linux-2.6.28/fs/ext4/balloc.c       2009-05-23 16:05:41.000000000 +0200
  26 @@ -608,7 +608,9 @@ int ext4_claim_free_blocks(struct ext4_s
  27   */
  28  int ext4_should_retry_alloc(struct super_block *sb, int *retries)
  29  {
  30 -       if (!ext4_has_free_blocks(EXT4_SB(sb), 1) || (*retries)++ > 3)
  31 +       if (!ext4_has_free_blocks(EXT4_SB(sb), 1) ||
  32 +           (*retries)++ > 3 ||
  33 +           !EXT4_SB(sb)->s_journal)
  34                 return 0;
  35
  36         jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
  37 diff -uprN linux-2.6.28.orig/fs/ext4/ext4.h linux-2.6.28/fs/ext4/ext4.h
  38 --- linux-2.6.28.orig/fs/ext4/ext4.h    2009-05-02 20:54:43.000000000 +0200
  39 +++ linux-2.6.28/fs/ext4/ext4.h 2009-05-23 16:05:41.000000000 +0200
  40 @@ -248,6 +248,30 @@ struct flex_groups {
  41  #define EXT4_FL_USER_VISIBLE           0x000BDFFF /* User visible flags */
  42  #define EXT4_FL_USER_MODIFIABLE                0x000B80FF /* User modifiable flags */
  43
  44 +/* Flags that should be inherited by new inodes from their parent. */
  45 +#define EXT4_FL_INHERITED (EXT4_SECRM_FL | EXT4_UNRM_FL | EXT4_COMPR_FL |\
  46 +                          EXT4_SYNC_FL | EXT4_IMMUTABLE_FL | EXT4_APPEND_FL |\
  47 +                          EXT4_NODUMP_FL | EXT4_NOATIME_FL |\
  48 +                          EXT4_NOCOMPR_FL | EXT4_JOURNAL_DATA_FL |\
  49 +                          EXT4_NOTAIL_FL | EXT4_DIRSYNC_FL)
  50 +
  51 +/* Flags that are appropriate for regular files (all but dir-specific ones). */
  52 +#define EXT4_REG_FLMASK (~(EXT4_DIRSYNC_FL | EXT4_TOPDIR_FL))
  53 +
  54 +/* Flags that are appropriate for non-directory, non-regular inodes. */
  55 +#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
  56 +
  57 +/* Mask out flags that are inappropriate for the given type of inode. */
  58 +static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
  59 +{
  60 +       if (S_ISDIR(mode))
  61 +               return flags;
  62 +       else if (S_ISREG(mode))
  63 +               return flags & EXT4_REG_FLMASK;
  64 +       else
  65 +               return flags & EXT4_OTHER_FLMASK;
  66 +}
  67 +
  68  /*
  69   * Inode dynamic state flags
  70   */
  71 @@ -529,7 +556,7 @@ do {                                                                               \
  72  #define EXT4_MOUNT_NO_UID32            0x02000  /* Disable 32-bit UIDs */
  73  #define EXT4_MOUNT_XATTR_USER          0x04000 /* Extended user attributes */
  74  #define EXT4_MOUNT_POSIX_ACL           0x08000 /* POSIX Access Control Lists */
  75 -#define EXT4_MOUNT_RESERVATION         0x10000 /* Preallocation */
  76 +#define EXT4_MOUNT_NO_AUTO_DA_ALLOC    0x10000 /* No auto delalloc mapping */
  77  #define EXT4_MOUNT_BARRIER             0x20000 /* Use block barriers */
  78  #define EXT4_MOUNT_NOBH                        0x40000 /* No bufferheads */
  79  #define EXT4_MOUNT_QUOTA               0x80000 /* Some quota option set */
  80 diff -uprN linux-2.6.28.orig/fs/ext4/extents.c linux-2.6.28/fs/ext4/extents.c
  81 --- linux-2.6.28.orig/fs/ext4/extents.c 2009-05-02 20:54:43.000000000 +0200
  82 +++ linux-2.6.28/fs/ext4/extents.c      2009-05-23 16:05:41.000000000 +0200
  83 @@ -1120,7 +1120,8 @@ ext4_ext_search_right(struct inode *inod
  84         struct ext4_extent_idx *ix;
  85         struct ext4_extent *ex;
  86         ext4_fsblk_t block;
  87 -       int depth, ee_len;
  88 +       int depth;      /* Note, NOT eh_depth; depth from top of tree */
  89 +       int ee_len;
  90
  91         BUG_ON(path == NULL);
  92         depth = path->p_depth;
  93 @@ -1179,7 +1180,8 @@ ext4_ext_search_right(struct inode *inod
  94                 if (bh == NULL)
  95                         return -EIO;
  96                 eh = ext_block_hdr(bh);
  97 -               if (ext4_ext_check_header(inode, eh, depth)) {
  98 +               /* subtract from p_depth to get proper eh_depth */
  99 +               if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
 100                         put_bh(bh);
 101                         return -EIO;
 102                 }
 103 @@ -1740,11 +1742,13 @@ ext4_ext_put_in_cache(struct inode *inod
 104  {
 105         struct ext4_ext_cache *cex;
 106         BUG_ON(len == 0);
 107 +       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 108         cex = &EXT4_I(inode)->i_cached_extent;
 109         cex->ec_type = type;
 110         cex->ec_block = block;
 111         cex->ec_len = len;
 112         cex->ec_start = start;
 113 +       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 114  }
 115
 116  /*
 117 @@ -1801,12 +1805,17 @@ ext4_ext_in_cache(struct inode *inode, e
 118                         struct ext4_extent *ex)
 119  {
 120         struct ext4_ext_cache *cex;
 121 +       int ret = EXT4_EXT_CACHE_NO;
 122
 123 +       /*
 124 +        * We borrow i_block_reservation_lock to protect i_cached_extent
 125 +        */
 126 +       spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 127         cex = &EXT4_I(inode)->i_cached_extent;
 128
 129         /* has cache valid data? */
 130         if (cex->ec_type == EXT4_EXT_CACHE_NO)
 131 -               return EXT4_EXT_CACHE_NO;
 132 +               goto errout;
 133
 134         BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
 135                         cex->ec_type != EXT4_EXT_CACHE_EXTENT);
 136 @@ -1817,11 +1826,11 @@ ext4_ext_in_cache(struct inode *inode, e
 137                 ext_debug("%u cached by %u:%u:%llu\n",
 138                                 block,
 139                                 cex->ec_block, cex->ec_len, cex->ec_start);
 140 -               return cex->ec_type;
 141 +               ret = cex->ec_type;
 142         }
 143 -
 144 -       /* not in cache */
 145 -       return EXT4_EXT_CACHE_NO;
 146 +errout:
 147 +       spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 148 +       return ret;
 149  }
 150
 151  /*
 152 @@ -2777,6 +2786,8 @@ int ext4_ext_get_blocks(handle_t *handle
 153                                 if (allocated > max_blocks)
 154                                         allocated = max_blocks;
 155                                 set_buffer_unwritten(bh_result);
 156 +                               bh_result->b_bdev = inode->i_sb->s_bdev;
 157 +                               bh_result->b_blocknr = newblock;
 158                                 goto out2;
 159                         }
 160
 161 diff -uprN linux-2.6.28.orig/fs/ext4/ialloc.c linux-2.6.28/fs/ext4/ialloc.c
 162 --- linux-2.6.28.orig/fs/ext4/ialloc.c  2009-05-02 20:54:43.000000000 +0200
 163 +++ linux-2.6.28/fs/ext4/ialloc.c       2009-05-23 16:05:41.000000000 +0200
 164 @@ -188,7 +188,7 @@ void ext4_free_inode(handle_t *handle, s
 165         struct ext4_group_desc *gdp;
 166         struct ext4_super_block *es;
 167         struct ext4_sb_info *sbi;
 168 -       int fatal = 0, err;
 169 +       int fatal = 0, err, cleared;
 170         ext4_group_t flex_group;
 171
 172         if (atomic_read(&inode->i_count) > 1) {
 173 @@ -243,8 +243,10 @@ void ext4_free_inode(handle_t *handle, s
 174                 goto error_return;
 175
 176         /* Ok, now we can actually update the inode bitmaps.. */
 177 -       if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
 178 -                                       bit, bitmap_bh->b_data))
 179 +       spin_lock(sb_bgl_lock(sbi, block_group));
 180 +       cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
 181 +       spin_unlock(sb_bgl_lock(sbi, block_group));
 182 +       if (!cleared)
 183                 ext4_error(sb, "ext4_free_inode",
 184                            "bit already cleared for inode %lu", ino);
 185         else {
 186 @@ -686,6 +688,7 @@ struct inode *ext4_new_inode(handle_t *h
 187         struct inode *ret;
 188         ext4_group_t i;
 189         int free = 0;
 190 +       static int once = 1;
 191         ext4_group_t flex_group;
 192
 193         /* Cannot create files in a deleted directory */
 194 @@ -705,10 +708,12 @@ struct inode *ext4_new_inode(handle_t *h
 195                 ret2 = find_group_flex(sb, dir, &group);
 196                 if (ret2 == -1) {
 197                         ret2 = find_group_other(sb, dir, &group);
 198 -                       if (ret2 == 0 && printk_ratelimit())
 199 +                       if (ret2 == 0 && once) {
 200 +                               once = 0;
 201                                 printk(KERN_NOTICE "ext4: find_group_flex "
 202                                        "failed, fallback succeeded dir %lu\n",
 203                                        dir->i_ino);
 204 +                       }
 205                 }
 206                 goto got_group;
 207         }
 208 @@ -862,16 +867,12 @@ got:
 209         ei->i_disksize = 0;
 210
 211         /*
 212 -        * Don't inherit extent flag from directory. We set extent flag on
 213 -        * newly created directory and file only if -o extent mount option is
 214 -        * specified
 215 +        * Don't inherit extent flag from directory, amongst others. We set
 216 +        * extent flag on newly created directory and file only if -o extent
 217 +        * mount option is specified
 218          */
 219 -       ei->i_flags = EXT4_I(dir)->i_flags & ~(EXT4_INDEX_FL|EXT4_EXTENTS_FL);
 220 -       if (S_ISLNK(mode))
 221 -               ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL);
 222 -       /* dirsync only applies to directories */
 223 -       if (!S_ISDIR(mode))
 224 -               ei->i_flags &= ~EXT4_DIRSYNC_FL;
 225 +       ei->i_flags =
 226 +               ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
 227         ei->i_file_acl = 0;
 228         ei->i_dtime = 0;
 229         ei->i_block_group = group;
 230 diff -uprN linux-2.6.28.orig/fs/ext4/inode.c linux-2.6.28/fs/ext4/inode.c
 231 --- linux-2.6.28.orig/fs/ext4/inode.c   2009-05-02 20:54:43.000000000 +0200
 232 +++ linux-2.6.28/fs/ext4/inode.c        2009-05-23 16:05:41.000000000 +0200
 233 @@ -1052,6 +1059,7 @@ int ext4_get_blocks_wrap(handle_t *handl
 234         int retval;
 235
 236         clear_buffer_mapped(bh);
 237 +       clear_buffer_unwritten(bh);
 238
 239         /*
 240          * Try to see if we can get  the block without requesting
 241 @@ -1082,6 +1090,18 @@ int ext4_get_blocks_wrap(handle_t *handl
 242                 return retval;
 243
 244         /*
 245 +        * When we call get_blocks without the create flag, the
 246 +        * BH_Unwritten flag could have gotten set if the blocks
 247 +        * requested were part of an uninitialized extent.  We need to
 248 +        * clear this flag now that we are committed to convert all or
 249 +        * part of the uninitialized extent to be an initialized
 250 +        * extent.  This is because we need to avoid the combination
 251 +        * of BH_Unwritten and BH_Mapped flags being simultaneously
 252 +        * set on the buffer_head.
 253 +        */
 254 +       clear_buffer_unwritten(bh);
 255 +
 256 +       /*
 257          * New blocks allocate and/or writing to uninitialized extent
 258          * will possibly result in updating i_data, so we take
 259          * the write lock of i_data_sem, and call get_blocks()
 260 @@ -2180,6 +2200,10 @@ static int ext4_da_get_block_prep(struct
 261                                   struct buffer_head *bh_result, int create)
 262  {
 263         int ret = 0;
 264 +       sector_t invalid_block = ~((sector_t) 0xffff);
 265 +
 266 +       if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
 267 +               invalid_block = ~0;
 268
 269         BUG_ON(create == 0);
 270         BUG_ON(bh_result->b_size != inode->i_sb->s_blocksize);
 271 @@ -2201,11 +2225,18 @@ static int ext4_da_get_block_prep(struct
 272                         /* not enough space to reserve */
 273                         return ret;
 274
 275 -               map_bh(bh_result, inode->i_sb, 0);
 276 +               map_bh(bh_result, inode->i_sb, invalid_block);
 277                 set_buffer_new(bh_result);
 278                 set_buffer_delay(bh_result);
 279         } else if (ret > 0) {
 280                 bh_result->b_size = (ret << inode->i_blkbits);
 281 +               /*
 282 +                * With sub-block writes into unwritten extents
 283 +                * we also need to mark the buffer as new so that
 284 +                * the unwritten parts of the buffer get correctly zeroed.
 285 +                */
 286 +               if (buffer_unwritten(bh_result))
 287 +                       set_buffer_new(bh_result);
 288                 ret = 0;
 289         }
 290
 291 @@ -2493,7 +2524,7 @@ retry:
 292
 293                 ext4_journal_stop(handle);
 294
 295 -               if (mpd.retval == -ENOSPC) {
 296 +               if ((mpd.retval == -ENOSPC) && sbi->s_journal) {
 297                         /* commit the transaction which would
 298                          * free blocks released in the transaction
 299                          * and try again
 300 @@ -4167,11 +4243,9 @@ struct inode *ext4_iget(struct super_blo
 301         ei->i_flags = le32_to_cpu(raw_inode->i_flags);
 302         inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
 303         ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
 304 -       if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
 305 -           cpu_to_le32(EXT4_OS_HURD)) {
 306 +       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT))
 307                 ei->i_file_acl |=
 308                         ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
 309 -       }
 310         inode->i_size = ext4_isize(raw_inode);
 311         ei->i_disksize = inode->i_size;
 312         inode->i_generation = le32_to_cpu(raw_inode->i_generation);
 313 @@ -4218,6 +4292,18 @@ struct inode *ext4_iget(struct super_blo
 314                         (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32;
 315         }
 316
 317 +       if (ei->i_file_acl &&
 318 +           ((ei->i_file_acl <
 319 +             (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
 320 +              EXT4_SB(sb)->s_gdb_count)) ||
 321 +            (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
 322 +               ext4_error(sb, __func__,
 323 +                          "bad extended attribute block %llu in inode #%lu",
 324 +                          ei->i_file_acl, inode->i_ino);
 325 +               ret = -EIO;
 326 +               goto bad_inode;
 327 +       }
 328 +
 329         if (S_ISREG(inode->i_mode)) {
 330                 inode->i_op = &ext4_file_inode_operations;
 331                 inode->i_fop = &ext4_file_operations;
 332 @@ -4232,7 +4318,8 @@ struct inode *ext4_iget(struct super_blo
 333                         inode->i_op = &ext4_symlink_inode_operations;
 334                         ext4_set_aops(inode);
 335                 }
 336 -       } else {
 337 +       } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
 338 +             S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 339                 inode->i_op = &ext4_special_inode_operations;
 340                 if (raw_inode->i_block[0])
 341                         init_special_inode(inode, inode->i_mode,
 342 @@ -4240,6 +4327,13 @@ struct inode *ext4_iget(struct super_blo
 343                 else
 344                         init_special_inode(inode, inode->i_mode,
 345                            new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
 346 +       } else {
 347 +               brelse(bh);
 348 +               ret = -EIO;
 349 +               ext4_error(inode->i_sb, __func__,
 350 +                          "bogus i_mode (%o) for inode=%lu",
 351 +                          inode->i_mode, inode->i_ino);
 352 +               goto bad_inode;
 353         }
 354         brelse(iloc.bh);
 355         ext4_set_inode_flags(inode);
 356 diff -uprN linux-2.6.28.orig/fs/ext4/ioctl.c linux-2.6.28/fs/ext4/ioctl.c
 357 --- linux-2.6.28.orig/fs/ext4/ioctl.c   2009-05-02 20:54:43.000000000 +0200
 358 +++ linux-2.6.28/fs/ext4/ioctl.c        2009-05-23 16:05:41.000000000 +0200
 359 @@ -48,8 +48,7 @@ long ext4_ioctl(struct file *filp, unsig
 360                 if (err)
 361                         return err;
 362
 363 -               if (!S_ISDIR(inode->i_mode))
 364 -                       flags &= ~EXT4_DIRSYNC_FL;
 365 +               flags = ext4_mask_flags(inode->i_mode, flags);
 366
 367                 err = -EPERM;
 368                 mutex_lock(&inode->i_mutex);
 369 diff -uprN linux-2.6.28.orig/fs/ext4/mballoc.c linux-2.6.28/fs/ext4/mballoc.c
 370 --- linux-2.6.28.orig/fs/ext4/mballoc.c 2009-05-02 20:54:43.000000000 +0200
 371 +++ linux-2.6.28/fs/ext4/mballoc.c      2009-05-23 16:05:41.000000000 +0200
 372 @@ -1448,7 +1448,7 @@ static void ext4_mb_measure_extent(struc
 373         struct ext4_free_extent *gex = &ac->ac_g_ex;
 374
 375         BUG_ON(ex->fe_len <= 0);
 376 -       BUG_ON(ex->fe_len >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 377 +       BUG_ON(ex->fe_len > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 378         BUG_ON(ex->fe_start >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 379         BUG_ON(ac->ac_status != AC_STATUS_CONTINUE);
 380
 381 @@ -2692,7 +2692,7 @@ int ext4_mb_init(struct super_block *sb,
 382         i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int);
 383         sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
 384         if (sbi->s_mb_maxs == NULL) {
 385 -               kfree(sbi->s_mb_maxs);
 386 +               kfree(sbi->s_mb_offsets);
 387                 return -ENOMEM;
 388         }
 389
 390 @@ -3289,7 +3289,7 @@ ext4_mb_normalize_request(struct ext4_al
 391         }
 392         BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
 393                         start > ac->ac_o_ex.fe_logical);
 394 -       BUG_ON(size <= 0 || size >= EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 395 +       BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
 396
 397         /* now prepare goal request */
 398
 399 @@ -3586,6 +3586,7 @@ static void ext4_mb_put_pa(struct ext4_a
 400                         struct super_block *sb, struct ext4_prealloc_space *pa)
 401  {
 402         unsigned long grp;
 403 +       ext4_fsblk_t grp_blk;
 404
 405         if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
 406                 return;
 407 @@ -3600,8 +3601,12 @@ static void ext4_mb_put_pa(struct ext4_a
 408         pa->pa_deleted = 1;
 409         spin_unlock(&pa->pa_lock);
 410
 411 -       /* -1 is to protect from crossing allocation group */
 412 -       ext4_get_group_no_and_offset(sb, pa->pa_pstart - 1, &grp, NULL);
 413 +       grp_blk = pa->pa_pstart;
 414 +       /* If linear, pa_pstart may be in the next group when pa is used up */
 415 +       if (pa->pa_linear)
 416 +               grp_blk--;
 417 +
 418 +       ext4_get_group_no_and_offset(sb, grp_blk, &grp, NULL);
 419
 420         /*
 421          * possible race:
 422 @@ -4414,7 +4419,7 @@ static void ext4_mb_add_n_trim(struct ex
 423                                                 pa_inode_list) {
 424                 spin_lock(&tmp_pa->pa_lock);
 425                 if (tmp_pa->pa_deleted) {
 426 -                       spin_unlock(&pa->pa_lock);
 427 +                       spin_unlock(&tmp_pa->pa_lock);
 428                         continue;
 429                 }
 430                 if (!added && pa->pa_free < tmp_pa->pa_free) {
 431 diff -uprN linux-2.6.28.orig/fs/ext4/namei.c linux-2.6.28/fs/ext4/namei.c
 432 --- linux-2.6.28.orig/fs/ext4/namei.c   2009-05-02 20:54:43.000000000 +0200
 433 +++ linux-2.6.28/fs/ext4/namei.c        2009-05-23 16:05:41.000000000 +0200
 434 @@ -1056,8 +1056,16 @@ static struct dentry *ext4_lookup(struct
 435                         return ERR_PTR(-EIO);
 436                 }
 437                 inode = ext4_iget(dir->i_sb, ino);
 438 -               if (IS_ERR(inode))
 439 -                       return ERR_CAST(inode);
 440 +               if (unlikely(IS_ERR(inode))) {
 441 +                       if (PTR_ERR(inode) == -ESTALE) {
 442 +                               ext4_error(dir->i_sb, __func__,
 443 +                                               "deleted inode referenced: %u",
 444 +                                               ino);
 445 +                               return ERR_PTR(-EIO);
 446 +                       } else {
 447 +                               return ERR_CAST(inode);
 448 +                       }
 449 +               }
 450         }
 451         return d_splice_alias(inode, dentry);
 452  }
 453 @@ -2436,7 +2444,8 @@ static int ext4_rename(struct inode *old
 454                 ext4_mark_inode_dirty(handle, new_inode);
 455                 if (!new_inode->i_nlink)
 456                         ext4_orphan_add(handle, new_inode);
 457 -               force_da_alloc = 1;
 458 +               if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
 459 +                       force_da_alloc = 1;
 460         }
 461         retval = 0;
 462
 463 diff -uprN linux-2.6.28.orig/fs/ext4/super.c linux-2.6.28/fs/ext4/super.c
 464 --- linux-2.6.28.orig/fs/ext4/super.c   2009-05-02 20:54:43.000000000 +0200
 465 +++ linux-2.6.28/fs/ext4/super.c        2009-05-23 16:05:41.000000000 +0200
 466 @@ -679,8 +679,6 @@ static int ext4_show_options(struct seq_
 467         if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
 468                 seq_puts(seq, ",noacl");
 469  #endif
 470 -       if (!test_opt(sb, RESERVATION))
 471 -               seq_puts(seq, ",noreservation");
 472         if (sbi->s_commit_interval) {
 473                 seq_printf(seq, ",commit=%u",
 474                            (unsigned) (sbi->s_commit_interval / HZ));
 475 @@ -724,6 +722,9 @@ static int ext4_show_options(struct seq_
 476         if (test_opt(sb, DATA_ERR_ABORT))
 477                 seq_puts(seq, ",data_err=abort");
 478
 479 +       if (test_opt(sb, NO_AUTO_DA_ALLOC))
 480 +               seq_puts(seq, ",auto_da_alloc=0");
 481 +
 482         ext4_show_quota_options(seq, sb);
 483         return 0;
 484  }
 485 @@ -849,7 +850,7 @@ enum {
 486         Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
 487         Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
 488         Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
 489 -       Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
 490 +       Opt_auto_da_alloc, Opt_noload, Opt_nobh, Opt_bh,
 491         Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
 492         Opt_journal_checksum, Opt_journal_async_commit,
 493         Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
 494 @@ -883,8 +884,6 @@ static const match_table_t tokens = {
 495         {Opt_nouser_xattr, "nouser_xattr"},
 496         {Opt_acl, "acl"},
 497         {Opt_noacl, "noacl"},
 498 -       {Opt_reservation, "reservation"},
 499 -       {Opt_noreservation, "noreservation"},
 500         {Opt_noload, "noload"},
 501         {Opt_nobh, "nobh"},
 502         {Opt_bh, "bh"},
 503 @@ -919,6 +918,7 @@ static const match_table_t tokens = {
 504         {Opt_delalloc, "delalloc"},
 505         {Opt_nodelalloc, "nodelalloc"},
 506         {Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
 507 +       {Opt_auto_da_alloc, "auto_da_alloc=%u"},
 508         {Opt_err, NULL},
 509  };
 510
 511 @@ -1049,12 +1049,6 @@ static int parse_options(char *options,
 512                                "not supported\n");
 513                         break;
 514  #endif
 515 -               case Opt_reservation:
 516 -                       set_opt(sbi->s_mount_opt, RESERVATION);
 517 -                       break;
 518 -               case Opt_noreservation:
 519 -                       clear_opt(sbi->s_mount_opt, RESERVATION);
 520 -                       break;
 521                 case Opt_journal_update:
 522                         /* @@@ FIXME */
 523                         /* Eventually we will want to be able to create
 524 @@ -1331,6 +1325,14 @@ set_qf_format:
 525                                 return 0;
 526                         sbi->s_inode_readahead_blks = option;
 527                         break;
 528 +               case Opt_auto_da_alloc:
 529 +                       if (match_int(&args[0], &option))
 530 +                               return 0;
 531 +                       if (option)
 532 +                               clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
 533 +                       else
 534 +                               set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
 535 +                       break;
 536                 default:
 537                         printk(KERN_ERR
 538                                "EXT4-fs: Unrecognized mount option \"%s\" "
 539 @@ -1956,7 +1958,6 @@ static int ext4_fill_super(struct super_
 540         sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
 541         sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
 542
 543 -       set_opt(sbi->s_mount_opt, RESERVATION);
 544         set_opt(sbi->s_mount_opt, BARRIER);
 545
 546         /*
 547 diff -uprN linux-2.6.28.orig/fs/jbd2/revoke.c linux-2.6.28/fs/jbd2/revoke.c
 548 --- linux-2.6.28.orig/fs/jbd2/revoke.c  2009-05-02 20:54:43.000000000 +0200
 549 +++ linux-2.6.28/fs/jbd2/revoke.c       2009-05-23 16:05:41.000000000 +0200
 550 @@ -55,6 +55,25 @@
 551   *                     need do nothing.
 552   * RevokeValid set, Revoked set:
 553   *                     buffer has been revoked.
 554 + *
 555 + * Locking rules:
 556 + * We keep two hash tables of revoke records. One hashtable belongs to the
 557 + * running transaction (is pointed to by journal->j_revoke), the other one
 558 + * belongs to the committing transaction. Accesses to the second hash table
 559 + * happen only from the kjournald and no other thread touches this table.  Also
 560 + * journal_switch_revoke_table() which switches which hashtable belongs to the
 561 + * running and which to the committing transaction is called only from
 562 + * kjournald. Therefore we need no locks when accessing the hashtable belonging
 563 + * to the committing transaction.
 564 + *
 565 + * All users operating on the hash table belonging to the running transaction
 566 + * have a handle to the transaction. Therefore they are safe from kjournald
 567 + * switching hash tables under them. For operations on the lists of entries in
 568 + * the hash table j_revoke_lock is used.
 569 + *
 570 + * Finally, also replay code uses the hash tables but at this moment no one else
 571 + * can touch them (filesystem isn't mounted yet) and hence no locking is
 572 + * needed.
 573   */
 574
 575  #ifndef __KERNEL__
 576 @@ -401,8 +420,6 @@ int jbd2_journal_revoke(handle_t *handle
 577   * the second time we would still have a pending revoke to cancel.  So,
 578   * do not trust the Revoked bit on buffers unless RevokeValid is also
 579   * set.
 580 - *
 581 - * The caller must have the journal locked.
 582   */
 583  int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
 584  {
 585 @@ -480,10 +497,7 @@ void jbd2_journal_switch_revoke_table(jo
 586  /*
 587   * Write revoke records to the journal for all entries in the current
 588   * revoke hash, deleting the entries as we go.
 589 - *
 590 - * Called with the journal lock held.
 591   */
 592 -
 593  void jbd2_journal_write_revoke_records(journal_t *journal,
 594                                   transaction_t *transaction)
 595  {
 596 diff -uprN linux-2.6.28.orig/fs/ocfs2/ocfs2_jbd_compat.h linux-2.6.28/fs/ocfs2/ocfs2_jbd_compat.h
 597 --- linux-2.6.28.orig/fs/ocfs2/ocfs2_jbd_compat.h       2009-05-02 20:54:43.000000000 +0200
 598 +++ linux-2.6.28/fs/ocfs2/ocfs2_jbd_compat.h    2009-05-23 16:05:41.000000000 +0200
 599 @@ -60,7 +60,8 @@ static inline int jbd2_journal_file_inod
 600         return 0;
 601  }
 602
 603 -static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
 604 +static inline int jbd2_journal_begin_ordered_truncate(journal_t *journal,
 605 +                                                     struct jbd2_inode *inode,
 606                                                       loff_t new_size)
 607  {
 608         return 0;