1 --- kernel-maemo-2.6.28.test.orig/fs/ext4/ext4.h
2 +++ kernel-maemo-2.6.28.test/fs/ext4/ext4.h
4 #define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
5 #define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
6 #define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
7 +#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
9 /* Used to pass group descriptor data when online resize is done */
10 struct ext4_new_group_input {
12 #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
13 #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input)
14 #define EXT4_IOC_MIGRATE _IO('f', 9)
15 + /* note ioctl 10 reserved for an early version of the FIEMAP ioctl */
16 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
17 +#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
20 * ioctl commands in 32 bit emulation
22 extern void ext4_truncate(struct inode *);
23 extern void ext4_set_inode_flags(struct inode *);
24 extern void ext4_get_inode_flags(struct ext4_inode_info *);
25 +extern int ext4_alloc_da_blocks(struct inode *inode);
26 extern void ext4_set_aops(struct inode *inode);
27 extern int ext4_writepage_trans_blocks(struct inode *);
28 extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
29 --- kernel-maemo-2.6.28.test.orig/fs/ext4/file.c
30 +++ kernel-maemo-2.6.28.test/fs/ext4/file.c
33 static int ext4_release_file(struct inode *inode, struct file *filp)
35 + if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) {
36 + ext4_alloc_da_blocks(inode);
37 + EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE;
39 /* if we are the last writer on the inode, drop the block reservation */
40 if ((filp->f_mode & FMODE_WRITE) &&
41 - (atomic_read(&inode->i_writecount) == 1))
42 + (atomic_read(&inode->i_writecount) == 1) &&
43 + !EXT4_I(inode)->i_reserved_data_blocks)
45 down_write(&EXT4_I(inode)->i_data_sem);
46 ext4_discard_preallocations(inode);
47 --- kernel-maemo-2.6.28.test.orig/fs/ext4/inode.c
48 +++ kernel-maemo-2.6.28.test/fs/ext4/inode.c
50 static inline int ext4_begin_ordered_truncate(struct inode *inode,
53 - return jbd2_journal_begin_ordered_truncate(
54 - EXT4_SB(inode->i_sb)->s_journal,
55 - &EXT4_I(inode)->jinode,
57 + return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
61 static void ext4_invalidatepage(struct page *page, unsigned long offset);
62 @@ -1021,6 +1019,14 @@
63 EXT4_I(inode)->i_reserved_data_blocks -= used;
65 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
68 + * If we have done all the pending block allocations and if
69 + * there aren't any writers on the inode, we can discard the
70 + * inode's preallocations.
72 + if (!total && (atomic_read(&inode->i_writecount) == 0))
73 + ext4_discard_preallocations(inode);
77 @@ -2748,6 +2754,48 @@
82 + * Force all delayed allocation blocks to be allocated for a given inode.
84 +int ext4_alloc_da_blocks(struct inode *inode)
86 + if (!EXT4_I(inode)->i_reserved_data_blocks &&
87 + !EXT4_I(inode)->i_reserved_meta_blocks)
91 + * We do something simple for now. The filemap_flush() will
92 + * also start triggering a write of the data blocks, which is
93 + * not strictly speaking necessary (and for users of
94 + * laptop_mode, not even desirable). However, to do otherwise
95 + * would require replicating code paths in:
97 + * ext4_da_writepages() ->
98 + * write_cache_pages() ---> (via passed in callback function)
99 + * __mpage_da_writepage() -->
100 + * mpage_add_bh_to_extent()
101 + * mpage_da_map_blocks()
103 + * The problem is that write_cache_pages(), located in
104 + * mm/page-writeback.c, marks pages clean in preparation for
105 + * doing I/O, which is not desirable if we're not planning on
106 + * doing I/O at all.
108 + * We could call write_cache_pages(), and then redirty all of
109 + * the pages by calling redirty_page_for_writeback() but that
110 + * would be ugly in the extreme. So instead we would need to
111 + * replicate parts of the code in the above functions,
112 + * simplifying them because we wouldn't actually intend to
113 + * write out the pages, but rather only collect contiguous
114 + * logical block extents, call the multi-block allocator, and
115 + * then update the buffer heads with the block allocations.
117 + * For now, though, we'll cheat by calling filemap_flush(),
118 + * which will map the blocks, and start the I/O, but not
119 + * actually wait for the I/O to complete.
121 + return filemap_flush(inode->i_mapping);
125 * bmap() is special. It gets used by applications such as lilo and by
126 @@ -3757,6 +3805,9 @@
127 if (!ext4_can_truncate(inode))
130 + if (inode->i_size == 0)
131 + ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE;
133 if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) {
134 ext4_ext_truncate(inode);
136 --- kernel-maemo-2.6.28.test.orig/fs/ext4/ioctl.c
137 +++ kernel-maemo-2.6.28.test/fs/ext4/ioctl.c
142 + case EXT4_IOC_ALLOC_DA_BLKS:
145 + if (!is_owner_or_cap(inode))
148 + err = mnt_want_write(filp->f_path.mnt);
151 + err = ext4_alloc_da_blocks(inode);
152 + mnt_drop_write(filp->f_path.mnt);
159 --- kernel-maemo-2.6.28.test.orig/fs/ext4/namei.c
160 +++ kernel-maemo-2.6.28.test/fs/ext4/namei.c
161 @@ -2298,7 +2298,7 @@
162 struct inode *old_inode, *new_inode;
163 struct buffer_head *old_bh, *new_bh, *dir_bh;
164 struct ext4_dir_entry_2 *old_de, *new_de;
166 + int retval, force_da_alloc = 0;
168 old_bh = new_bh = dir_bh = NULL;
170 @@ -2436,6 +2436,7 @@
171 ext4_mark_inode_dirty(handle, new_inode);
172 if (!new_inode->i_nlink)
173 ext4_orphan_add(handle, new_inode);
174 + force_da_alloc = 1;
178 @@ -2444,6 +2445,8 @@
181 ext4_journal_stop(handle);
182 + if (retval == 0 && force_da_alloc)
183 + ext4_alloc_da_blocks(old_inode);
187 --- kernel-maemo-2.6.28.test.orig/fs/jbd/journal.c
188 +++ kernel-maemo-2.6.28.test/fs/jbd/journal.c
193 - * Called under j_state_lock. Returns true if a transaction was started.
194 + * Called under j_state_lock. Returns true if a transaction commit was started.
196 int __log_start_commit(journal_t *journal, tid_t target)
201 * Start a commit of the current running transaction (if any). Returns true
202 - * if a transaction was started, and fills its tid in at *ptid
203 + * if a transaction is going to be committed (or is currently already
204 + * committing), and fills its tid in at *ptid
206 int journal_start_commit(journal_t *journal, tid_t *ptid)
208 @@ -505,15 +506,19 @@
209 if (journal->j_running_transaction) {
210 tid_t tid = journal->j_running_transaction->t_tid;
212 - ret = __log_start_commit(journal, tid);
214 + __log_start_commit(journal, tid);
215 + /* There's a running transaction and we've just made sure
216 + * its commit has been scheduled. */
219 - } else if (journal->j_committing_transaction && ptid) {
221 + } else if (journal->j_committing_transaction) {
223 * If ext3_write_super() recently started a commit, then we
224 * have to wait for completion of that transaction
226 - *ptid = journal->j_committing_transaction->t_tid;
228 + *ptid = journal->j_committing_transaction->t_tid;
231 spin_unlock(&journal->j_state_lock);
232 --- kernel-maemo-2.6.28.test.orig/fs/jbd2/checkpoint.c
233 +++ kernel-maemo-2.6.28.test/fs/jbd2/checkpoint.c
235 safely remove this transaction from the log */
237 __jbd2_journal_drop_transaction(journal, transaction);
238 + kfree(transaction);
240 /* Just in case anybody was waiting for more transactions to be
243 J_ASSERT(journal->j_running_transaction != transaction);
245 jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
246 - kfree(transaction);
248 --- kernel-maemo-2.6.28.test.orig/fs/jbd2/commit.c
249 +++ kernel-maemo-2.6.28.test/fs/jbd2/commit.c
255 + int i, to_free = 0;
256 int tag_bytes = journal_tag_bytes(journal);
257 struct buffer_head *cbh = NULL; /* For transactional checksums */
258 __u32 crc32_sum = ~0;
259 @@ -997,12 +997,10 @@
260 journal->j_committing_transaction = NULL;
261 spin_unlock(&journal->j_state_lock);
263 - if (journal->j_commit_callback)
264 - journal->j_commit_callback(journal, commit_transaction);
266 if (commit_transaction->t_checkpoint_list == NULL &&
267 commit_transaction->t_checkpoint_io_list == NULL) {
268 __jbd2_journal_drop_transaction(journal, commit_transaction);
271 if (journal->j_checkpoint_transactions == NULL) {
272 journal->j_checkpoint_transactions = commit_transaction;
273 @@ -1021,11 +1019,16 @@
275 spin_unlock(&journal->j_list_lock);
277 + if (journal->j_commit_callback)
278 + journal->j_commit_callback(journal, commit_transaction);
280 trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
281 - journal->j_devname, journal->j_commit_sequence,
282 + journal->j_devname, commit_transaction->t_tid,
283 journal->j_tail_sequence);
284 jbd_debug(1, "JBD: commit %d complete, head %d\n",
285 journal->j_commit_sequence, journal->j_tail_sequence);
287 + kfree(commit_transaction);
289 wake_up(&journal->j_wait_done_commit);
291 --- kernel-maemo-2.6.28.test.orig/fs/jbd2/transaction.c
292 +++ kernel-maemo-2.6.28.test/fs/jbd2/transaction.c
293 @@ -2050,46 +2050,26 @@
297 - * File truncate and transaction commit interact with each other in a
298 - * non-trivial way. If a transaction writing data block A is
299 - * committing, we cannot discard the data by truncate until we have
300 - * written them. Otherwise if we crashed after the transaction with
301 - * write has committed but before the transaction with truncate has
302 - * committed, we could see stale data in block A. This function is a
303 - * helper to solve this problem. It starts writeout of the truncated
304 - * part in case it is in the committing transaction.
306 - * Filesystem code must call this function when inode is journaled in
307 - * ordered mode before truncation happens and after the inode has been
308 - * placed on orphan list with the new inode size. The second condition
309 - * avoids the race that someone writes new data and we start
310 - * committing the transaction after this function has been called but
311 - * before a transaction for truncate is started (and furthermore it
312 - * allows us to optimize the case where the addition to orphan list
313 - * happens in the same transaction as write --- we don't have to write
314 - * any data in such case).
315 + * This function must be called when an inode is journaled in ordered
316 + * mode before truncation happens. It starts writeout of the truncated
317 + * part in case it is in the committing transaction, so that we uphold
318 + * the ordered mode consistency guarantees.
320 -int jbd2_journal_begin_ordered_truncate(journal_t *journal,
321 - struct jbd2_inode *jinode,
322 +int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
325 - transaction_t *inode_trans, *commit_trans;
326 + journal_t *journal;
327 + transaction_t *commit_trans;
330 - /* This is a quick check to avoid locking if not necessary */
331 - if (!jinode->i_transaction)
332 + if (!inode->i_transaction && !inode->i_next_transaction)
334 - /* Locks are here just to force reading of recent values, it is
335 - * enough that the transaction was not committing before we started
336 - * a transaction adding the inode to orphan list */
337 + journal = inode->i_transaction->t_journal;
338 spin_lock(&journal->j_state_lock);
339 commit_trans = journal->j_committing_transaction;
340 spin_unlock(&journal->j_state_lock);
341 - spin_lock(&journal->j_list_lock);
342 - inode_trans = jinode->i_transaction;
343 - spin_unlock(&journal->j_list_lock);
344 - if (inode_trans == commit_trans) {
345 - ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
346 + if (inode->i_transaction == commit_trans) {
347 + ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
348 new_size, LLONG_MAX);
350 jbd2_journal_abort(journal, ret);
351 --- kernel-maemo-2.6.28.test.orig/fs/ocfs2/journal.h
352 +++ kernel-maemo-2.6.28.test/fs/ocfs2/journal.h
354 static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
357 - return jbd2_journal_begin_ordered_truncate(
358 - OCFS2_SB(inode->i_sb)->journal->j_journal,
359 - &OCFS2_I(inode)->ip_jinode,
361 + return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
365 #endif /* OCFS2_JOURNAL_H */
366 --- kernel-maemo-2.6.28.test.orig/include/linux/jbd2.h
367 +++ kernel-maemo-2.6.28.test/include/linux/jbd2.h
368 @@ -1087,8 +1087,7 @@
369 extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *);
370 extern int jbd2_journal_force_commit(journal_t *);
371 extern int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *inode);
372 -extern int jbd2_journal_begin_ordered_truncate(journal_t *journal,
373 - struct jbd2_inode *inode, loff_t new_size);
374 +extern int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, loff_t new_size);
375 extern void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode);
376 extern void jbd2_journal_release_jbd_inode(journal_t *journal, struct jbd2_inode *jinode);
378 @@ -1148,8 +1147,8 @@
379 int jbd2_log_do_checkpoint(journal_t *journal);
381 void __jbd2_log_wait_for_space(journal_t *journal);
382 -extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
383 -extern int jbd2_cleanup_journal_tail(journal_t *);
384 +extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *);
385 +extern int jbd2_cleanup_journal_tail(journal_t *);
387 /* Debugging code only: */