Discussion:
[PATCH 04/09] f2fs: fix race conditon on truncation with inline_data
(too old to reply)
Jaegeuk Kim
2014-10-20 05:18:27 UTC
Permalink
Let's consider the following scenario.

blkaddr[0] inline_data i_size i_blocks writepage truncate
NEW X 4096 2 dirty page #0
NEW X 0 change i_size
NEW X 0 2 f2fs_write_inline_data
NEW X 0 2 get_dnode_of_data
NEW X 0 2 truncate_data_blocks_range
NULL O 0 1 memcpy(inline_data)
NULL O 0 1 f2fs_put_dnode
NULL O 0 1 f2fs_truncate
NULL O 0 1 get_dnode_of_data
NULL O 0 1 *invalid block addr*

This patch adds checking inline_data flag during f2fs_truncate not to refer
corrupted block indices.

Signed-off-by: Jaegeuk Kim <***@kernel.org>
---
fs/f2fs/file.c | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8e68bb6..543d8c6 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -473,6 +473,12 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
return err;
}

+ /* writepage can convert inline_data under get_donde_of_data */
+ if (f2fs_has_inline_data(inode)) {
+ f2fs_put_dnode(&dn);
+ goto done;
+ }
+
count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

count -= dn.ofs_in_node;
--
2.1.1
Jaegeuk Kim
2014-10-20 05:18:26 UTC
Permalink
When trying to write inline_data, we should truncate any data block allocated
and pointed by the inode block.
We should consider the data index is not 0.

Signed-off-by: Jaegeuk Kim <***@kernel.org>
---
fs/f2fs/inline.c | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 88036fd..e3abcfb 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -166,6 +166,14 @@ int f2fs_write_inline_data(struct inode *inode,
return err;
ipage = dn.inode_page;

+ /* Release any data block if it is allocated */
+ if (!f2fs_has_inline_data(inode)) {
+ int count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ truncate_data_blocks_range(&dn, count);
+ set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
+ stat_inc_inline_inode(inode);
+ }
+
f2fs_wait_on_page_writeback(ipage, NODE);
zero_user_segment(ipage, INLINE_DATA_OFFSET,
INLINE_DATA_OFFSET + MAX_INLINE_DATA);
@@ -174,13 +182,6 @@ int f2fs_write_inline_data(struct inode *inode,
memcpy(dst_addr, src_addr, size);
kunmap(page);

- /* Release the first data block if it is allocated */
- if (!f2fs_has_inline_data(inode)) {
- truncate_data_blocks_range(&dn, 1);
- set_inode_flag(F2FS_I(inode), FI_INLINE_DATA);
- stat_inc_inline_inode(inode);
- }
-
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
--
2.1.1
Jaegeuk Kim
2014-10-20 05:18:28 UTC
Permalink
This patch fixes to use highmem for directory pages.

Signed-off-by: Jaegeuk Kim <***@kernel.org>
---
fs/f2fs/inode.c | 2 +-
include/linux/f2fs_fs.h | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 0deead4..52d6f54 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -156,7 +156,7 @@ make_now:
inode->i_op = &f2fs_dir_inode_operations;
inode->i_fop = &f2fs_dir_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO);
} else if (S_ISLNK(inode->i_mode)) {
inode->i_op = &f2fs_symlink_inode_operations;
inode->i_mapping->a_ops = &f2fs_dblock_aops;
diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h
index 860313a..6d7381b 100644
--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -33,7 +33,8 @@
#define F2FS_META_INO(sbi) (sbi->meta_ino_num)

/* This flag is used by node and meta inodes, and by recovery */
-#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
+#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO)
+#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)

/*
* For further optimization on multi-head logs, on-disk layout supports maximum
--
2.1.1
Jaegeuk Kim
2014-10-20 05:18:31 UTC
Permalink
This patch removes build warning.

Signed-off-by: Jaegeuk Kim <***@kernel.org>
---
fs/f2fs/segment.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 902c4c3..2c1e608 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1715,7 +1715,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* #2, flush sit entries to sit page.
*/
list_for_each_entry_safe(ses, tmp, head, set_list) {
- struct page *page;
+ struct page *page = NULL;
struct f2fs_sit_block *raw_sit = NULL;
unsigned int start_segno = ses->start_segno;
unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
--
2.1.1
Jaegeuk Kim
2014-10-20 05:18:25 UTC
Permalink
If user truncates file's data, we should truncate inmemory pages too.

Signed-off-by: Jaegeuk Kim <***@kernel.org>
---
fs/f2fs/data.c | 3 +++
fs/f2fs/f2fs.h | 1 +
fs/f2fs/segment.c | 16 ++++++++++++++++
3 files changed, 20 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 84f20e9..5b80ada 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1116,6 +1116,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
return;

+ if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode))
+ invalidate_inmem_page(inode, page);
+
if (PageDirty(page))
inode_dec_dirty_pages(inode);
ClearPagePrivate(page);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 28f24ea..d41d1b7 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1297,6 +1297,7 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void register_inmem_page(struct inode *, struct page *);
+void invalidate_inmem_page(struct inode *, struct page *);
void commit_inmem_pages(struct inode *, bool);
void f2fs_balance_fs(struct f2fs_sb_info *);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9d4a7ab..902c4c3 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -203,6 +203,22 @@ retry:
mutex_unlock(&fi->inmem_lock);
}

+void invalidate_inmem_page(struct inode *inode, struct page *page)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct inmem_pages *cur;
+
+ mutex_lock(&fi->inmem_lock);
+ cur = radix_tree_lookup(&fi->inmem_root, page->index);
+ if (cur) {
+ radix_tree_delete(&fi->inmem_root, cur->page->index);
+ f2fs_put_page(cur->page, 0);
+ list_del(&cur->list);
+ kmem_cache_free(inmem_entry_slab, cur);
+ }
+ mutex_unlock(&fi->inmem_lock);
+}
+
void commit_inmem_pages(struct inode *inode, bool abort)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
--
2.1.1
Jaegeuk Kim
2014-10-20 05:18:30 UTC
Permalink
This patch fixes to call f2fs_unlock_op, which was missing before.

Signed-off-by: Jaegeuk Kim <***@kernel.org>
---
fs/f2fs/file.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 456df07..80d9a04 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -494,7 +494,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
/* writepage can convert inline_data under get_donde_of_data */
if (f2fs_has_inline_data(inode)) {
f2fs_put_dnode(&dn);
- goto done;
+ goto unlock_done;
}

count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
@@ -510,6 +510,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
f2fs_put_dnode(&dn);
free_next:
err = truncate_inode_blocks(inode, free_from);
+unlock_done:
if (lock)
f2fs_unlock_op(sbi);
done:
--
2.1.1
Jaegeuk Kim
2014-10-20 05:18:32 UTC
Permalink
This patch avoids an infinite loop in sync_dirty_inode_page when -EIO was
detected.

Signed-off-by: Jaegeuk Kim <***@kernel.org>
---
fs/f2fs/checkpoint.c | 3 +++
1 file changed, 3 insertions(+)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index dd10a03..ca514d5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -731,6 +731,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
struct dir_inode_entry *entry;
struct inode *inode;
retry:
+ if (unlikely(f2fs_cp_error(sbi)))
+ return;
+
spin_lock(&sbi->dir_inode_lock);

head = &sbi->dir_inode_list;
--
2.1.1
Jaegeuk Kim
2014-10-20 05:18:29 UTC
Permalink
The sceanrio is like this.
inline_data i_size page write_begin/vm_page_mkwrite
X 30 dirty_page
X 30 write to #4096 position
X 30 get_dnode_of_data wait for get_dnode_of_data
O 30 write inline_data
O 30 get_dnode_of_data
O 30 reserve data block
..

In this case, we have #0 = NEW_ADDR and inline_data as well.
We should not allow this condition for further access.

Signed-off-by: Jaegeuk Kim <***@kernel.org>
---
fs/f2fs/data.c | 32 +++++++++++++++++++++++---------
fs/f2fs/file.c | 26 ++++++++++++++++++++++----
2 files changed, 45 insertions(+), 13 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 5b80ada..973fd77 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -257,9 +257,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
bool need_put = dn->inode_page ? false : true;
int err;

- /* if inode_page exists, index should be zero */
- f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index);
-
err = get_dnode_of_data(dn, index, ALLOC_NODE);
if (err)
return err;
@@ -951,7 +948,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct page *page;
+ struct page *page, *ipage;
pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
struct dnode_of_data dn;
int err = 0;
@@ -979,13 +976,26 @@ repeat:
goto inline_data;

f2fs_lock_op(sbi);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = f2fs_reserve_block(&dn, index);
- f2fs_unlock_op(sbi);
- if (err) {
+
+ /* check inline_data */
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage))
+ goto unlock_fail;
+
+ if (f2fs_has_inline_data(inode)) {
+ f2fs_put_page(ipage, 1);
+ f2fs_unlock_op(sbi);
f2fs_put_page(page, 0);
- goto fail;
+ goto repeat;
}
+
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
+ err = f2fs_reserve_block(&dn, index);
+ if (err)
+ goto unlock_fail;
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);
+
inline_data:
lock_page(page);
if (unlikely(page->mapping != mapping)) {
@@ -1038,6 +1048,10 @@ out:
SetPageUptodate(page);
clear_cold_data(page);
return 0;
+
+unlock_fail:
+ f2fs_unlock_op(sbi);
+ f2fs_put_page(page, 0);
fail:
f2fs_write_failed(mapping, pos + len);
return err;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 543d8c6..456df07 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -35,12 +35,13 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
struct inode *inode = file_inode(vma->vm_file);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
+ struct page *ipage;
int err;

f2fs_balance_fs(sbi);

sb_start_pagefault(inode->i_sb);
-
+retry:
/* force to convert with normal data indices */
err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page);
if (err)
@@ -48,11 +49,28 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,

/* block allocation */
f2fs_lock_op(sbi);
- set_new_dnode(&dn, inode, NULL, NULL, 0);
+
+ /* check inline_data */
+ ipage = get_node_page(sbi, inode->i_ino);
+ if (IS_ERR(ipage)) {
+ f2fs_unlock_op(sbi);
+ goto out;
+ }
+
+ if (f2fs_has_inline_data(inode)) {
+ f2fs_put_page(ipage, 1);
+ f2fs_unlock_op(sbi);
+ goto retry;
+ }
+
+ set_new_dnode(&dn, inode, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, page->index);
- f2fs_unlock_op(sbi);
- if (err)
+ if (err) {
+ f2fs_unlock_op(sbi);
goto out;
+ }
+ f2fs_put_dnode(&dn);
+ f2fs_unlock_op(sbi);

file_update_time(vma->vm_file);
lock_page(page);
--
2.1.1
Loading...